dpdk/drivers/net/bonding/rte_eth_bond_pmd.c
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2017 Intel Corporation
   3 */
   4#include <stdlib.h>
   5#include <stdbool.h>
   6#include <netinet/in.h>
   7
   8#include <rte_mbuf.h>
   9#include <rte_malloc.h>
  10#include <ethdev_driver.h>
  11#include <ethdev_vdev.h>
  12#include <rte_tcp.h>
  13#include <rte_udp.h>
  14#include <rte_ip.h>
  15#include <rte_ip_frag.h>
  16#include <rte_devargs.h>
  17#include <rte_kvargs.h>
  18#include <rte_bus_vdev.h>
  19#include <rte_alarm.h>
  20#include <rte_cycles.h>
  21#include <rte_string_fns.h>
  22
  23#include "rte_eth_bond.h"
  24#include "eth_bond_private.h"
  25#include "eth_bond_8023ad_private.h"
  26
  27#define REORDER_PERIOD_MS 10
  28#define DEFAULT_POLLING_INTERVAL_10_MS (10)
  29#define BOND_MAX_MAC_ADDRS 16
  30
  31#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
  32
  33/* Table for statistics in mode 5 TLB */
  34static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
  35
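     /*
      * Return the size in bytes of any VLAN headers (single tag or QinQ)
      * that follow the Ethernet header, updating *proto to the inner
      * EtherType.
      */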
  36static inline size_t
  37get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
  38{
  39        size_t vlan_offset = 0;
  40
  41        if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
  42                rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
  43                struct rte_vlan_hdr *vlan_hdr =
  44                        (struct rte_vlan_hdr *)(eth_hdr + 1);
  45
  46                vlan_offset = sizeof(struct rte_vlan_hdr);
  47                *proto = vlan_hdr->eth_proto;
  48
  49                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
  50                        vlan_hdr = vlan_hdr + 1;
  51                        *proto = vlan_hdr->eth_proto;
  52                        vlan_offset += sizeof(struct rte_vlan_hdr);
  53                }
  54        }
  55        return vlan_offset;
  56}
  57
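     /*
      * Receive burst for mode 0 (round robin): poll each active slave in
      * turn, starting from the queue's saved slave index, until nb_pkts
      * packets have been received or every slave has been polled once.
      * The per-queue starting slave is advanced for the next call.
      */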
  58static uint16_t
  59bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
  60{
  61        struct bond_dev_private *internals;
  62
  63        uint16_t num_rx_total = 0;
  64        uint16_t slave_count;
  65        uint16_t active_slave;
  66        int i;
  67
   68        /* Cast to structure containing the bonded device's port id and queue id */
  69        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
  70        internals = bd_rx_q->dev_private;
  71        slave_count = internals->active_slave_count;
  72        active_slave = bd_rx_q->active_slave;
  73
  74        for (i = 0; i < slave_count && nb_pkts; i++) {
  75                uint16_t num_rx_slave;
  76
  77                /* Offset of pointer to *bufs increases as packets are received
  78                 * from other slaves */
  79                num_rx_slave =
  80                        rte_eth_rx_burst(internals->active_slaves[active_slave],
  81                                         bd_rx_q->queue_id,
  82                                         bufs + num_rx_total, nb_pkts);
  83                num_rx_total += num_rx_slave;
  84                nb_pkts -= num_rx_slave;
  85                if (++active_slave == slave_count)
  86                        active_slave = 0;
  87        }
  88
  89        if (++bd_rx_q->active_slave >= slave_count)
  90                bd_rx_q->active_slave = 0;
  91        return num_rx_total;
  92}
  93
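     /*
      * Receive burst for mode 1 (active backup): packets are only read from
      * the current primary slave.
      */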
  94static uint16_t
  95bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
  96                uint16_t nb_pkts)
  97{
  98        struct bond_dev_private *internals;
  99
  100        /* Cast to structure containing the bonded device's port id and queue id */
 101        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
 102
 103        internals = bd_rx_q->dev_private;
 104
 105        return rte_eth_rx_burst(internals->current_primary_port,
 106                        bd_rx_q->queue_id, bufs, nb_pkts);
 107}
 108
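     /*
      * Return non-zero if the frame is an untagged slow-protocol frame
      * (LACPDU or marker PDU).
      */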
 109static inline uint8_t
 110is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
 111{
 112        const uint16_t ether_type_slow_be =
 113                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
 114
 115        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
 116                (ethertype == ether_type_slow_be &&
 117                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
 118}
 119
 120/*****************************************************************************
 121 * Flow director's setup for mode 4 optimization
 122 */
 123
 124static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
 125        .dst.addr_bytes = { 0 },
 126        .src.addr_bytes = { 0 },
 127        .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
 128};
 129
 130static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
 131        .dst.addr_bytes = { 0 },
 132        .src.addr_bytes = { 0 },
 133        .type = 0xFFFF,
 134};
 135
 136static struct rte_flow_item flow_item_8023ad[] = {
 137        {
 138                .type = RTE_FLOW_ITEM_TYPE_ETH,
 139                .spec = &flow_item_eth_type_8023ad,
 140                .last = NULL,
 141                .mask = &flow_item_eth_mask_type_8023ad,
 142        },
 143        {
 144                .type = RTE_FLOW_ITEM_TYPE_END,
 145                .spec = NULL,
 146                .last = NULL,
 147                .mask = NULL,
 148        }
 149};
 150
 151const struct rte_flow_attr flow_attr_8023ad = {
 152        .group = 0,
 153        .priority = 0,
 154        .ingress = 1,
 155        .egress = 0,
 156        .reserved = 0,
 157};
 158
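     /*
      * Check that a slave port accepts the flow rule used to steer
      * IEEE 802.3ad slow-protocol frames to a dedicated Rx queue, and that
      * the slave exposes enough Rx/Tx queues for the bonded device's
      * configuration.
      */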
 159int
 160bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
 161                uint16_t slave_port) {
 162        struct rte_eth_dev_info slave_info;
 163        struct rte_flow_error error;
 164        struct bond_dev_private *internals = bond_dev->data->dev_private;
 165
 166        const struct rte_flow_action_queue lacp_queue_conf = {
 167                .index = 0,
 168        };
 169
 170        const struct rte_flow_action actions[] = {
 171                {
 172                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 173                        .conf = &lacp_queue_conf
 174                },
 175                {
 176                        .type = RTE_FLOW_ACTION_TYPE_END,
 177                }
 178        };
 179
 180        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
 181                        flow_item_8023ad, actions, &error);
 182        if (ret < 0) {
 183                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
 184                                __func__, error.message, slave_port,
 185                                internals->mode4.dedicated_queues.rx_qid);
 186                return -1;
 187        }
 188
 189        ret = rte_eth_dev_info_get(slave_port, &slave_info);
 190        if (ret != 0) {
 191                RTE_BOND_LOG(ERR,
 192                        "%s: Error during getting device (port %u) info: %s\n",
 193                        __func__, slave_port, strerror(-ret));
 194
 195                return ret;
 196        }
 197
 198        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
 199                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
 200                RTE_BOND_LOG(ERR,
 201                        "%s: Slave %d capabilities doesn't allow to allocate additional queues",
 202                        __func__, slave_port);
 203                return -1;
 204        }
 205
 206        return 0;
 207}
 208
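     /*
      * Check whether every current slave can support hardware filtering of
      * slow-protocol frames into a dedicated queue, recording the queue
      * indexes that the dedicated Rx/Tx queues will use (the next index
      * after the bonded device's configured queues).
      */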
 209int
 210bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
 211        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
 212        struct bond_dev_private *internals = bond_dev->data->dev_private;
 213        struct rte_eth_dev_info bond_info;
 214        uint16_t idx;
 215        int ret;
 216
  217        /* Verify that all slaves in the bonding device support flow director */
 218        if (internals->slave_count > 0) {
 219                ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
 220                if (ret != 0) {
 221                        RTE_BOND_LOG(ERR,
 222                                "%s: Error during getting device (port %u) info: %s\n",
 223                                __func__, bond_dev->data->port_id,
 224                                strerror(-ret));
 225
 226                        return ret;
 227                }
 228
 229                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
 230                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
 231
 232                for (idx = 0; idx < internals->slave_count; idx++) {
 233                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
 234                                        internals->slaves[idx].port_id) != 0)
 235                                return -1;
 236                }
 237        }
 238
 239        return 0;
 240}
 241
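     /*
      * Create the flow rule on a slave port that redirects slow-protocol
      * frames to the dedicated Rx queue and store the flow handle for that
      * slave.
      */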
 242int
 243bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
 244
 245        struct rte_flow_error error;
 246        struct bond_dev_private *internals = bond_dev->data->dev_private;
 247        struct rte_flow_action_queue lacp_queue_conf = {
 248                .index = internals->mode4.dedicated_queues.rx_qid,
 249        };
 250
 251        const struct rte_flow_action actions[] = {
 252                {
 253                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 254                        .conf = &lacp_queue_conf
 255                },
 256                {
 257                        .type = RTE_FLOW_ACTION_TYPE_END,
 258                }
 259        };
 260
 261        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
 262                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
 263        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
 264                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
 265                                "(slave_port=%d queue_id=%d)",
 266                                error.message, slave_port,
 267                                internals->mode4.dedicated_queues.rx_qid);
 268                return -1;
 269        }
 270
 271        return 0;
 272}
 273
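     /*
      * Receive burst for mode 4 (802.3ad): packets are read from the active
      * slaves in round-robin order. Slow-protocol frames that need software
      * handling are passed to the mode 4 state machine, and frames that the
      * bonded interface should not deliver (slave not collecting, or address
      * mismatch while not in promiscuous/allmulti mode) are dropped and
      * removed from the returned array.
      */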
 274static inline uint16_t
 275rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
 276                bool dedicated_rxq)
 277{
  278        /* Cast to structure containing the bonded device's port id and queue id */
 279        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
 280        struct bond_dev_private *internals = bd_rx_q->dev_private;
 281        struct rte_eth_dev *bonded_eth_dev =
 282                                        &rte_eth_devices[internals->port_id];
 283        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
 284        struct rte_ether_hdr *hdr;
 285
 286        const uint16_t ether_type_slow_be =
 287                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
 288        uint16_t num_rx_total = 0;      /* Total number of received packets */
 289        uint16_t slaves[RTE_MAX_ETHPORTS];
 290        uint16_t slave_count, idx;
 291
 292        uint8_t collecting;  /* current slave collecting status */
 293        const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
 294        const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
 295        uint8_t subtype;
 296        uint16_t i;
 297        uint16_t j;
 298        uint16_t k;
 299
  300        /* Copy slave list to protect against slave up/down changes during rx
 301         * bursting */
 302        slave_count = internals->active_slave_count;
 303        memcpy(slaves, internals->active_slaves,
 304                        sizeof(internals->active_slaves[0]) * slave_count);
 305
 306        idx = bd_rx_q->active_slave;
 307        if (idx >= slave_count) {
 308                bd_rx_q->active_slave = 0;
 309                idx = 0;
 310        }
 311        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
 312                j = num_rx_total;
 313                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
 314                                         COLLECTING);
 315
 316                /* Read packets from this slave */
 317                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
 318                                &bufs[num_rx_total], nb_pkts - num_rx_total);
 319
 320                for (k = j; k < 2 && k < num_rx_total; k++)
 321                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
 322
 323                /* Handle slow protocol packets. */
 324                while (j < num_rx_total) {
 325                        if (j + 3 < num_rx_total)
 326                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
 327
 328                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
 329                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
 330
 331                        /* Remove packet from array if:
  332                         * - it is a slow-protocol packet and no dedicated rxq is present,
  333                         * - slave is not in collecting state,
  334                         * - bonding interface is not in promiscuous mode and:
  335                         *   - packet is unicast and the address does not match, or
  336                         *   - packet is multicast and the bonding interface
  337                         *     is not in allmulti mode.
 338                         */
 339                        if (unlikely(
 340                                (!dedicated_rxq &&
 341                                 is_lacp_packets(hdr->ether_type, subtype,
 342                                                 bufs[j])) ||
 343                                !collecting ||
 344                                (!promisc &&
 345                                 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
 346                                   !rte_is_same_ether_addr(bond_mac,
 347                                                       &hdr->d_addr)) ||
 348                                  (!allmulti &&
 349                                   rte_is_multicast_ether_addr(&hdr->d_addr)))))) {
 350
 351                                if (hdr->ether_type == ether_type_slow_be) {
 352                                        bond_mode_8023ad_handle_slow_pkt(
 353                                            internals, slaves[idx], bufs[j]);
 354                                } else
 355                                        rte_pktmbuf_free(bufs[j]);
 356
 357                                /* Packet is managed by mode 4 or dropped, shift the array */
 358                                num_rx_total--;
 359                                if (j < num_rx_total) {
 360                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
 361                                                (num_rx_total - j));
 362                                }
 363                        } else
 364                                j++;
 365                }
 366                if (unlikely(++idx == slave_count))
 367                        idx = 0;
 368        }
 369
 370        if (++bd_rx_q->active_slave >= slave_count)
 371                bd_rx_q->active_slave = 0;
 372
 373        return num_rx_total;
 374}
 375
 376static uint16_t
 377bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
 378                uint16_t nb_pkts)
 379{
 380        return rx_burst_8023ad(queue, bufs, nb_pkts, false);
 381}
 382
 383static uint16_t
 384bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 385                uint16_t nb_pkts)
 386{
 387        return rx_burst_8023ad(queue, bufs, nb_pkts, true);
 388}
 389
 390#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
 391uint32_t burstnumberRX;
 392uint32_t burstnumberTX;
 393
 394#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
 395
 396static void
 397arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
 398{
 399        switch (arp_op) {
 400        case RTE_ARP_OP_REQUEST:
 401                strlcpy(buf, "ARP Request", buf_len);
 402                return;
 403        case RTE_ARP_OP_REPLY:
 404                strlcpy(buf, "ARP Reply", buf_len);
 405                return;
 406        case RTE_ARP_OP_REVREQUEST:
 407                strlcpy(buf, "Reverse ARP Request", buf_len);
 408                return;
 409        case RTE_ARP_OP_REVREPLY:
 410                strlcpy(buf, "Reverse ARP Reply", buf_len);
 411                return;
 412        case RTE_ARP_OP_INVREQUEST:
 413                strlcpy(buf, "Peer Identify Request", buf_len);
 414                return;
 415        case RTE_ARP_OP_INVREPLY:
 416                strlcpy(buf, "Peer Identify Reply", buf_len);
 417                return;
 418        default:
 419                break;
 420        }
 421        strlcpy(buf, "Unknown", buf_len);
 422        return;
 423}
 424#endif
 425#define MaxIPv4String   16
 426static void
 427ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
 428{
 429        uint32_t ipv4_addr;
 430
 431        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
 432        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
 433                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
 434                ipv4_addr & 0xFF);
 435}
 436
 437#define MAX_CLIENTS_NUMBER      128
 438uint8_t active_clients;
 439struct client_stats_t {
 440        uint16_t port;
 441        uint32_t ipv4_addr;
 442        uint32_t ipv4_rx_packets;
 443        uint32_t ipv4_tx_packets;
 444};
 445struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
 446
 447static void
 448update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
 449{
 450        int i = 0;
 451
 452        for (; i < MAX_CLIENTS_NUMBER; i++)     {
 453                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
  454                        /* Just update the RX or TX packet count for this client */
 455                        if (TXorRXindicator == &burstnumberRX)
 456                                client_stats[i].ipv4_rx_packets++;
 457                        else
 458                                client_stats[i].ipv4_tx_packets++;
 459                        return;
 460                }
 461        }
  462        /* We have a new client. Insert it into the table and update its stats */
 463        if (TXorRXindicator == &burstnumberRX)
 464                client_stats[active_clients].ipv4_rx_packets++;
 465        else
 466                client_stats[active_clients].ipv4_tx_packets++;
 467        client_stats[active_clients].ipv4_addr = addr;
 468        client_stats[active_clients].port = port;
 469        active_clients++;
 470
 471}
 472
 473#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
 474#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
 475        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
 476                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
 477                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
 478                info,                                                   \
 479                port,                                                   \
 480                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
 481                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
 482                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
 483                src_ip,                                                 \
 484                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
 485                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
 486                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
 487                dst_ip,                                                 \
 488                arp_op, ++burstnumber)
 489#endif
 490
 491static void
 492mode6_debug(const char __rte_unused *info,
 493        struct rte_ether_hdr *eth_h, uint16_t port,
 494        uint32_t __rte_unused *burstnumber)
 495{
 496        struct rte_ipv4_hdr *ipv4_h;
 497#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
 498        struct rte_arp_hdr *arp_h;
 499        char dst_ip[16];
 500        char ArpOp[24];
 501        char buf[16];
 502#endif
 503        char src_ip[16];
 504
 505        uint16_t ether_type = eth_h->ether_type;
 506        uint16_t offset = get_vlan_offset(eth_h, &ether_type);
 507
 508#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
 509        strlcpy(buf, info, 16);
 510#endif
 511
 512        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
 513                ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
 514                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
 515#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
 516                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
 517                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
 518#endif
 519                update_client_stats(ipv4_h->src_addr, port, burstnumber);
 520        }
 521#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
 522        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
 523                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
 524                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
 525                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
 526                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
 527                                ArpOp, sizeof(ArpOp));
 528                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
 529        }
 530#endif
 531}
 532#endif
 533
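     /*
      * Receive burst for mode 6 (adaptive load balancing): packets are read
      * with the round-robin Rx path and ARP frames are passed to the ALB
      * logic so the client table can be updated.
      */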
 534static uint16_t
 535bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 536{
 537        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
 538        struct bond_dev_private *internals = bd_rx_q->dev_private;
 539        struct rte_ether_hdr *eth_h;
 540        uint16_t ether_type, offset;
 541        uint16_t nb_recv_pkts;
 542        int i;
 543
 544        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
 545
 546        for (i = 0; i < nb_recv_pkts; i++) {
 547                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
 548                ether_type = eth_h->ether_type;
 549                offset = get_vlan_offset(eth_h, &ether_type);
 550
 551                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
 552#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
 553                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
 554#endif
 555                        bond_mode_alb_arp_recv(eth_h, offset, internals);
 556                }
 557#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
 558                else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
 559                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
 560#endif
 561        }
 562
 563        return nb_recv_pkts;
 564}
 565
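     /*
      * Transmit burst for mode 0 (round robin): packets are distributed over
      * the active slaves in round-robin order. Packets a slave fails to send
      * are moved to the end of the bufs array so the caller can retry or
      * free them.
      */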
 566static uint16_t
 567bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
 568                uint16_t nb_pkts)
 569{
 570        struct bond_dev_private *internals;
 571        struct bond_tx_queue *bd_tx_q;
 572
 573        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
 574        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
 575
 576        uint16_t num_of_slaves;
 577        uint16_t slaves[RTE_MAX_ETHPORTS];
 578
 579        uint16_t num_tx_total = 0, num_tx_slave;
 580
 581        static int slave_idx = 0;
 582        int i, cslave_idx = 0, tx_fail_total = 0;
 583
 584        bd_tx_q = (struct bond_tx_queue *)queue;
 585        internals = bd_tx_q->dev_private;
 586
 587        /* Copy slave list to protect against slave up/down changes during tx
 588         * bursting */
 589        num_of_slaves = internals->active_slave_count;
 590        memcpy(slaves, internals->active_slaves,
 591                        sizeof(internals->active_slaves[0]) * num_of_slaves);
 592
 593        if (num_of_slaves < 1)
 594                return num_tx_total;
 595
  596        /* Populate each slave's mbuf array with the packets to be sent on it */
 597        for (i = 0; i < nb_pkts; i++) {
 598                cslave_idx = (slave_idx + i) % num_of_slaves;
 599                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
 600        }
 601
 602        /* increment current slave index so the next call to tx burst starts on the
 603         * next slave */
 604        slave_idx = ++cslave_idx;
 605
 606        /* Send packet burst on each slave device */
 607        for (i = 0; i < num_of_slaves; i++) {
 608                if (slave_nb_pkts[i] > 0) {
 609                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
 610                                        slave_bufs[i], slave_nb_pkts[i]);
 611
 612                        /* if tx burst fails move packets to end of bufs */
 613                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
 614                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
 615
 616                                tx_fail_total += tx_fail_slave;
 617
 618                                memcpy(&bufs[nb_pkts - tx_fail_total],
 619                                       &slave_bufs[i][num_tx_slave],
 620                                       tx_fail_slave * sizeof(bufs[0]));
 621                        }
 622                        num_tx_total += num_tx_slave;
 623                }
 624        }
 625
 626        return num_tx_total;
 627}
 628
 629static uint16_t
 630bond_ethdev_tx_burst_active_backup(void *queue,
 631                struct rte_mbuf **bufs, uint16_t nb_pkts)
 632{
 633        struct bond_dev_private *internals;
 634        struct bond_tx_queue *bd_tx_q;
 635
 636        bd_tx_q = (struct bond_tx_queue *)queue;
 637        internals = bd_tx_q->dev_private;
 638
 639        if (internals->active_slave_count < 1)
 640                return 0;
 641
 642        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
 643                        bufs, nb_pkts);
 644}
 645
 646static inline uint16_t
 647ether_hash(struct rte_ether_hdr *eth_hdr)
 648{
 649        unaligned_uint16_t *word_src_addr =
 650                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
 651        unaligned_uint16_t *word_dst_addr =
 652                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
 653
 654        return (word_src_addr[0] ^ word_dst_addr[0]) ^
 655                        (word_src_addr[1] ^ word_dst_addr[1]) ^
 656                        (word_src_addr[2] ^ word_dst_addr[2]);
 657}
 658
 659static inline uint32_t
 660ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
 661{
 662        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
 663}
 664
 665static inline uint32_t
 666ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
 667{
 668        unaligned_uint32_t *word_src_addr =
 669                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
 670        unaligned_uint32_t *word_dst_addr =
 671                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
 672
 673        return (word_src_addr[0] ^ word_dst_addr[0]) ^
 674                        (word_src_addr[1] ^ word_dst_addr[1]) ^
 675                        (word_src_addr[2] ^ word_dst_addr[2]) ^
 676                        (word_src_addr[3] ^ word_dst_addr[3]);
 677}
 678
 679
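     /*
      * burst_xmit_l2_hash/_l23_hash/_l34_hash compute an output slave index
      * for every packet in the burst from its L2, L2 + L3 or L3 + L4 header
      * fields, according to the configured transmit policy.
      */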
 680void
 681burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
 682                uint16_t slave_count, uint16_t *slaves)
 683{
 684        struct rte_ether_hdr *eth_hdr;
 685        uint32_t hash;
 686        int i;
 687
 688        for (i = 0; i < nb_pkts; i++) {
 689                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
 690
 691                hash = ether_hash(eth_hdr);
 692
 693                slaves[i] = (hash ^= hash >> 8) % slave_count;
 694        }
 695}
 696
 697void
 698burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
 699                uint16_t slave_count, uint16_t *slaves)
 700{
 701        uint16_t i;
 702        struct rte_ether_hdr *eth_hdr;
 703        uint16_t proto;
 704        size_t vlan_offset;
 705        uint32_t hash, l3hash;
 706
 707        for (i = 0; i < nb_pkts; i++) {
 708                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
 709                l3hash = 0;
 710
 711                proto = eth_hdr->ether_type;
 712                hash = ether_hash(eth_hdr);
 713
 714                vlan_offset = get_vlan_offset(eth_hdr, &proto);
 715
 716                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
 717                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
 718                                        ((char *)(eth_hdr + 1) + vlan_offset);
 719                        l3hash = ipv4_hash(ipv4_hdr);
 720
 721                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
 722                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
 723                                        ((char *)(eth_hdr + 1) + vlan_offset);
 724                        l3hash = ipv6_hash(ipv6_hdr);
 725                }
 726
 727                hash = hash ^ l3hash;
 728                hash ^= hash >> 16;
 729                hash ^= hash >> 8;
 730
 731                slaves[i] = hash % slave_count;
 732        }
 733}
 734
 735void
 736burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
 737                uint16_t slave_count, uint16_t *slaves)
 738{
 739        struct rte_ether_hdr *eth_hdr;
 740        uint16_t proto;
 741        size_t vlan_offset;
 742        int i;
 743
 744        struct rte_udp_hdr *udp_hdr;
 745        struct rte_tcp_hdr *tcp_hdr;
 746        uint32_t hash, l3hash, l4hash;
 747
 748        for (i = 0; i < nb_pkts; i++) {
 749                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
 750                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
 751                proto = eth_hdr->ether_type;
 752                vlan_offset = get_vlan_offset(eth_hdr, &proto);
 753                l3hash = 0;
 754                l4hash = 0;
 755
 756                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
 757                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
 758                                        ((char *)(eth_hdr + 1) + vlan_offset);
 759                        size_t ip_hdr_offset;
 760
 761                        l3hash = ipv4_hash(ipv4_hdr);
 762
 763                        /* there is no L4 header in fragmented packet */
 764                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
 765                                                                == 0)) {
 766                                ip_hdr_offset = (ipv4_hdr->version_ihl
 767                                        & RTE_IPV4_HDR_IHL_MASK) *
 768                                        RTE_IPV4_IHL_MULTIPLIER;
 769
 770                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
 771                                        tcp_hdr = (struct rte_tcp_hdr *)
 772                                                ((char *)ipv4_hdr +
 773                                                        ip_hdr_offset);
 774                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
 775                                                        < pkt_end)
 776                                                l4hash = HASH_L4_PORTS(tcp_hdr);
 777                                } else if (ipv4_hdr->next_proto_id ==
 778                                                                IPPROTO_UDP) {
 779                                        udp_hdr = (struct rte_udp_hdr *)
 780                                                ((char *)ipv4_hdr +
 781                                                        ip_hdr_offset);
 782                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
 783                                                        < pkt_end)
 784                                                l4hash = HASH_L4_PORTS(udp_hdr);
 785                                }
 786                        }
 787                } else if  (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
 788                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
 789                                        ((char *)(eth_hdr + 1) + vlan_offset);
 790                        l3hash = ipv6_hash(ipv6_hdr);
 791
 792                        if (ipv6_hdr->proto == IPPROTO_TCP) {
 793                                tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
 794                                l4hash = HASH_L4_PORTS(tcp_hdr);
 795                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
 796                                udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
 797                                l4hash = HASH_L4_PORTS(udp_hdr);
 798                        }
 799                }
 800
 801                hash = l3hash ^ l4hash;
 802                hash ^= hash >> 16;
 803                hash ^= hash >> 8;
 804
 805                slaves[i] = hash % slave_count;
 806        }
 807}
 808
 809struct bwg_slave {
 810        uint64_t bwg_left_int;
 811        uint64_t bwg_left_remainder;
 812        uint16_t slave;
 813};
 814
 815void
 816bond_tlb_activate_slave(struct bond_dev_private *internals) {
 817        int i;
 818
 819        for (i = 0; i < internals->active_slave_count; i++) {
 820                tlb_last_obytets[internals->active_slaves[i]] = 0;
 821        }
 822}
 823
 824static int
 825bandwidth_cmp(const void *a, const void *b)
 826{
 827        const struct bwg_slave *bwg_a = a;
 828        const struct bwg_slave *bwg_b = b;
 829        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
 830        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
 831                        (int64_t)bwg_a->bwg_left_remainder;
 832        if (diff > 0)
 833                return 1;
 834        else if (diff < 0)
 835                return -1;
 836        else if (diff2 > 0)
 837                return 1;
 838        else if (diff2 < 0)
 839                return -1;
 840        else
 841                return 0;
 842}
 843
 844static void
 845bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
 846                struct bwg_slave *bwg_slave)
 847{
 848        struct rte_eth_link link_status;
 849        int ret;
 850
 851        ret = rte_eth_link_get_nowait(port_id, &link_status);
 852        if (ret < 0) {
 853                RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
 854                             port_id, rte_strerror(-ret));
 855                return;
 856        }
 857        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
 858        if (link_bwg == 0)
 859                return;
 860        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
 861        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
 862        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
 863}
 864
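     /*
      * Periodic alarm callback for mode 5 (TLB): estimate the bandwidth left
      * on each active slave from its link speed and the bytes transmitted
      * since the last sample, then sort the slave order so the least loaded
      * slaves are tried first by the TLB transmit path.
      */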
 865static void
 866bond_ethdev_update_tlb_slave_cb(void *arg)
 867{
 868        struct bond_dev_private *internals = arg;
 869        struct rte_eth_stats slave_stats;
 870        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
 871        uint16_t slave_count;
 872        uint64_t tx_bytes;
 873
 874        uint8_t update_stats = 0;
 875        uint16_t slave_id;
 876        uint16_t i;
 877
 878        internals->slave_update_idx++;
 879
 880
 881        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
 882                update_stats = 1;
 883
 884        for (i = 0; i < internals->active_slave_count; i++) {
 885                slave_id = internals->active_slaves[i];
 886                rte_eth_stats_get(slave_id, &slave_stats);
 887                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
 888                bandwidth_left(slave_id, tx_bytes,
 889                                internals->slave_update_idx, &bwg_array[i]);
 890                bwg_array[i].slave = slave_id;
 891
 892                if (update_stats) {
 893                        tlb_last_obytets[slave_id] = slave_stats.obytes;
 894                }
 895        }
 896
 897        if (update_stats == 1)
 898                internals->slave_update_idx = 0;
 899
 900        slave_count = i;
 901        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
 902        for (i = 0; i < slave_count; i++)
 903                internals->tlb_slaves_order[i] = bwg_array[i].slave;
 904
 905        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
 906                        (struct bond_dev_private *)internals);
 907}
 908
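     /*
      * Transmit burst for mode 5 (TLB): walk the slaves in the order
      * computed by the bandwidth callback; packets whose source MAC is the
      * primary slave's address are rewritten to carry the current slave's
      * MAC before being sent on it.
      */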
 909static uint16_t
 910bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 911{
 912        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
 913        struct bond_dev_private *internals = bd_tx_q->dev_private;
 914
 915        struct rte_eth_dev *primary_port =
 916                        &rte_eth_devices[internals->primary_port];
 917        uint16_t num_tx_total = 0;
 918        uint16_t i, j;
 919
 920        uint16_t num_of_slaves = internals->active_slave_count;
 921        uint16_t slaves[RTE_MAX_ETHPORTS];
 922
 923        struct rte_ether_hdr *ether_hdr;
 924        struct rte_ether_addr primary_slave_addr;
 925        struct rte_ether_addr active_slave_addr;
 926
 927        if (num_of_slaves < 1)
 928                return num_tx_total;
 929
 930        memcpy(slaves, internals->tlb_slaves_order,
 931                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
 932
 933
 934        rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
 935
 936        if (nb_pkts > 3) {
 937                for (i = 0; i < 3; i++)
 938                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
 939        }
 940
 941        for (i = 0; i < num_of_slaves; i++) {
 942                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
 943                for (j = num_tx_total; j < nb_pkts; j++) {
 944                        if (j + 3 < nb_pkts)
 945                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
 946
 947                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
 948                                                struct rte_ether_hdr *);
 949                        if (rte_is_same_ether_addr(&ether_hdr->s_addr,
 950                                                        &primary_slave_addr))
 951                                rte_ether_addr_copy(&active_slave_addr,
 952                                                &ether_hdr->s_addr);
 953#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
  954                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
 955#endif
 956                }
 957
 958                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
 959                                bufs + num_tx_total, nb_pkts - num_tx_total);
 960
 961                if (num_tx_total == nb_pkts)
 962                        break;
 963        }
 964
 965        return num_tx_total;
 966}
 967
 968void
 969bond_tlb_disable(struct bond_dev_private *internals)
 970{
 971        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
 972}
 973
 974void
 975bond_tlb_enable(struct bond_dev_private *internals)
 976{
 977        bond_ethdev_update_tlb_slave_cb(internals);
 978}
 979
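     /*
      * Transmit burst for mode 6 (ALB): ARP frames are assigned to slaves by
      * the ALB client table (with the source MAC rewritten to the chosen
      * slave), ARP update packets are generated for known clients when
      * required, and all remaining traffic is sent through the TLB policy.
      */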
 980static uint16_t
 981bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 982{
 983        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
 984        struct bond_dev_private *internals = bd_tx_q->dev_private;
 985
 986        struct rte_ether_hdr *eth_h;
 987        uint16_t ether_type, offset;
 988
 989        struct client_data *client_info;
 990
 991        /*
  992         * We create transmit buffers for every slave and one additional buffer
  993         * for TLB traffic. In the worst case every packet will be sent on one port.
 994         */
 995        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
 996        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
 997
 998        /*
 999         * We create separate transmit buffers for update packets as they won't
1000         * be counted in num_tx_total.
1001         */
1002        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1003        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1004
1005        struct rte_mbuf *upd_pkt;
1006        size_t pkt_size;
1007
1008        uint16_t num_send, num_not_send = 0;
1009        uint16_t num_tx_total = 0;
1010        uint16_t slave_idx;
1011
1012        int i, j;
1013
1014        /* Search tx buffer for ARP packets and forward them to alb */
1015        for (i = 0; i < nb_pkts; i++) {
1016                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1017                ether_type = eth_h->ether_type;
1018                offset = get_vlan_offset(eth_h, &ether_type);
1019
1020                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1021                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1022
1023                        /* Change src mac in eth header */
1024                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1025
1026                        /* Add packet to slave tx buffer */
1027                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1028                        slave_bufs_pkts[slave_idx]++;
1029                } else {
1030                        /* If packet is not ARP, send it with TLB policy */
1031                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1032                                        bufs[i];
1033                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1034                }
1035        }
1036
1037        /* Update connected client ARP tables */
1038        if (internals->mode6.ntt) {
1039                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1040                        client_info = &internals->mode6.client_table[i];
1041
1042                        if (client_info->in_use) {
1043                                /* Allocate new packet to send ARP update on current slave */
1044                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1045                                if (upd_pkt == NULL) {
1046                                        RTE_BOND_LOG(ERR,
1047                                                     "Failed to allocate ARP packet from pool");
1048                                        continue;
1049                                }
1050                                pkt_size = sizeof(struct rte_ether_hdr) +
1051                                        sizeof(struct rte_arp_hdr) +
1052                                        client_info->vlan_count *
1053                                        sizeof(struct rte_vlan_hdr);
1054                                upd_pkt->data_len = pkt_size;
1055                                upd_pkt->pkt_len = pkt_size;
1056
1057                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1058                                                internals);
1059
1060                                /* Add packet to update tx buffer */
1061                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1062                                update_bufs_pkts[slave_idx]++;
1063                        }
1064                }
1065                internals->mode6.ntt = 0;
1066        }
1067
1068        /* Send ARP packets on proper slaves */
1069        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1070                if (slave_bufs_pkts[i] > 0) {
1071                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1072                                        slave_bufs[i], slave_bufs_pkts[i]);
1073                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1074                                bufs[nb_pkts - 1 - num_not_send - j] =
1075                                                slave_bufs[i][nb_pkts - 1 - j];
1076                        }
1077
1078                        num_tx_total += num_send;
1079                        num_not_send += slave_bufs_pkts[i] - num_send;
1080
1081#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
 1082                        /* Print TX stats including update packets */
1083                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
1084                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1085                                                        struct rte_ether_hdr *);
1086                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1087                        }
1088#endif
1089                }
1090        }
1091
1092        /* Send update packets on proper slaves */
1093        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1094                if (update_bufs_pkts[i] > 0) {
1095                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1096                                        update_bufs_pkts[i]);
1097                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
1098                                rte_pktmbuf_free(update_bufs[i][j]);
1099                        }
1100#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1101                        for (j = 0; j < update_bufs_pkts[i]; j++) {
1102                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1103                                                        struct rte_ether_hdr *);
1104                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1105                        }
1106#endif
1107                }
1108        }
1109
1110        /* Send non-ARP packets using tlb policy */
1111        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1112                num_send = bond_ethdev_tx_burst_tlb(queue,
1113                                slave_bufs[RTE_MAX_ETHPORTS],
1114                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1115
1116                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1117                        bufs[nb_pkts - 1 - num_not_send - j] =
1118                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1119                }
1120
1121                num_tx_total += num_send;
1122        }
1123
1124        return num_tx_total;
1125}
1126
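     /*
      * Common balance transmit path: map every packet to a slave with the
      * configured hash function, then burst each slave's packets on its
      * queue. Packets that fail to transmit are compacted to the end of the
      * bufs array.
      */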
1127static inline uint16_t
1128tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1129                 uint16_t *slave_port_ids, uint16_t slave_count)
1130{
1131        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1132        struct bond_dev_private *internals = bd_tx_q->dev_private;
1133
1134        /* Array to sort mbufs for transmission on each slave into */
1135        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1136        /* Number of mbufs for transmission on each slave */
1137        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1138        /* Mapping array generated by hash function to map mbufs to slaves */
1139        uint16_t bufs_slave_port_idxs[nb_bufs];
1140
1141        uint16_t slave_tx_count;
1142        uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1143
1144        uint16_t i;
1145
1146        /*
 1147         * Populate each slave's mbuf array with the packets to be sent on it,
 1148         * selecting the output slave using the hash for the configured xmit policy
1149         */
1150        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1151                        bufs_slave_port_idxs);
1152
1153        for (i = 0; i < nb_bufs; i++) {
1154                /* Populate slave mbuf arrays with mbufs for that slave. */
1155                uint16_t slave_idx = bufs_slave_port_idxs[i];
1156
1157                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1158        }
1159
1160        /* Send packet burst on each slave device */
1161        for (i = 0; i < slave_count; i++) {
1162                if (slave_nb_bufs[i] == 0)
1163                        continue;
1164
1165                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1166                                bd_tx_q->queue_id, slave_bufs[i],
1167                                slave_nb_bufs[i]);
1168
1169                total_tx_count += slave_tx_count;
1170
1171                /* If tx burst fails move packets to end of bufs */
1172                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1173                        int slave_tx_fail_count = slave_nb_bufs[i] -
1174                                        slave_tx_count;
1175                        total_tx_fail_count += slave_tx_fail_count;
1176                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
1177                               &slave_bufs[i][slave_tx_count],
1178                               slave_tx_fail_count * sizeof(bufs[0]));
1179                }
1180        }
1181
1182        return total_tx_count;
1183}
1184
1185static uint16_t
1186bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1187                uint16_t nb_bufs)
1188{
1189        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1190        struct bond_dev_private *internals = bd_tx_q->dev_private;
1191
1192        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1193        uint16_t slave_count;
1194
1195        if (unlikely(nb_bufs == 0))
1196                return 0;
1197
1198        /* Copy slave list to protect against slave up/down changes during tx
1199         * bursting
1200         */
1201        slave_count = internals->active_slave_count;
1202        if (unlikely(slave_count < 1))
1203                return 0;
1204
1205        memcpy(slave_port_ids, internals->active_slaves,
1206                        sizeof(slave_port_ids[0]) * slave_count);
1207        return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1208                                slave_count);
1209}
1210
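     /*
      * Transmit burst for mode 4 (802.3ad): unless a dedicated Tx queue is
      * used, pending LACPDUs queued on each slave's tx_ring are sent first;
      * data packets are then balanced over the slaves that are in the
      * DISTRIBUTING state.
      */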
1211static inline uint16_t
1212tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1213                bool dedicated_txq)
1214{
1215        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1216        struct bond_dev_private *internals = bd_tx_q->dev_private;
1217
1218        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1219        uint16_t slave_count;
1220
1221        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1222        uint16_t dist_slave_count;
1223
1224        uint16_t slave_tx_count;
1225
1226        uint16_t i;
1227
1228        /* Copy slave list to protect against slave up/down changes during tx
1229         * bursting */
1230        slave_count = internals->active_slave_count;
1231        if (unlikely(slave_count < 1))
1232                return 0;
1233
1234        memcpy(slave_port_ids, internals->active_slaves,
1235                        sizeof(slave_port_ids[0]) * slave_count);
1236
1237        if (dedicated_txq)
1238                goto skip_tx_ring;
1239
1240        /* Check for LACP control packets and send if available */
1241        for (i = 0; i < slave_count; i++) {
1242                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1243                struct rte_mbuf *ctrl_pkt = NULL;
1244
1245                if (likely(rte_ring_empty(port->tx_ring)))
1246                        continue;
1247
1248                if (rte_ring_dequeue(port->tx_ring,
1249                                     (void **)&ctrl_pkt) != -ENOENT) {
1250                        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1251                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
1252                        /*
1253                         * re-enqueue LAG control plane packets to buffering
1254                         * ring if transmission fails so the packet isn't lost.
1255                         */
1256                        if (slave_tx_count != 1)
1257                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1258                }
1259        }
1260
1261skip_tx_ring:
1262        if (unlikely(nb_bufs == 0))
1263                return 0;
1264
1265        dist_slave_count = 0;
1266        for (i = 0; i < slave_count; i++) {
1267                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1268
1269                if (ACTOR_STATE(port, DISTRIBUTING))
1270                        dist_slave_port_ids[dist_slave_count++] =
1271                                        slave_port_ids[i];
1272        }
1273
1274        if (unlikely(dist_slave_count < 1))
1275                return 0;
1276
1277        return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1278                                dist_slave_count);
1279}
1280
1281static uint16_t
1282bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1283                uint16_t nb_bufs)
1284{
1285        return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1286}
1287
1288static uint16_t
1289bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1290                uint16_t nb_bufs)
1291{
1292        return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1293}
1294
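     /*
      * Transmit burst for mode 3 (broadcast): each mbuf's reference count is
      * raised and the whole burst is sent on every active slave. The count
      * from the most successful slave is returned and the clones that other
      * slaves failed to send are freed.
      */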
1295static uint16_t
1296bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1297                uint16_t nb_pkts)
1298{
1299        struct bond_dev_private *internals;
1300        struct bond_tx_queue *bd_tx_q;
1301
1302        uint16_t slaves[RTE_MAX_ETHPORTS];
1303        uint8_t tx_failed_flag = 0;
1304        uint16_t num_of_slaves;
1305
1306        uint16_t max_nb_of_tx_pkts = 0;
1307
1308        int slave_tx_total[RTE_MAX_ETHPORTS];
1309        int i, most_successful_tx_slave = -1;
1310
1311        bd_tx_q = (struct bond_tx_queue *)queue;
1312        internals = bd_tx_q->dev_private;
1313
1314        /* Copy slave list to protect against slave up/down changes during tx
1315         * bursting */
1316        num_of_slaves = internals->active_slave_count;
1317        memcpy(slaves, internals->active_slaves,
1318                        sizeof(internals->active_slaves[0]) * num_of_slaves);
1319
1320        if (num_of_slaves < 1)
1321                return 0;
1322
1323        /* Increment reference count on mbufs */
1324        for (i = 0; i < nb_pkts; i++)
1325                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1326
1327        /* Transmit burst on each active slave */
1328        for (i = 0; i < num_of_slaves; i++) {
1329                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1330                                        bufs, nb_pkts);
1331
1332                if (unlikely(slave_tx_total[i] < nb_pkts))
1333                        tx_failed_flag = 1;
1334
1335                /* record the value and slave index for the slave which transmits the
1336                 * maximum number of packets */
1337                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1338                        max_nb_of_tx_pkts = slave_tx_total[i];
1339                        most_successful_tx_slave = i;
1340                }
1341        }
1342
1343        /* if slaves fail to transmit packets from burst, the calling application
1344         * is not expected to know about multiple references to packets so we must
1345         * handle failures of all packets except those of the most successful slave
1346         */
1347        if (unlikely(tx_failed_flag))
1348                for (i = 0; i < num_of_slaves; i++)
1349                        if (i != most_successful_tx_slave)
1350                                while (slave_tx_total[i] < nb_pkts)
1351                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1352
1353        return max_nb_of_tx_pkts;
1354}
1355
1356static void
1357link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1358{
1359        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1360
1361        if (bond_ctx->mode == BONDING_MODE_8023AD) {
1362                /**
1363                 * If in mode 4 then save the link properties of the first
1364                 * slave; all subsequent slaves must match these properties
1365                 */
1366                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1367
1368                bond_link->link_autoneg = slave_link->link_autoneg;
1369                bond_link->link_duplex = slave_link->link_duplex;
1370                bond_link->link_speed = slave_link->link_speed;
1371        } else {
1372                /**
1373                 * In any other mode the link properties are set to default
1374                 * values of AUTONEG/DUPLEX
1375                 */
1376                ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1377                ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1378        }
1379}
1380
1381static int
1382link_properties_valid(struct rte_eth_dev *ethdev,
1383                struct rte_eth_link *slave_link)
1384{
1385        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1386
1387        if (bond_ctx->mode == BONDING_MODE_8023AD) {
1388                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1389
1390                if (bond_link->link_duplex != slave_link->link_duplex ||
1391                        bond_link->link_autoneg != slave_link->link_autoneg ||
1392                        bond_link->link_speed != slave_link->link_speed)
1393                        return -1;
1394        }
1395
1396        return 0;
1397}
1398
1399int
1400mac_address_get(struct rte_eth_dev *eth_dev,
1401                struct rte_ether_addr *dst_mac_addr)
1402{
1403        struct rte_ether_addr *mac_addr;
1404
1405        if (eth_dev == NULL) {
1406                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1407                return -1;
1408        }
1409
1410        if (dst_mac_addr == NULL) {
1411                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1412                return -1;
1413        }
1414
1415        mac_addr = eth_dev->data->mac_addrs;
1416
1417        rte_ether_addr_copy(mac_addr, dst_mac_addr);
1418        return 0;
1419}
1420
1421int
1422mac_address_set(struct rte_eth_dev *eth_dev,
1423                struct rte_ether_addr *new_mac_addr)
1424{
1425        struct rte_ether_addr *mac_addr;
1426
1427        if (eth_dev == NULL) {
1428                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1429                return -1;
1430        }
1431
1432        if (new_mac_addr == NULL) {
1433                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1434                return -1;
1435        }
1436
1437        mac_addr = eth_dev->data->mac_addrs;
1438
1439        /* If new MAC is different to current MAC then update */
1440        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1441                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1442
1443        return 0;
1444}
1445
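/* All-zero address used as an end-of-list sentinel when walking the bonded
 * device's mac_addrs array in the helpers below. */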
1446static const struct rte_ether_addr null_mac_addr;
1447
1448/*
1449 * Add additional MAC addresses to the slave
1450 */
1451int
1452slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1453                uint16_t slave_port_id)
1454{
1455        int i, ret;
1456        struct rte_ether_addr *mac_addr;
1457
1458        for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1459                mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1460                if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1461                        break;
1462
1463                ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1464                if (ret < 0) {
1465                        /* rollback */
1466                        for (i--; i > 0; i--)
1467                                rte_eth_dev_mac_addr_remove(slave_port_id,
1468                                        &bonded_eth_dev->data->mac_addrs[i]);
1469                        return ret;
1470                }
1471        }
1472
1473        return 0;
1474}
1475
1476/*
1477 * Remove additional MAC addresses from the slave
1478 */
1479int
1480slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1481                uint16_t slave_port_id)
1482{
1483        int i, rc, ret;
1484        struct rte_ether_addr *mac_addr;
1485
1486        rc = 0;
1487        for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1488                mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1489                if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1490                        break;
1491
1492                ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1493                /* save only the first error */
1494                if (ret < 0 && rc == 0)
1495                        rc = ret;
1496        }
1497
1498        return rc;
1499}
1500
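/*
 * Push the bonded device's MAC configuration down to the slaves. The policy
 * is mode dependent: in round robin, balance and broadcast modes every slave
 * takes the bonded device's MAC address; in 802.3AD the mode 4 code performs
 * the update; in active backup, TLB and ALB only the current primary takes
 * the bonded MAC while the other slaves keep their persisted MAC addresses.
 */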
1501int
1502mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1503{
1504        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1505        bool set;
1506        int i;
1507
1508        /* Update slave devices MAC addresses */
1509        if (internals->slave_count < 1)
1510                return -1;
1511
1512        switch (internals->mode) {
1513        case BONDING_MODE_ROUND_ROBIN:
1514        case BONDING_MODE_BALANCE:
1515        case BONDING_MODE_BROADCAST:
1516                for (i = 0; i < internals->slave_count; i++) {
1517                        if (rte_eth_dev_default_mac_addr_set(
1518                                        internals->slaves[i].port_id,
1519                                        bonded_eth_dev->data->mac_addrs)) {
1520                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1521                                                internals->slaves[i].port_id);
1522                                return -1;
1523                        }
1524                }
1525                break;
1526        case BONDING_MODE_8023AD:
1527                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1528                break;
1529        case BONDING_MODE_ACTIVE_BACKUP:
1530        case BONDING_MODE_TLB:
1531        case BONDING_MODE_ALB:
1532        default:
1533                set = true;
1534                for (i = 0; i < internals->slave_count; i++) {
1535                        if (internals->slaves[i].port_id ==
1536                                        internals->current_primary_port) {
1537                                if (rte_eth_dev_default_mac_addr_set(
1538                                                internals->current_primary_port,
1539                                                bonded_eth_dev->data->mac_addrs)) {
1540                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1541                                                        internals->current_primary_port);
1542                                        set = false;
1543                                }
1544                        } else {
1545                                if (rte_eth_dev_default_mac_addr_set(
1546                                                internals->slaves[i].port_id,
1547                                                &internals->slaves[i].persisted_mac_addr)) {
1548                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1549                                                        internals->slaves[i].port_id);
1550                                }
1551                        }
1552                }
1553                if (!set)
1554                        return -1;
1555        }
1556
1557        return 0;
1558}
1559
1560int
1561bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1562{
1563        struct bond_dev_private *internals;
1564
1565        internals = eth_dev->data->dev_private;
1566
1567        switch (mode) {
1568        case BONDING_MODE_ROUND_ROBIN:
1569                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1570                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1571                break;
1572        case BONDING_MODE_ACTIVE_BACKUP:
1573                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1574                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1575                break;
1576        case BONDING_MODE_BALANCE:
1577                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1578                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579                break;
1580        case BONDING_MODE_BROADCAST:
1581                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1582                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1583                break;
1584        case BONDING_MODE_8023AD:
1585                if (bond_mode_8023ad_enable(eth_dev) != 0)
1586                        return -1;
1587
1588                if (internals->mode4.dedicated_queues.enabled == 0) {
1589                        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1590                        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1591                        RTE_BOND_LOG(WARNING,
1592                                "Using mode 4, it is necessary to do TX burst "
1593                                "and RX burst at least every 100ms.");
1594                } else {
1595                        /* Use flow director's optimization */
1596                        eth_dev->rx_pkt_burst =
1597                                        bond_ethdev_rx_burst_8023ad_fast_queue;
1598                        eth_dev->tx_pkt_burst =
1599                                        bond_ethdev_tx_burst_8023ad_fast_queue;
1600                }
1601                break;
1602        case BONDING_MODE_TLB:
1603                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1604                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1605                break;
1606        case BONDING_MODE_ALB:
1607                if (bond_mode_alb_enable(eth_dev) != 0)
1608                        return -1;
1609
1610                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1611                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1612                break;
1613        default:
1614                return -1;
1615        }
1616
1617        internals->mode = mode;
1618
1619        return 0;
1620}
1621
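/*
 * Illustrative application-side sketch (not part of this driver): the bonding
 * mode is normally selected through the public rte_eth_bond API, which ends
 * up installing the rx/tx burst handlers chosen above. The slave port ids
 * below are placeholders.
 *
 *        int bond_port = rte_eth_bond_create("net_bonding0",
 *                        BONDING_MODE_8023AD, rte_socket_id());
 *        rte_eth_bond_slave_add(bond_port, slave0_port_id);
 *        rte_eth_bond_slave_add(bond_port, slave1_port_id);
 */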
1622
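/*
 * Set up the per-slave resources for the mode 4 slow (LACPDU) path: a
 * dedicated mbuf pool per slave and, when dedicated queues are enabled, an
 * extra Rx/Tx queue pair on the slave reserved for control traffic.
 */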
1623static int
1624slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1625                struct rte_eth_dev *slave_eth_dev)
1626{
1627        int errval = 0;
1628        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1629        struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1630
1631        if (port->slow_pool == NULL) {
1632                char mem_name[256];
1633                int slave_id = slave_eth_dev->data->port_id;
1634
1635                snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1636                                slave_id);
1637                port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1638                        250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1639                        slave_eth_dev->data->numa_node);
1640
1641                /* Any memory allocation failure during initialization is critical
1642                 * because resources can't be freed, so reinitialization is impossible. */
1643                if (port->slow_pool == NULL) {
1644                        rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1645                                slave_id, mem_name, rte_strerror(rte_errno));
1646                }
1647        }
1648
1649        if (internals->mode4.dedicated_queues.enabled == 1) {
1650                /* Configure slow Rx queue */
1651
1652                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1653                                internals->mode4.dedicated_queues.rx_qid, 128,
1654                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1655                                NULL, port->slow_pool);
1656                if (errval != 0) {
1657                        RTE_BOND_LOG(ERR,
1658                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1659                                        slave_eth_dev->data->port_id,
1660                                        internals->mode4.dedicated_queues.rx_qid,
1661                                        errval);
1662                        return errval;
1663                }
1664
1665                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1666                                internals->mode4.dedicated_queues.tx_qid, 512,
1667                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1668                                NULL);
1669                if (errval != 0) {
1670                        RTE_BOND_LOG(ERR,
1671                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1672                                slave_eth_dev->data->port_id,
1673                                internals->mode4.dedicated_queues.tx_qid,
1674                                errval);
1675                        return errval;
1676                }
1677        }
1678        return 0;
1679}
1680
1681int
1682slave_configure(struct rte_eth_dev *bonded_eth_dev,
1683                struct rte_eth_dev *slave_eth_dev)
1684{
1685        struct bond_rx_queue *bd_rx_q;
1686        struct bond_tx_queue *bd_tx_q;
1687        uint16_t nb_rx_queues;
1688        uint16_t nb_tx_queues;
1689
1690        int errval;
1691        uint16_t q_id;
1692        struct rte_flow_error flow_error;
1693
1694        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1695
1696        /* Stop slave */
1697        errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1698        if (errval != 0)
1699                RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1700                             slave_eth_dev->data->port_id, errval);
1701
1702        /* Enable interrupts on slave device if supported */
1703        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1704                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1705
1706        /* If RSS is enabled for bonding, try to enable it for slaves  */
1707        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1708                if (internals->rss_key_len != 0) {
1709                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1710                                        internals->rss_key_len;
1711                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1712                                        internals->rss_key;
1713                } else {
1714                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1715                }
1716
1717                slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1718                                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1719                slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1720                                bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1721        }
1722
1723        if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1724                        DEV_RX_OFFLOAD_VLAN_FILTER)
1725                slave_eth_dev->data->dev_conf.rxmode.offloads |=
1726                                DEV_RX_OFFLOAD_VLAN_FILTER;
1727        else
1728                slave_eth_dev->data->dev_conf.rxmode.offloads &=
1729                                ~DEV_RX_OFFLOAD_VLAN_FILTER;
1730
1731        slave_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1732                        bonded_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1733
1734        if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1735                        DEV_RX_OFFLOAD_JUMBO_FRAME)
1736                slave_eth_dev->data->dev_conf.rxmode.offloads |=
1737                                DEV_RX_OFFLOAD_JUMBO_FRAME;
1738        else
1739                slave_eth_dev->data->dev_conf.rxmode.offloads &=
1740                                ~DEV_RX_OFFLOAD_JUMBO_FRAME;
1741
1742        nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1743        nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1744
1745        if (internals->mode == BONDING_MODE_8023AD) {
1746                if (internals->mode4.dedicated_queues.enabled == 1) {
1747                        nb_rx_queues++;
1748                        nb_tx_queues++;
1749                }
1750        }
1751
1752        errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1753                                     bonded_eth_dev->data->mtu);
1754        if (errval != 0 && errval != -ENOTSUP) {
1755                RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1756                                slave_eth_dev->data->port_id, errval);
1757                return errval;
1758        }
1759
1760        /* Configure device */
1761        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1762                        nb_rx_queues, nb_tx_queues,
1763                        &(slave_eth_dev->data->dev_conf));
1764        if (errval != 0) {
1765                RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1766                                slave_eth_dev->data->port_id, errval);
1767                return errval;
1768        }
1769
1770        /* Setup Rx Queues */
1771        for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1772                bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1773
1774                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1775                                bd_rx_q->nb_rx_desc,
1776                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1777                                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1778                if (errval != 0) {
1779                        RTE_BOND_LOG(ERR,
1780                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1781                                        slave_eth_dev->data->port_id, q_id, errval);
1782                        return errval;
1783                }
1784        }
1785
1786        /* Setup Tx Queues */
1787        for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1788                bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1789
1790                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1791                                bd_tx_q->nb_tx_desc,
1792                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1793                                &bd_tx_q->tx_conf);
1794                if (errval != 0) {
1795                        RTE_BOND_LOG(ERR,
1796                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1797                                slave_eth_dev->data->port_id, q_id, errval);
1798                        return errval;
1799                }
1800        }
1801
1802        if (internals->mode == BONDING_MODE_8023AD &&
1803                        internals->mode4.dedicated_queues.enabled == 1) {
1804                errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1805                if (errval != 0)
1806                        return errval;
1807
1808                errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1809                                slave_eth_dev->data->port_id);
1810                if (errval != 0) {
1811                        RTE_BOND_LOG(ERR,
1812                                "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1813                                slave_eth_dev->data->port_id, errval);
1814                        return errval;
1815                }
1816
1817                if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1818                        rte_flow_destroy(slave_eth_dev->data->port_id,
1819                                        internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1820                                        &flow_error);
1821
1822                errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1823                                slave_eth_dev->data->port_id);
1824                if (errval != 0) {
1825                        RTE_BOND_LOG(ERR,
1826                                "bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
1827                                slave_eth_dev->data->port_id, errval);
1828                        return errval;
1829                }
1830        }
1831
1832        /* Start device */
1833        errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1834        if (errval != 0) {
1835                RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1836                                slave_eth_dev->data->port_id, errval);
1837                return -1;
1838        }
1839
1840        /* If RSS is enabled for bonding, synchronize RETA */
1841        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1842                int i;
1843                struct bond_dev_private *internals;
1844
1845                internals = bonded_eth_dev->data->dev_private;
1846
1847                for (i = 0; i < internals->slave_count; i++) {
1848                        if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1849                                errval = rte_eth_dev_rss_reta_update(
1850                                                slave_eth_dev->data->port_id,
1851                                                &internals->reta_conf[0],
1852                                                internals->slaves[i].reta_size);
1853                                if (errval != 0) {
1854                                        RTE_BOND_LOG(WARNING,
1855                                                     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1856                                                     " RSS Configuration for bonding may be inconsistent.",
1857                                                     slave_eth_dev->data->port_id, errval);
1858                                }
1859                                break;
1860                        }
1861                }
1862        }
1863
1864        /* If lsc interrupt is set, check initial slave's link status */
1865        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1866                slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1867                bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1868                        RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1869                        NULL);
1870        }
1871
1872        return 0;
1873}
1874
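/*
 * Remove a slave from the bonded device's slave table, compacting both the
 * slave array and the per-flow slave flow handle arrays, then reset the slave
 * ethdev so that it is fully reconfigured if it is ever re-added.
 */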
1875void
1876slave_remove(struct bond_dev_private *internals,
1877                struct rte_eth_dev *slave_eth_dev)
1878{
1879        uint16_t i;
1880
1881        for (i = 0; i < internals->slave_count; i++)
1882                if (internals->slaves[i].port_id ==
1883                                slave_eth_dev->data->port_id)
1884                        break;
1885
1886        if (i < (internals->slave_count - 1)) {
1887                struct rte_flow *flow;
1888
1889                memmove(&internals->slaves[i], &internals->slaves[i + 1],
1890                                sizeof(internals->slaves[0]) *
1891                                (internals->slave_count - i - 1));
1892                TAILQ_FOREACH(flow, &internals->flow_list, next) {
1893                        memmove(&flow->flows[i], &flow->flows[i + 1],
1894                                sizeof(flow->flows[0]) *
1895                                (internals->slave_count - i - 1));
1896                        flow->flows[internals->slave_count - 1] = NULL;
1897                }
1898        }
1899
1900        internals->slave_count--;
1901
1902        /* force reconfiguration of slave interfaces */
1903        rte_eth_dev_internal_reset(slave_eth_dev);
1904}
1905
1906static void
1907bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1908
1909void
1910slave_add(struct bond_dev_private *internals,
1911                struct rte_eth_dev *slave_eth_dev)
1912{
1913        struct bond_slave_details *slave_details =
1914                        &internals->slaves[internals->slave_count];
1915
1916        slave_details->port_id = slave_eth_dev->data->port_id;
1917        slave_details->last_link_status = 0;
1918
1919        /* Mark slave devices that don't support interrupts so we can
1920         * compensate when we start the bond
1921         */
1922        if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1923                slave_details->link_status_poll_enabled = 1;
1924        }
1925
1926        slave_details->link_status_wait_to_complete = 0;
1927        /* Save the slave's original MAC address so it can be restored later */
1928        memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1929                        sizeof(struct rte_ether_addr));
1930}
1931
1932void
1933bond_ethdev_primary_set(struct bond_dev_private *internals,
1934                uint16_t slave_port_id)
1935{
1936        int i;
1937
1938        if (internals->active_slave_count < 1)
1939                internals->current_primary_port = slave_port_id;
1940        else
1941                /* Search bonded device slave ports for new proposed primary port */
1942                for (i = 0; i < internals->active_slave_count; i++) {
1943                        if (internals->active_slaves[i] == slave_port_id)
1944                                internals->current_primary_port = slave_port_id;
1945                }
1946}
1947
1948static int
1949bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1950
1951static int
1952bond_ethdev_start(struct rte_eth_dev *eth_dev)
1953{
1954        struct bond_dev_private *internals;
1955        int i;
1956
1957        /* slave eth dev will be started by bonded device */
1958        if (check_for_bonded_ethdev(eth_dev)) {
1959                RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1960                                eth_dev->data->port_id);
1961                return -1;
1962        }
1963
1964        eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1965        eth_dev->data->dev_started = 1;
1966
1967        internals = eth_dev->data->dev_private;
1968
1969        if (internals->slave_count == 0) {
1970                RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1971                goto out_err;
1972        }
1973
1974        if (internals->user_defined_mac == 0) {
1975                struct rte_ether_addr *new_mac_addr = NULL;
1976
1977                for (i = 0; i < internals->slave_count; i++)
1978                        if (internals->slaves[i].port_id == internals->primary_port)
1979                                new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1980
1981                if (new_mac_addr == NULL)
1982                        goto out_err;
1983
1984                if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1985                        RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1986                                        eth_dev->data->port_id);
1987                        goto out_err;
1988                }
1989        }
1990
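        /* When dedicated queues are in use, the mode 4 slow (LACP) queue on
         * each slave is assigned the first queue index after the
         * application's data queues. */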
1991        if (internals->mode == BONDING_MODE_8023AD) {
1992                if (internals->mode4.dedicated_queues.enabled == 1) {
1993                        internals->mode4.dedicated_queues.rx_qid =
1994                                        eth_dev->data->nb_rx_queues;
1995                        internals->mode4.dedicated_queues.tx_qid =
1996                                        eth_dev->data->nb_tx_queues;
1997                }
1998        }
1999
2000
2001        /* Reconfigure each slave device if starting bonded device */
2002        for (i = 0; i < internals->slave_count; i++) {
2003                struct rte_eth_dev *slave_ethdev =
2004                                &(rte_eth_devices[internals->slaves[i].port_id]);
2005                if (slave_configure(eth_dev, slave_ethdev) != 0) {
2006                        RTE_BOND_LOG(ERR,
2007                                "bonded port (%d) failed to reconfigure slave device (%d)",
2008                                eth_dev->data->port_id,
2009                                internals->slaves[i].port_id);
2010                        goto out_err;
2011                }
2012                /* We will need to poll for link status if any slave doesn't
2013                 * support interrupts
2014                 */
2015                if (internals->slaves[i].link_status_poll_enabled)
2016                        internals->link_status_polling_enabled = 1;
2017        }
2018
2019        /* start polling if needed */
2020        if (internals->link_status_polling_enabled) {
2021                rte_eal_alarm_set(
2022                        internals->link_status_polling_interval_ms * 1000,
2023                        bond_ethdev_slave_link_status_change_monitor,
2024                        (void *)&rte_eth_devices[internals->port_id]);
2025        }
2026
2027        /* Update all slave devices' MAC addresses */
2028        if (mac_address_slaves_update(eth_dev) != 0)
2029                goto out_err;
2030
2031        if (internals->user_defined_primary_port)
2032                bond_ethdev_primary_set(internals, internals->primary_port);
2033
2034        if (internals->mode == BONDING_MODE_8023AD)
2035                bond_mode_8023ad_start(eth_dev);
2036
2037        if (internals->mode == BONDING_MODE_TLB ||
2038                        internals->mode == BONDING_MODE_ALB)
2039                bond_tlb_enable(internals);
2040
2041        return 0;
2042
2043out_err:
2044        eth_dev->data->dev_started = 0;
2045        return -1;
2046}
2047
2048static void
2049bond_ethdev_free_queues(struct rte_eth_dev *dev)
2050{
2051        uint16_t i;
2052
2053        if (dev->data->rx_queues != NULL) {
2054                for (i = 0; i < dev->data->nb_rx_queues; i++) {
2055                        rte_free(dev->data->rx_queues[i]);
2056                        dev->data->rx_queues[i] = NULL;
2057                }
2058                dev->data->nb_rx_queues = 0;
2059        }
2060
2061        if (dev->data->tx_queues != NULL) {
2062                for (i = 0; i < dev->data->nb_tx_queues; i++) {
2063                        rte_free(dev->data->tx_queues[i]);
2064                        dev->data->tx_queues[i] = NULL;
2065                }
2066                dev->data->nb_tx_queues = 0;
2067        }
2068}
2069
2070int
2071bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2072{
2073        struct bond_dev_private *internals = eth_dev->data->dev_private;
2074        uint16_t i;
2075        int ret;
2076
2077        if (internals->mode == BONDING_MODE_8023AD) {
2078                struct port *port;
2079                void *pkt = NULL;
2080
2081                bond_mode_8023ad_stop(eth_dev);
2082
2083                /* Discard all messages to/from mode 4 state machines */
2084                for (i = 0; i < internals->active_slave_count; i++) {
2085                        port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2086
2087                        RTE_ASSERT(port->rx_ring != NULL);
2088                        while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2089                                rte_pktmbuf_free(pkt);
2090
2091                        RTE_ASSERT(port->tx_ring != NULL);
2092                        while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2093                                rte_pktmbuf_free(pkt);
2094                }
2095        }
2096
2097        if (internals->mode == BONDING_MODE_TLB ||
2098                        internals->mode == BONDING_MODE_ALB) {
2099                bond_tlb_disable(internals);
2100                for (i = 0; i < internals->active_slave_count; i++)
2101                        tlb_last_obytets[internals->active_slaves[i]] = 0;
2102        }
2103
2104        eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2105        eth_dev->data->dev_started = 0;
2106
2107        internals->link_status_polling_enabled = 0;
2108        for (i = 0; i < internals->slave_count; i++) {
2109                uint16_t slave_id = internals->slaves[i].port_id;
2110                if (find_slave_by_id(internals->active_slaves,
2111                                internals->active_slave_count, slave_id) !=
2112                                                internals->active_slave_count) {
2113                        internals->slaves[i].last_link_status = 0;
2114                        ret = rte_eth_dev_stop(slave_id);
2115                        if (ret != 0) {
2116                                RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2117                                             slave_id);
2118                                return ret;
2119                        }
2120                        deactivate_slave(eth_dev, slave_id);
2121                }
2122        }
2123
2124        return 0;
2125}
2126
2127int
2128bond_ethdev_close(struct rte_eth_dev *dev)
2129{
2130        struct bond_dev_private *internals = dev->data->dev_private;
2131        uint16_t bond_port_id = internals->port_id;
2132        int skipped = 0;
2133        struct rte_flow_error ferror;
2134
2135        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2136                return 0;
2137
2138        RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2139        while (internals->slave_count != skipped) {
2140                uint16_t port_id = internals->slaves[skipped].port_id;
2141
2142                if (rte_eth_dev_stop(port_id) != 0) {
2143                        RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2144                                     port_id);
2145                        skipped++;
2146                }
2147
2148                if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2149                        RTE_BOND_LOG(ERR,
2150                                     "Failed to remove port %d from bonded device %s",
2151                                     port_id, dev->device->name);
2152                        skipped++;
2153                }
2154        }
2155        bond_flow_ops.flush(dev, &ferror);
2156        bond_ethdev_free_queues(dev);
2157        rte_bitmap_reset(internals->vlan_filter_bmp);
2158        rte_bitmap_free(internals->vlan_filter_bmp);
2159        rte_free(internals->vlan_filter_bmpmem);
2160
2161        /* Try to release the mempool used in mode 6. If the bonded device is not
2162         * in mode 6, the pointer is NULL and freeing it is not a problem.
2163         */
2164        rte_mempool_free(internals->mode6.mempool);
2165
2166        return 0;
2167}
2168
2169/* forward declaration */
2170static int bond_ethdev_configure(struct rte_eth_dev *dev);
2171
2172static int
2173bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2174{
2175        struct bond_dev_private *internals = dev->data->dev_private;
2176        struct bond_slave_details slave;
2177        int ret;
2178
2179        uint16_t max_nb_rx_queues = UINT16_MAX;
2180        uint16_t max_nb_tx_queues = UINT16_MAX;
2181        uint16_t max_rx_desc_lim = UINT16_MAX;
2182        uint16_t max_tx_desc_lim = UINT16_MAX;
2183
2184        dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2185
2186        dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2187                        internals->candidate_max_rx_pktlen :
2188                        RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2189
2190        /* The max number of tx/rx queues that the bonded device can support is
2191         * the minimum of the values supported by its slaves, as all slaves must
2192         * be capable of supporting the same number of tx/rx queues.
2193         */
2194        if (internals->slave_count > 0) {
2195                struct rte_eth_dev_info slave_info;
2196                uint16_t idx;
2197
2198                for (idx = 0; idx < internals->slave_count; idx++) {
2199                        slave = internals->slaves[idx];
2200                        ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2201                        if (ret != 0) {
2202                                RTE_BOND_LOG(ERR,
2203                                        "%s: Error during getting device (port %u) info: %s\n",
2204                                        __func__,
2205                                        slave.port_id,
2206                                        strerror(-ret));
2207
2208                                return ret;
2209                        }
2210
2211                        if (slave_info.max_rx_queues < max_nb_rx_queues)
2212                                max_nb_rx_queues = slave_info.max_rx_queues;
2213
2214                        if (slave_info.max_tx_queues < max_nb_tx_queues)
2215                                max_nb_tx_queues = slave_info.max_tx_queues;
2216
2217                        if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2218                                max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2219
2220                        if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2221                                max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2222                }
2223        }
2224
2225        dev_info->max_rx_queues = max_nb_rx_queues;
2226        dev_info->max_tx_queues = max_nb_tx_queues;
2227
2228        memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2229               sizeof(dev_info->default_rxconf));
2230        memcpy(&dev_info->default_txconf, &internals->default_txconf,
2231               sizeof(dev_info->default_txconf));
2232
2233        dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2234        dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2235
2236        /**
2237         * If dedicated hw queues are enabled for the link bonding device in LACP
2238         * mode then we need to reduce the maximum number of data path queues by 1.
2239         */
2240        if (internals->mode == BONDING_MODE_8023AD &&
2241                internals->mode4.dedicated_queues.enabled == 1) {
2242                dev_info->max_rx_queues--;
2243                dev_info->max_tx_queues--;
2244        }
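        /* Illustrative note: the reduction above only applies when the
         * application has already enabled dedicated queues, e.g. with
         * rte_eth_bond_8023ad_dedicated_queues_enable() while the bonded
         * port is stopped. */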
2245
2246        dev_info->min_rx_bufsize = 0;
2247
2248        dev_info->rx_offload_capa = internals->rx_offload_capa;
2249        dev_info->tx_offload_capa = internals->tx_offload_capa;
2250        dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2251        dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2252        dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2253
2254        dev_info->reta_size = internals->reta_size;
2255
2256        return 0;
2257}
2258
2259static int
2260bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2261{
2262        int res;
2263        uint16_t i;
2264        struct bond_dev_private *internals = dev->data->dev_private;
2265
2266        /* don't do this while a slave is being added */
2267        rte_spinlock_lock(&internals->lock);
2268
2269        if (on)
2270                rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2271        else
2272                rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2273
2274        for (i = 0; i < internals->slave_count; i++) {
2275                uint16_t port_id = internals->slaves[i].port_id;
2276
2277                res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2278                if (res == -ENOTSUP)
2279                        RTE_BOND_LOG(WARNING,
2280                                     "Setting VLAN filter on slave port %u not supported.",
2281                                     port_id);
2282        }
2283
2284        rte_spinlock_unlock(&internals->lock);
2285        return 0;
2286}
2287
2288static int
2289bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2290                uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2291                const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2292{
2293        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2294                        rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2295                                        0, dev->data->numa_node);
2296        if (bd_rx_q == NULL)
2297                return -1;
2298
2299        bd_rx_q->queue_id = rx_queue_id;
2300        bd_rx_q->dev_private = dev->data->dev_private;
2301
2302        bd_rx_q->nb_rx_desc = nb_rx_desc;
2303
2304        memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2305        bd_rx_q->mb_pool = mb_pool;
2306
2307        dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2308
2309        return 0;
2310}
2311
2312static int
2313bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2314                uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2315                const struct rte_eth_txconf *tx_conf)
2316{
2317        struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2318                        rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2319                                        0, dev->data->numa_node);
2320
2321        if (bd_tx_q == NULL)
2322                return -1;
2323
2324        bd_tx_q->queue_id = tx_queue_id;
2325        bd_tx_q->dev_private = dev->data->dev_private;
2326
2327        bd_tx_q->nb_tx_desc = nb_tx_desc;
2328        memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2329
2330        dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2331
2332        return 0;
2333}
2334
2335static void
2336bond_ethdev_rx_queue_release(void *queue)
2337{
2338        if (queue == NULL)
2339                return;
2340
2341        rte_free(queue);
2342}
2343
2344static void
2345bond_ethdev_tx_queue_release(void *queue)
2346{
2347        if (queue == NULL)
2348                return;
2349
2350        rte_free(queue);
2351}
2352
2353static void
2354bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2355{
2356        struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2357        struct bond_dev_private *internals;
2358
2359        /* Default value for polling slave found is true as we don't want to
2360         * disable the polling thread if we cannot get the lock */
2361        int i, polling_slave_found = 1;
2362
2363        if (cb_arg == NULL)
2364                return;
2365
2366        bonded_ethdev = cb_arg;
2367        internals = bonded_ethdev->data->dev_private;
2368
2369        if (!bonded_ethdev->data->dev_started ||
2370                !internals->link_status_polling_enabled)
2371                return;
2372
2373        /* If the device is currently being configured then don't check the slaves'
2374         * link status; wait until the next period */
2375        if (rte_spinlock_trylock(&internals->lock)) {
2376                if (internals->slave_count > 0)
2377                        polling_slave_found = 0;
2378
2379                for (i = 0; i < internals->slave_count; i++) {
2380                        if (!internals->slaves[i].link_status_poll_enabled)
2381                                continue;
2382
2383                        slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2384                        polling_slave_found = 1;
2385
2386                        /* Update slave link status */
2387                        (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2388                                        internals->slaves[i].link_status_wait_to_complete);
2389
2390                        /* if link status has changed since last checked then call lsc
2391                         * event callback */
2392                        if (slave_ethdev->data->dev_link.link_status !=
2393                                        internals->slaves[i].last_link_status) {
2394                                internals->slaves[i].last_link_status =
2395                                                slave_ethdev->data->dev_link.link_status;
2396
2397                                bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2398                                                RTE_ETH_EVENT_INTR_LSC,
2399                                                &bonded_ethdev->data->port_id,
2400                                                NULL);
2401                        }
2402                }
2403                rte_spinlock_unlock(&internals->lock);
2404        }
2405
2406        if (polling_slave_found)
2407                /* Set alarm to continue monitoring link status of slave ethdevs */
2408                rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2409                                bond_ethdev_slave_link_status_change_monitor, cb_arg);
2410}
2411
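/*
 * Report the bonded link as UP whenever the device is started and at least
 * one slave is active. The reported speed is mode dependent: the minimum of
 * the active slaves' speeds in broadcast mode, the current primary's speed in
 * active backup, and the sum of the active slaves' speeds in the remaining
 * modes (e.g. two active 10G slaves in balance mode report 20000 Mbps).
 */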
2412static int
2413bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2414{
2415        int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2416
2417        struct bond_dev_private *bond_ctx;
2418        struct rte_eth_link slave_link;
2419
2420        bool one_link_update_succeeded;
2421        uint32_t idx;
2422        int ret;
2423
2424        bond_ctx = ethdev->data->dev_private;
2425
2426        ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2427
2428        if (ethdev->data->dev_started == 0 ||
2429                        bond_ctx->active_slave_count == 0) {
2430                ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2431                return 0;
2432        }
2433
2434        ethdev->data->dev_link.link_status = ETH_LINK_UP;
2435
2436        if (wait_to_complete)
2437                link_update = rte_eth_link_get;
2438        else
2439                link_update = rte_eth_link_get_nowait;
2440
2441        switch (bond_ctx->mode) {
2442        case BONDING_MODE_BROADCAST:
2443                /**
2444                 * Setting link speed to UINT32_MAX to ensure we pick up the
2445                 * value of the first active slave
2446                 */
2447                ethdev->data->dev_link.link_speed = UINT32_MAX;
2448
2449                /**
2450                 * The link speed is the minimum of all the slaves' link speeds, as
2451                 * packet loss will occur on a slave if transmission at a rate
2452                 * greater than its own link speed is attempted
2453                 */
2454                for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2455                        ret = link_update(bond_ctx->active_slaves[idx],
2456                                          &slave_link);
2457                        if (ret < 0) {
2458                                ethdev->data->dev_link.link_speed =
2459                                        ETH_SPEED_NUM_NONE;
2460                                RTE_BOND_LOG(ERR,
2461                                        "Slave (port %u) link get failed: %s",
2462                                        bond_ctx->active_slaves[idx],
2463                                        rte_strerror(-ret));
2464                                return 0;
2465                        }
2466
2467                        if (slave_link.link_speed <
2468                                        ethdev->data->dev_link.link_speed)
2469                                ethdev->data->dev_link.link_speed =
2470                                                slave_link.link_speed;
2471                }
2472                break;
2473        case BONDING_MODE_ACTIVE_BACKUP:
2474                /* Current primary slave */
2475                ret = link_update(bond_ctx->current_primary_port, &slave_link);
2476                if (ret < 0) {
2477                        RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2478                                bond_ctx->current_primary_port,
2479                                rte_strerror(-ret));
2480                        return 0;
2481                }
2482
2483                ethdev->data->dev_link.link_speed = slave_link.link_speed;
2484                break;
2485        case BONDING_MODE_8023AD:
2486                ethdev->data->dev_link.link_autoneg =
2487                                bond_ctx->mode4.slave_link.link_autoneg;
2488                ethdev->data->dev_link.link_duplex =
2489                                bond_ctx->mode4.slave_link.link_duplex;
2490                /* fall through */
2491                /* to update link speed */
2492        case BONDING_MODE_ROUND_ROBIN:
2493        case BONDING_MODE_BALANCE:
2494        case BONDING_MODE_TLB:
2495        case BONDING_MODE_ALB:
2496        default:
2497                /**
2498                 * In these modes the maximum theoretical link speed is the sum
2499                 * of all the slaves' link speeds
2500                 */
2501                ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2502                one_link_update_succeeded = false;
2503
2504                for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2505                        ret = link_update(bond_ctx->active_slaves[idx],
2506                                        &slave_link);
2507                        if (ret < 0) {
2508                                RTE_BOND_LOG(ERR,
2509                                        "Slave (port %u) link get failed: %s",
2510                                        bond_ctx->active_slaves[idx],
2511                                        rte_strerror(-ret));
2512                                continue;
2513                        }
2514
2515                        one_link_update_succeeded = true;
2516                        ethdev->data->dev_link.link_speed +=
2517                                        slave_link.link_speed;
2518                }
2519
2520                if (!one_link_update_succeeded) {
2521                        RTE_BOND_LOG(ERR, "All slaves link get failed");
2522                        return 0;
2523                }
2524        }
2525
2526
2527        return 0;
2528}
2529
2530
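/*
 * Basic statistics are the sums of the corresponding slave counters; per-queue
 * counters are accumulated index by index, up to RTE_ETHDEV_QUEUE_STAT_CNTRS
 * queues.
 */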
2531static int
2532bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2533{
2534        struct bond_dev_private *internals = dev->data->dev_private;
2535        struct rte_eth_stats slave_stats;
2536        int i, j;
2537
2538        for (i = 0; i < internals->slave_count; i++) {
2539                rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2540
2541                stats->ipackets += slave_stats.ipackets;
2542                stats->opackets += slave_stats.opackets;
2543                stats->ibytes += slave_stats.ibytes;
2544                stats->obytes += slave_stats.obytes;
2545                stats->imissed += slave_stats.imissed;
2546                stats->ierrors += slave_stats.ierrors;
2547                stats->oerrors += slave_stats.oerrors;
2548                stats->rx_nombuf += slave_stats.rx_nombuf;
2549
2550                for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2551                        stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2552                        stats->q_opackets[j] += slave_stats.q_opackets[j];
2553                        stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2554                        stats->q_obytes[j] += slave_stats.q_obytes[j];
2555                        stats->q_errors[j] += slave_stats.q_errors[j];
2556                }
2557
2558        }
2559
2560        return 0;
2561}
2562
2563static int
2564bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2565{
2566        struct bond_dev_private *internals = dev->data->dev_private;
2567        int i;
2568        int err;
2569        int ret;
2570
2571        for (i = 0, err = 0; i < internals->slave_count; i++) {
2572                ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2573                if (ret != 0)
2574                        err = ret;
2575        }
2576
2577        return err;
2578}
2579
2580static int
2581bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2582{
2583        struct bond_dev_private *internals = eth_dev->data->dev_private;
2584        int i;
2585        int ret = 0;
2586        uint16_t port_id;
2587
2588        switch (internals->mode) {
2589        /* Promiscuous mode is propagated to all slaves */
2590        case BONDING_MODE_ROUND_ROBIN:
2591        case BONDING_MODE_BALANCE:
2592        case BONDING_MODE_BROADCAST:
2593        case BONDING_MODE_8023AD: {
2594                unsigned int slave_ok = 0;
2595
2596                for (i = 0; i < internals->slave_count; i++) {
2597                        port_id = internals->slaves[i].port_id;
2598
2599                        ret = rte_eth_promiscuous_enable(port_id);
2600                        if (ret != 0)
2601                                RTE_BOND_LOG(ERR,
2602                                        "Failed to enable promiscuous mode for port %u: %s",
2603                                        port_id, rte_strerror(-ret));
2604                        else
2605                                slave_ok++;
2606                }
2607                /*
2608                 * Report success if the operation succeeded on at least
2609                 * one slave. Otherwise return the last error code.
2610                 */
2611                if (slave_ok > 0)
2612                        ret = 0;
2613                break;
2614        }
2615        /* Promiscuous mode is propagated only to primary slave */
2616        case BONDING_MODE_ACTIVE_BACKUP:
2617        case BONDING_MODE_TLB:
2618        case BONDING_MODE_ALB:
2619        default:
2620                /* Do not touch promisc when there cannot be primary ports */
2621                if (internals->slave_count == 0)
2622                        break;
2623                port_id = internals->current_primary_port;
2624                ret = rte_eth_promiscuous_enable(port_id);
2625                if (ret != 0)
2626                        RTE_BOND_LOG(ERR,
2627                                "Failed to enable promiscuous mode for port %u: %s",
2628                                port_id, rte_strerror(-ret));
2629        }
2630
2631        return ret;
2632}
2633
2634static int
2635bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2636{
2637        struct bond_dev_private *internals = dev->data->dev_private;
2638        int i;
2639        int ret = 0;
2640        uint16_t port_id;
2641
2642        switch (internals->mode) {
2643        /* Promiscuous mode is propagated to all slaves */
2644        case BONDING_MODE_ROUND_ROBIN:
2645        case BONDING_MODE_BALANCE:
2646        case BONDING_MODE_BROADCAST:
2647        case BONDING_MODE_8023AD: {
2648                unsigned int slave_ok = 0;
2649
2650                for (i = 0; i < internals->slave_count; i++) {
2651                        port_id = internals->slaves[i].port_id;
2652
2653                        if (internals->mode == BONDING_MODE_8023AD &&
2654                            bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2655                                        BOND_8023AD_FORCED_PROMISC) {
2656                                slave_ok++;
2657                                continue;
2658                        }
2659                        ret = rte_eth_promiscuous_disable(port_id);
2660                        if (ret != 0)
2661                                RTE_BOND_LOG(ERR,
2662                                        "Failed to disable promiscuous mode for port %u: %s",
2663                                        port_id, rte_strerror(-ret));
2664                        else
2665                                slave_ok++;
2666                }
2667                /*
2668                 * Report success if the operation succeeded on at least
2669                 * one slave. Otherwise return the last error code.
2670                 */
2671                if (slave_ok > 0)
2672                        ret = 0;
2673                break;
2674        }
2675        /* Promiscuous mode is propagated only to primary slave */
2676        case BONDING_MODE_ACTIVE_BACKUP:
2677        case BONDING_MODE_TLB:
2678        case BONDING_MODE_ALB:
2679        default:
2680                /* Do not touch promisc when there cannot be primary ports */
2681                if (internals->slave_count == 0)
2682                        break;
2683                port_id = internals->current_primary_port;
2684                ret = rte_eth_promiscuous_disable(port_id);
2685                if (ret != 0)
2686                        RTE_BOND_LOG(ERR,
2687                                "Failed to disable promiscuous mode for port %u: %s",
2688                                port_id, rte_strerror(-ret));
2689        }
2690
2691        return ret;
2692}
2693
2694static int
2695bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2696{
2697        struct bond_dev_private *internals = eth_dev->data->dev_private;
2698        int i;
2699        int ret = 0;
2700        uint16_t port_id;
2701
2702        switch (internals->mode) {
2703        /* allmulti mode is propagated to all slaves */
2704        case BONDING_MODE_ROUND_ROBIN:
2705        case BONDING_MODE_BALANCE:
2706        case BONDING_MODE_BROADCAST:
2707        case BONDING_MODE_8023AD: {
2708                unsigned int slave_ok = 0;
2709
2710                for (i = 0; i < internals->slave_count; i++) {
2711                        port_id = internals->slaves[i].port_id;
2712
2713                        ret = rte_eth_allmulticast_enable(port_id);
2714                        if (ret != 0)
2715                                RTE_BOND_LOG(ERR,
2716                                        "Failed to enable allmulti mode for port %u: %s",
2717                                        port_id, rte_strerror(-ret));
2718                        else
2719                                slave_ok++;
2720                }
2721                /*
2722                 * Report success if the operation succeeded on at least
2723                 * one slave. Otherwise return the last error code.
2724                 */
2725                if (slave_ok > 0)
2726                        ret = 0;
2727                break;
2728        }
2729        /* allmulti mode is propagated only to primary slave */
2730        case BONDING_MODE_ACTIVE_BACKUP:
2731        case BONDING_MODE_TLB:
2732        case BONDING_MODE_ALB:
2733        default:
2734                /* Do not touch allmulti when there cannot be primary ports */
2735                if (internals->slave_count == 0)
2736                        break;
2737                port_id = internals->current_primary_port;
2738                ret = rte_eth_allmulticast_enable(port_id);
2739                if (ret != 0)
2740                        RTE_BOND_LOG(ERR,
2741                                "Failed to enable allmulti mode for port %u: %s",
2742                                port_id, rte_strerror(-ret));
2743        }
2744
2745        return ret;
2746}
2747
2748static int
2749bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2750{
2751        struct bond_dev_private *internals = eth_dev->data->dev_private;
2752        int i;
2753        int ret = 0;
2754        uint16_t port_id;
2755
2756        switch (internals->mode) {
2757        /* allmulti mode is propagated to all slaves */
2758        case BONDING_MODE_ROUND_ROBIN:
2759        case BONDING_MODE_BALANCE:
2760        case BONDING_MODE_BROADCAST:
2761        case BONDING_MODE_8023AD: {
2762                unsigned int slave_ok = 0;
2763
2764                for (i = 0; i < internals->slave_count; i++) {
2765                        uint16_t port_id = internals->slaves[i].port_id;
2766
2767                        if (internals->mode == BONDING_MODE_8023AD &&
2768                            bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2769                                        BOND_8023AD_FORCED_ALLMULTI)
2770                                continue;
2771
2772                        ret = rte_eth_allmulticast_disable(port_id);
2773                        if (ret != 0)
2774                                RTE_BOND_LOG(ERR,
2775                                        "Failed to disable allmulti mode for port %u: %s",
2776                                        port_id, rte_strerror(-ret));
2777                        else
2778                                slave_ok++;
2779                }
2780                /*
2781                 * Report success if the operation succeeded on at least
2782                 * one slave. Otherwise return the last error code.
2783                 */
2784                if (slave_ok > 0)
2785                        ret = 0;
2786                break;
2787        }
2788        /* allmulti mode is propagated only to primary slave */
2789        case BONDING_MODE_ACTIVE_BACKUP:
2790        case BONDING_MODE_TLB:
2791        case BONDING_MODE_ALB:
2792        default:
2793                /* Do not touch allmulti when there cannot be primary ports */
2794                if (internals->slave_count == 0)
2795                        break;
2796                port_id = internals->current_primary_port;
2797                ret = rte_eth_allmulticast_disable(port_id);
2798                if (ret != 0)
2799                        RTE_BOND_LOG(ERR,
2800                                "Failed to disable allmulti mode for port %u: %s",
2801                                port_id, rte_strerror(-ret));
2802        }
2803
2804        return ret;
2805}
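/*
 * Note: the promiscuous/allmulticast handlers above are not called directly;
 * they back the generic ethdev API when an application toggles the mode on
 * the bonded port. A minimal illustrative sketch (bond_port_id is assumed to
 * refer to a started bonded port):
 *
 *	int ret = rte_eth_promiscuous_enable(bond_port_id);
 *
 *	if (ret == 0)
 *		ret = rte_eth_allmulticast_enable(bond_port_id);
 *
 * Depending on the bonding mode, the request is propagated to all slaves or
 * only to the current primary slave, as implemented above.
 */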
2806
2807static void
2808bond_ethdev_delayed_lsc_propagation(void *arg)
2809{
2810        if (arg == NULL)
2811                return;
2812
2813        rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2814                        RTE_ETH_EVENT_INTR_LSC, NULL);
2815}
2816
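/*
 * Link status change handler for slave ports. It is expected to be registered
 * per slave through the standard ethdev callback mechanism, roughly as in the
 * sketch below (the bonding control path, not this file, performs the actual
 * registration; slave_port_id and bonded_port_id are placeholders):
 *
 *	rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
 *			bond_ethdev_lsc_event_callback, &bonded_port_id);
 *
 * The callback receives the bonded port id through the param argument, as the
 * cast below assumes.
 */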
2817int
2818bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2819                void *param, void *ret_param __rte_unused)
2820{
2821        struct rte_eth_dev *bonded_eth_dev;
2822        struct bond_dev_private *internals;
2823        struct rte_eth_link link;
2824        int rc = -1;
2825        int ret;
2826
2827        uint8_t lsc_flag = 0;
2828        int valid_slave = 0;
2829        uint16_t active_pos;
2830        uint16_t i;
2831
2832        if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2833                return rc;
2834
2835        bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2836
2837        if (check_for_bonded_ethdev(bonded_eth_dev))
2838                return rc;
2839
2840        internals = bonded_eth_dev->data->dev_private;
2841
2842        /* If the device isn't started don't handle interrupts */
2843        if (!bonded_eth_dev->data->dev_started)
2844                return rc;
2845
2846        /* verify that port_id is a valid slave of bonded port */
2847        for (i = 0; i < internals->slave_count; i++) {
2848                if (internals->slaves[i].port_id == port_id) {
2849                        valid_slave = 1;
2850                        break;
2851                }
2852        }
2853
2854        if (!valid_slave)
2855                return rc;
2856
2857        /* Synchronize lsc callback parallel calls either by real link event
2858         * from the slave PMDs or by the bonding PMD itself.
2859         */
2860        rte_spinlock_lock(&internals->lsc_lock);
2861
2862        /* Search for port in active port list */
2863        active_pos = find_slave_by_id(internals->active_slaves,
2864                        internals->active_slave_count, port_id);
2865
2866        ret = rte_eth_link_get_nowait(port_id, &link);
2867        if (ret < 0)
2868                RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2869
2870        if (ret == 0 && link.link_status) {
2871                if (active_pos < internals->active_slave_count)
2872                        goto link_update;
2873
2874                /* check link state properties if bonded link is up */
2875                if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2876                        if (link_properties_valid(bonded_eth_dev, &link) != 0)
2877                                RTE_BOND_LOG(ERR, "Invalid link properties "
2878                                             "for slave %d in bonding mode %d",
2879                                             port_id, internals->mode);
2880                } else {
2881                        /* inherit slave link properties */
2882                        link_properties_set(bonded_eth_dev, &link);
2883                }
2884
2885                /* If no active slave ports then set this port to be
2886                 * the primary port.
2887                 */
2888                if (internals->active_slave_count < 1) {
2889                        /* If first active slave, then change link status */
2890                        bonded_eth_dev->data->dev_link.link_status =
2891                                                                ETH_LINK_UP;
2892                        internals->current_primary_port = port_id;
2893                        lsc_flag = 1;
2894
2895                        mac_address_slaves_update(bonded_eth_dev);
2896                }
2897
2898                activate_slave(bonded_eth_dev, port_id);
2899
2900                /* If the user has defined the primary port then default to
2901                 * using it.
2902                 */
2903                if (internals->user_defined_primary_port &&
2904                                internals->primary_port == port_id)
2905                        bond_ethdev_primary_set(internals, port_id);
2906        } else {
2907                if (active_pos == internals->active_slave_count)
2908                        goto link_update;
2909
2910                /* Remove from active slave list */
2911                deactivate_slave(bonded_eth_dev, port_id);
2912
2913                if (internals->active_slave_count < 1)
2914                        lsc_flag = 1;
2915
2916                /* Update primary id, take first active slave from list or if none
2917                 * available fall back to the configured primary port */
2918                if (port_id == internals->current_primary_port) {
2919                        if (internals->active_slave_count > 0)
2920                                bond_ethdev_primary_set(internals,
2921                                                internals->active_slaves[0]);
2922                        else
2923                                internals->current_primary_port = internals->primary_port;
2924                        mac_address_slaves_update(bonded_eth_dev);
2925                }
2926        }
2927
2928link_update:
2929        /**
2930         * Update bonded device link properties after any change to active
2931         * slaves
2932         */
2933        bond_ethdev_link_update(bonded_eth_dev, 0);
2934
2935        if (lsc_flag) {
2936                /* Cancel any possible outstanding interrupts if delays are enabled */
2937                if (internals->link_up_delay_ms > 0 ||
2938                        internals->link_down_delay_ms > 0)
2939                        rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2940                                        bonded_eth_dev);
2941
2942                if (bonded_eth_dev->data->dev_link.link_status) {
2943                        if (internals->link_up_delay_ms > 0)
2944                                rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2945                                                bond_ethdev_delayed_lsc_propagation,
2946                                                (void *)bonded_eth_dev);
2947                        else
2948                                rte_eth_dev_callback_process(bonded_eth_dev,
2949                                                RTE_ETH_EVENT_INTR_LSC,
2950                                                NULL);
2951
2952                } else {
2953                        if (internals->link_down_delay_ms > 0)
2954                                rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2955                                                bond_ethdev_delayed_lsc_propagation,
2956                                                (void *)bonded_eth_dev);
2957                        else
2958                                rte_eth_dev_callback_process(bonded_eth_dev,
2959                                                RTE_ETH_EVENT_INTR_LSC,
2960                                                NULL);
2961                }
2962        }
2963
2964        rte_spinlock_unlock(&internals->lsc_lock);
2965
2966        return rc;
2967}
2968
2969static int
2970bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2971                struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2972{
2973        unsigned i, j;
2974        int result = 0;
2975        int slave_reta_size;
2976        unsigned reta_count;
2977        struct bond_dev_private *internals = dev->data->dev_private;
2978
2979        if (reta_size != internals->reta_size)
2980                return -EINVAL;
2981
2982         /* Copy RETA table */
2983        reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) /
2984                        RTE_RETA_GROUP_SIZE;
2985
2986        for (i = 0; i < reta_count; i++) {
2987                internals->reta_conf[i].mask = reta_conf[i].mask;
2988                for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2989                        if ((reta_conf[i].mask >> j) & 0x01)
2990                                internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2991        }
2992
2993        /* Fill rest of array */
2994        for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2995                memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2996                                sizeof(internals->reta_conf[0]) * reta_count);
2997
2998        /* Propagate RETA over slaves */
2999        for (i = 0; i < internals->slave_count; i++) {
3000                slave_reta_size = internals->slaves[i].reta_size;
3001                result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
3002                                &internals->reta_conf[0], slave_reta_size);
3003                if (result < 0)
3004                        return result;
3005        }
3006
3007        return 0;
3008}
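/*
 * Illustrative application-side usage of the RETA update path above (a sketch
 * only; bond_port_id, reta_size and nb_rx_queues are placeholders and
 * reta_size is assumed to be a multiple of RTE_RETA_GROUP_SIZE):
 *
 *	struct rte_eth_rss_reta_entry64 reta[reta_size / RTE_RETA_GROUP_SIZE];
 *	unsigned int k;
 *
 *	memset(reta, 0, sizeof(reta));
 *	for (k = 0; k < reta_size; k++) {
 *		reta[k / RTE_RETA_GROUP_SIZE].mask |= 1ULL << (k % RTE_RETA_GROUP_SIZE);
 *		reta[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] =
 *				k % nb_rx_queues;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port_id, reta, reta_size);
 *
 * The bonded driver then mirrors the table to every slave, as implemented above.
 */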
3009
3010static int
3011bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3012                struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3013{
3014        int i, j;
3015        struct bond_dev_private *internals = dev->data->dev_private;
3016
3017        if (reta_size != internals->reta_size)
3018                return -EINVAL;
3019
3020         /* Copy RETA table */
3021        for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
3022                for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3023                        if ((reta_conf[i].mask >> j) & 0x01)
3024                                reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3025
3026        return 0;
3027}
3028
3029static int
3030bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3031                struct rte_eth_rss_conf *rss_conf)
3032{
3033        int i, result = 0;
3034        struct bond_dev_private *internals = dev->data->dev_private;
3035        struct rte_eth_rss_conf bond_rss_conf;
3036
3037        memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3038
3039        bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3040
3041        if (bond_rss_conf.rss_hf != 0)
3042                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3043
3044        if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
3045                        sizeof(internals->rss_key)) {
3046                if (bond_rss_conf.rss_key_len == 0)
3047                        bond_rss_conf.rss_key_len = 40;
3048                internals->rss_key_len = bond_rss_conf.rss_key_len;
3049                memcpy(internals->rss_key, bond_rss_conf.rss_key,
3050                                internals->rss_key_len);
3051        }
3052
3053        for (i = 0; i < internals->slave_count; i++) {
3054                result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3055                                &bond_rss_conf);
3056                if (result < 0)
3057                        return result;
3058        }
3059
3060        return 0;
3061}
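/*
 * Application-side sketch for the RSS hash update path above (illustrative
 * only; bond_port_id is a placeholder, and a NULL rss_key keeps the current
 * key):
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = NULL,
 *		.rss_key_len = 0,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 *
 * Hash functions not supported by every slave are masked out through
 * flow_type_rss_offloads before being applied, as implemented above.
 */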
3062
3063static int
3064bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3065                struct rte_eth_rss_conf *rss_conf)
3066{
3067        struct bond_dev_private *internals = dev->data->dev_private;
3068
3069        rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3070        rss_conf->rss_key_len = internals->rss_key_len;
3071        if (rss_conf->rss_key)
3072                memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3073
3074        return 0;
3075}
3076
3077static int
3078bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3079{
3080        struct rte_eth_dev *slave_eth_dev;
3081        struct bond_dev_private *internals = dev->data->dev_private;
3082        int ret, i;
3083
3084        rte_spinlock_lock(&internals->lock);
3085
3086        for (i = 0; i < internals->slave_count; i++) {
3087                slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3088                if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3089                        rte_spinlock_unlock(&internals->lock);
3090                        return -ENOTSUP;
3091                }
3092        }
3093        for (i = 0; i < internals->slave_count; i++) {
3094                ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3095                if (ret < 0) {
3096                        rte_spinlock_unlock(&internals->lock);
3097                        return ret;
3098                }
3099        }
3100
3101        rte_spinlock_unlock(&internals->lock);
3102        return 0;
3103}
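/*
 * MTU changes on the bonded port are likewise fanned out to every slave; an
 * application would simply call (bond_port_id and the value are placeholders):
 *
 *	rte_eth_dev_set_mtu(bond_port_id, 1500);
 *
 * The operation fails with -ENOTSUP if any slave lacks an mtu_set op.
 */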
3104
3105static int
3106bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3107                        struct rte_ether_addr *addr)
3108{
3109        if (mac_address_set(dev, addr)) {
3110                RTE_BOND_LOG(ERR, "Failed to update MAC address");
3111                return -EINVAL;
3112        }
3113
3114        return 0;
3115}
3116
3117static int
3118bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3119                  const struct rte_flow_ops **ops)
3120{
3121        *ops = &bond_flow_ops;
3122        return 0;
3123}
3124
3125static int
3126bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3127                        struct rte_ether_addr *mac_addr,
3128                        __rte_unused uint32_t index, uint32_t vmdq)
3129{
3130        struct rte_eth_dev *slave_eth_dev;
3131        struct bond_dev_private *internals = dev->data->dev_private;
3132        int ret, i;
3133
3134        rte_spinlock_lock(&internals->lock);
3135
3136        for (i = 0; i < internals->slave_count; i++) {
3137                slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3138                if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3139                         *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3140                        ret = -ENOTSUP;
3141                        goto end;
3142                }
3143        }
3144
3145        for (i = 0; i < internals->slave_count; i++) {
3146                ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3147                                mac_addr, vmdq);
3148                if (ret < 0) {
3149                        /* rollback */
3150                        for (i--; i >= 0; i--)
3151                                rte_eth_dev_mac_addr_remove(
3152                                        internals->slaves[i].port_id, mac_addr);
3153                        goto end;
3154                }
3155        }
3156
3157        ret = 0;
3158end:
3159        rte_spinlock_unlock(&internals->lock);
3160        return ret;
3161}
3162
3163static void
3164bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3165{
3166        struct rte_eth_dev *slave_eth_dev;
3167        struct bond_dev_private *internals = dev->data->dev_private;
3168        int i;
3169
3170        rte_spinlock_lock(&internals->lock);
3171
3172        for (i = 0; i < internals->slave_count; i++) {
3173                slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3174                if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3175                        goto end;
3176        }
3177
3178        struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3179
3180        for (i = 0; i < internals->slave_count; i++)
3181                rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3182                                mac_addr);
3183
3184end:
3185        rte_spinlock_unlock(&internals->lock);
3186}
3187
3188const struct eth_dev_ops default_dev_ops = {
3189        .dev_start            = bond_ethdev_start,
3190        .dev_stop             = bond_ethdev_stop,
3191        .dev_close            = bond_ethdev_close,
3192        .dev_configure        = bond_ethdev_configure,
3193        .dev_infos_get        = bond_ethdev_info,
3194        .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3195        .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3196        .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3197        .rx_queue_release     = bond_ethdev_rx_queue_release,
3198        .tx_queue_release     = bond_ethdev_tx_queue_release,
3199        .link_update          = bond_ethdev_link_update,
3200        .stats_get            = bond_ethdev_stats_get,
3201        .stats_reset          = bond_ethdev_stats_reset,
3202        .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3203        .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3204        .allmulticast_enable  = bond_ethdev_allmulticast_enable,
3205        .allmulticast_disable = bond_ethdev_allmulticast_disable,
3206        .reta_update          = bond_ethdev_rss_reta_update,
3207        .reta_query           = bond_ethdev_rss_reta_query,
3208        .rss_hash_update      = bond_ethdev_rss_hash_update,
3209        .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3210        .mtu_set              = bond_ethdev_mtu_set,
3211        .mac_addr_set         = bond_ethdev_mac_address_set,
3212        .mac_addr_add         = bond_ethdev_mac_addr_add,
3213        .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3214        .flow_ops_get         = bond_flow_ops_get
3215};
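/*
 * These ops are attached to the bonded ethdev in bond_alloc() below, so the
 * usual rte_eth_* control-path calls on the bonded port id resolve to the
 * bond_ethdev_* handlers in this file and are propagated to the slaves as
 * each handler describes.
 */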
3216
3217static int
3218bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3219{
3220        const char *name = rte_vdev_device_name(dev);
3221        uint8_t socket_id = dev->device.numa_node;
3222        struct bond_dev_private *internals = NULL;
3223        struct rte_eth_dev *eth_dev = NULL;
3224        uint32_t vlan_filter_bmp_size;
3225
3226        /* now do all data allocation - for eth_dev structure, dummy pci driver
3227         * and internal (private) data
3228         */
3229
3230        /* reserve an ethdev entry */
3231        eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3232        if (eth_dev == NULL) {
3233                RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3234                goto err;
3235        }
3236
3237        internals = eth_dev->data->dev_private;
3238        eth_dev->data->nb_rx_queues = (uint16_t)1;
3239        eth_dev->data->nb_tx_queues = (uint16_t)1;
3240
3241        /* Allocate memory for storing MAC addresses */
3242        eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3243                        BOND_MAX_MAC_ADDRS, 0, socket_id);
3244        if (eth_dev->data->mac_addrs == NULL) {
3245                RTE_BOND_LOG(ERR,
3246                             "Failed to allocate %u bytes needed to store MAC addresses",
3247                             RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3248                goto err;
3249        }
3250
3251        eth_dev->dev_ops = &default_dev_ops;
3252        eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3253                                        RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3254
3255        rte_spinlock_init(&internals->lock);
3256        rte_spinlock_init(&internals->lsc_lock);
3257
3258        internals->port_id = eth_dev->data->port_id;
3259        internals->mode = BONDING_MODE_INVALID;
3260        internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3261        internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3262        internals->burst_xmit_hash = burst_xmit_l2_hash;
3263        internals->user_defined_mac = 0;
3264
3265        internals->link_status_polling_enabled = 0;
3266
3267        internals->link_status_polling_interval_ms =
3268                DEFAULT_POLLING_INTERVAL_10_MS;
3269        internals->link_down_delay_ms = 0;
3270        internals->link_up_delay_ms = 0;
3271
3272        internals->slave_count = 0;
3273        internals->active_slave_count = 0;
3274        internals->rx_offload_capa = 0;
3275        internals->tx_offload_capa = 0;
3276        internals->rx_queue_offload_capa = 0;
3277        internals->tx_queue_offload_capa = 0;
3278        internals->candidate_max_rx_pktlen = 0;
3279        internals->max_rx_pktlen = 0;
3280
3281        /* Initially allow to choose any offload type */
3282        internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3283
3284        memset(&internals->default_rxconf, 0,
3285               sizeof(internals->default_rxconf));
3286        memset(&internals->default_txconf, 0,
3287               sizeof(internals->default_txconf));
3288
3289        memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3290        memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3291
3292        memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3293        memset(internals->slaves, 0, sizeof(internals->slaves));
3294
3295        TAILQ_INIT(&internals->flow_list);
3296        internals->flow_isolated_valid = 0;
3297
3298        /* Set mode 4 default configuration */
3299        bond_mode_8023ad_setup(eth_dev, NULL);
3300        if (bond_ethdev_mode_set(eth_dev, mode)) {
3301                RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3302                                 eth_dev->data->port_id, mode);
3303                goto err;
3304        }
3305
3306        vlan_filter_bmp_size =
3307                rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3308        internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3309                                                   RTE_CACHE_LINE_SIZE);
3310        if (internals->vlan_filter_bmpmem == NULL) {
3311                RTE_BOND_LOG(ERR,
3312                             "Failed to allocate vlan bitmap for bonded device %u",
3313                             eth_dev->data->port_id);
3314                goto err;
3315        }
3316
3317        internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3318                        internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3319        if (internals->vlan_filter_bmp == NULL) {
3320                RTE_BOND_LOG(ERR,
3321                             "Failed to init vlan bitmap for bonded device %u",
3322                             eth_dev->data->port_id);
3323                rte_free(internals->vlan_filter_bmpmem);
3324                goto err;
3325        }
3326
3327        return eth_dev->data->port_id;
3328
3329err:
3330        rte_free(internals);
3331        if (eth_dev != NULL)
3332                eth_dev->data->dev_private = NULL;
3333        rte_eth_dev_release_port(eth_dev);
3334        return -1;
3335}
3336
3337static int
3338bond_probe(struct rte_vdev_device *dev)
3339{
3340        const char *name;
3341        struct bond_dev_private *internals;
3342        struct rte_kvargs *kvlist;
3343        uint8_t bonding_mode;
3344        int arg_count, port_id;
3345        int socket_id;
3346        uint8_t agg_mode;
3347        struct rte_eth_dev *eth_dev;
3348
3349        if (!dev)
3350                return -EINVAL;
3351
3352        name = rte_vdev_device_name(dev);
3353        RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3354
3355        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3356                eth_dev = rte_eth_dev_attach_secondary(name);
3357                if (!eth_dev) {
3358                        RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3359                        return -1;
3360                }
3361                /* TODO: request info from primary to set up Rx and Tx */
3362                eth_dev->dev_ops = &default_dev_ops;
3363                eth_dev->device = &dev->device;
3364                rte_eth_dev_probing_finish(eth_dev);
3365                return 0;
3366        }
3367
3368        kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3369                pmd_bond_init_valid_arguments);
3370        if (kvlist == NULL)
3371                return -1;
3372
3373        /* Parse link bonding mode */
3374        if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3375                if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3376                                &bond_ethdev_parse_slave_mode_kvarg,
3377                                &bonding_mode) != 0) {
3378                        RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3379                                        name);
3380                        goto parse_error;
3381                }
3382        } else {
3383                RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3384                                "device %s", name);
3385                goto parse_error;
3386        }
3387
3388        /* Parse socket id to create bonding device on */
3389        arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3390        if (arg_count == 1) {
3391                if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3392                                &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3393                                != 0) {
3394                        RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3395                                        "bonded device %s", name);
3396                        goto parse_error;
3397                }
3398        } else if (arg_count > 1) {
3399                RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3400                                "bonded device %s", name);
3401                goto parse_error;
3402        } else {
3403                socket_id = rte_socket_id();
3404        }
3405
3406        dev->device.numa_node = socket_id;
3407
3408        /* Create link bonding eth device */
3409        port_id = bond_alloc(dev, bonding_mode);
3410        if (port_id < 0) {
3411                RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3412                                "socket %u.",   name, bonding_mode, socket_id);
3413                goto parse_error;
3414        }
3415        internals = rte_eth_devices[port_id].data->dev_private;
3416        internals->kvlist = kvlist;
3417
3418        if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3419                if (rte_kvargs_process(kvlist,
3420                                PMD_BOND_AGG_MODE_KVARG,
3421                                &bond_ethdev_parse_slave_agg_mode_kvarg,
3422                                &agg_mode) != 0) {
3423                        RTE_BOND_LOG(ERR,
3424                                        "Failed to parse agg selection mode for bonded device %s",
3425                                        name);
3426                        goto parse_error;
3427                }
3428
3429                if (internals->mode == BONDING_MODE_8023AD)
3430                        internals->mode4.agg_selection = agg_mode;
3431        } else {
3432                internals->mode4.agg_selection = AGG_STABLE;
3433        }
3434
3435        rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3436        RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3437                        "socket %u.",   name, port_id, bonding_mode, socket_id);
3438        return 0;
3439
3440parse_error:
3441        rte_kvargs_free(kvlist);
3442
3443        return -1;
3444}
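/*
 * Besides the vdev/devargs path handled by bond_probe() above, a bonded port
 * can also be created programmatically through the bonding API declared in
 * rte_eth_bond.h. A minimal sketch (names are placeholders, error handling
 * omitted):
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	rte_eth_bond_slave_add(bond_port, slave_port_id);
 *
 * In that case bond_ethdev_configure() below finds no kvlist and returns
 * early once the RSS defaults have been set up.
 */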
3445
3446static int
3447bond_remove(struct rte_vdev_device *dev)
3448{
3449        struct rte_eth_dev *eth_dev;
3450        struct bond_dev_private *internals;
3451        const char *name;
3452        int ret = 0;
3453
3454        if (!dev)
3455                return -EINVAL;
3456
3457        name = rte_vdev_device_name(dev);
3458        RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3459
3460        /* find an ethdev entry */
3461        eth_dev = rte_eth_dev_allocated(name);
3462        if (eth_dev == NULL)
3463                return 0; /* port already released */
3464
3465        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3466                return rte_eth_dev_release_port(eth_dev);
3467
3468        RTE_ASSERT(eth_dev->device == &dev->device);
3469
3470        internals = eth_dev->data->dev_private;
3471        if (internals->slave_count != 0)
3472                return -EBUSY;
3473
3474        if (eth_dev->data->dev_started == 1) {
3475                ret = bond_ethdev_stop(eth_dev);
3476                bond_ethdev_close(eth_dev);
3477        }
3478        if (internals->kvlist != NULL)
3479                rte_kvargs_free(internals->kvlist);
3480        rte_eth_dev_release_port(eth_dev);
3481
3482        return ret;
3483}
3484
3485/* This part will resolve the slave port ids after all the other pdevs and
3486 * vdevs have been allocated. */
3487static int
3488bond_ethdev_configure(struct rte_eth_dev *dev)
3489{
3490        const char *name = dev->device->name;
3491        struct bond_dev_private *internals = dev->data->dev_private;
3492        struct rte_kvargs *kvlist = internals->kvlist;
3493        int arg_count;
3494        uint16_t port_id = dev - rte_eth_devices;
3495        uint8_t agg_mode;
3496
3497        static const uint8_t default_rss_key[40] = {
3498                0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3499                0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3500                0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3501                0xBE, 0xAC, 0x01, 0xFA
3502        };
3503
3504        unsigned i, j;
3505
3506        /*
3507         * If RSS is enabled, fill table with default values and
3508         * set key to the value specified in port RSS configuration.
3509         * Fall back to the default RSS key if the key is not specified.
3510         */
3511        if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3512                if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3513                        internals->rss_key_len =
3514                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3515                        memcpy(internals->rss_key,
3516                               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3517                               internals->rss_key_len);
3518                } else {
3519                        internals->rss_key_len = sizeof(default_rss_key);
3520                        memcpy(internals->rss_key, default_rss_key,
3521                               internals->rss_key_len);
3522                }
3523
3524                for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3525                        internals->reta_conf[i].mask = ~0LL;
3526                        for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3527                                internals->reta_conf[i].reta[j] =
3528                                                (i * RTE_RETA_GROUP_SIZE + j) %
3529                                                dev->data->nb_rx_queues;
3530                }
3531        }
3532
3533        /* set the max_rx_pktlen */
3534        internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3535
3536        /*
3537         * if no kvlist, it means that this bonded device has been created
3538         * through the bonding api.
3539         */
3540        if (!kvlist)
3541                return 0;
3542
3543        /* Parse MAC address for bonded device */
3544        arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3545        if (arg_count == 1) {
3546                struct rte_ether_addr bond_mac;
3547
3548                if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3549                                       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3550                        RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3551                                     name);
3552                        return -1;
3553                }
3554
3555                /* Set MAC address */
3556                if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3557                        RTE_BOND_LOG(ERR,
3558                                     "Failed to set mac address on bonded device %s",
3559                                     name);
3560                        return -1;
3561                }
3562        } else if (arg_count > 1) {
3563                RTE_BOND_LOG(ERR,
3564                             "MAC address can be specified only once for bonded device %s",
3565                             name);
3566                return -1;
3567        }
3568
3569        /* Parse/set balance mode transmit policy */
3570        arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3571        if (arg_count == 1) {
3572                uint8_t xmit_policy;
3573
3574                if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3575                                       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3576                    0) {
3577                        RTE_BOND_LOG(INFO,
3578                                     "Invalid xmit policy specified for bonded device %s",
3579                                     name);
3580                        return -1;
3581                }
3582
3583                /* Set balance mode transmit policy */
3584                if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3585                        RTE_BOND_LOG(ERR,
3586                                     "Failed to set balance xmit policy on bonded device %s",
3587                                     name);
3588                        return -1;
3589                }
3590        } else if (arg_count > 1) {
3591                RTE_BOND_LOG(ERR,
3592                             "Transmit policy can be specified only once for bonded device %s",
3593                             name);
3594                return -1;
3595        }
3596
3597        if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3598                if (rte_kvargs_process(kvlist,
3599                                       PMD_BOND_AGG_MODE_KVARG,
3600                                       &bond_ethdev_parse_slave_agg_mode_kvarg,
3601                                       &agg_mode) != 0) {
3602                        RTE_BOND_LOG(ERR,
3603                                     "Failed to parse agg selection mode for bonded device %s",
3604                                     name);
3605                }
3606                if (internals->mode == BONDING_MODE_8023AD) {
3607                        int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3608                                        agg_mode);
3609                        if (ret < 0) {
3610                                RTE_BOND_LOG(ERR,
3611                                        "Invalid args for agg selection set for bonded device %s",
3612                                        name);
3613                                return -1;
3614                        }
3615                }
3616        }
3617
3618        /* Parse/add slave ports to bonded device */
3619        if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3620                struct bond_ethdev_slave_ports slave_ports;
3621                unsigned i;
3622
3623                memset(&slave_ports, 0, sizeof(slave_ports));
3624
3625                if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3626                                       &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3627                        RTE_BOND_LOG(ERR,
3628                                     "Failed to parse slave ports for bonded device %s",
3629                                     name);
3630                        return -1;
3631                }
3632
3633                for (i = 0; i < slave_ports.slave_count; i++) {
3634                        if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3635                                RTE_BOND_LOG(ERR,
3636                                             "Failed to add port %d as slave to bonded device %s",
3637                                             slave_ports.slaves[i], name);
3638                        }
3639                }
3640
3641        } else {
3642                RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3643                return -1;
3644        }
3645
3646        /* Parse/set primary slave port id*/
3647        arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3648        if (arg_count == 1) {
3649                uint16_t primary_slave_port_id;
3650
3651                if (rte_kvargs_process(kvlist,
3652                                       PMD_BOND_PRIMARY_SLAVE_KVARG,
3653                                       &bond_ethdev_parse_primary_slave_port_id_kvarg,
3654                                       &primary_slave_port_id) < 0) {
3655                        RTE_BOND_LOG(INFO,
3656                                     "Invalid primary slave port id specified for bonded device %s",
3657                                     name);
3658                        return -1;
3659                }
3660
3661                /* Set primary slave port id */
3662                if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3663                    != 0) {
3664                        RTE_BOND_LOG(ERR,
3665                                     "Failed to set primary slave port %d on bonded device %s",
3666                                     primary_slave_port_id, name);
3667                        return -1;
3668                }
3669        } else if (arg_count > 1) {
3670                RTE_BOND_LOG(INFO,
3671                             "Primary slave can be specified only once for bonded device %s",
3672                             name);
3673                return -1;
3674        }
3675
3676        /* Parse link status monitor polling interval */
3677        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3678        if (arg_count == 1) {
3679                uint32_t lsc_poll_interval_ms;
3680
3681                if (rte_kvargs_process(kvlist,
3682                                       PMD_BOND_LSC_POLL_PERIOD_KVARG,
3683                                       &bond_ethdev_parse_time_ms_kvarg,
3684                                       &lsc_poll_interval_ms) < 0) {
3685                        RTE_BOND_LOG(INFO,
3686                                     "Invalid lsc polling interval value specified for bonded"
3687                                     " device %s", name);
3688                        return -1;
3689                }
3690
3691                if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3692                    != 0) {
3693                        RTE_BOND_LOG(ERR,
3694                                     "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3695                                     lsc_poll_interval_ms, name);
3696                        return -1;
3697                }
3698        } else if (arg_count > 1) {
3699                RTE_BOND_LOG(INFO,
3700                             "LSC polling interval can be specified only once for bonded"
3701                             " device %s", name);
3702                return -1;
3703        }
3704
3705        /* Parse link up interrupt propagation delay */
3706        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3707        if (arg_count == 1) {
3708                uint32_t link_up_delay_ms;
3709
3710                if (rte_kvargs_process(kvlist,
3711                                       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3712                                       &bond_ethdev_parse_time_ms_kvarg,
3713                                       &link_up_delay_ms) < 0) {
3714                        RTE_BOND_LOG(INFO,
3715                                     "Invalid link up propagation delay value specified for"
3716                                     " bonded device %s", name);
3717                        return -1;
3718                }
3719
3720                /* Set link up propagation delay */
3721                if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3722                    != 0) {
3723                        RTE_BOND_LOG(ERR,
3724                                     "Failed to set link up propagation delay (%u ms) on bonded"
3725                                     " device %s", link_up_delay_ms, name);
3726                        return -1;
3727                }
3728        } else if (arg_count > 1) {
3729                RTE_BOND_LOG(INFO,
3730                             "Link up propagation delay can be specified only once for"
3731                             " bonded device %s", name);
3732                return -1;
3733        }
3734
3735        /* Parse link down interrupt propagation delay */
3736        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3737        if (arg_count == 1) {
3738                uint32_t link_down_delay_ms;
3739
3740                if (rte_kvargs_process(kvlist,
3741                                       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3742                                       &bond_ethdev_parse_time_ms_kvarg,
3743                                       &link_down_delay_ms) < 0) {
3744                        RTE_BOND_LOG(INFO,
3745                                     "Invalid link down propagation delay value specified for"
3746                                     " bonded device %s", name);
3747                        return -1;
3748                }
3749
3750                /* Set link down propagation delay */
3751                if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3752                    != 0) {
3753                        RTE_BOND_LOG(ERR,
3754                                     "Failed to set link down propagation delay (%u ms) on bonded device %s",
3755                                     link_down_delay_ms, name);
3756                        return -1;
3757                }
3758        } else if (arg_count > 1) {
3759                RTE_BOND_LOG(INFO,
3760                             "Link down propagation delay can be specified only once for bonded device %s",
3761                             name);
3762                return -1;
3763        }
3764
3765        return 0;
3766}
3767
3768struct rte_vdev_driver pmd_bond_drv = {
3769        .probe = bond_probe,
3770        .remove = bond_remove,
3771};
3772
3773RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3774RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3775
3776RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3777        "slave=<ifc> "
3778        "primary=<ifc> "
3779        "mode=[0-6] "
3780        "xmit_policy=[l2 | l23 | l34] "
3781        "agg_mode=[count | stable | bandwidth] "
3782        "socket_id=<int> "
3783        "mac=<mac addr> "
3784        "lsc_poll_period_ms=<int> "
3785        "up_delay=<int> "
3786        "down_delay=<int>");
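/*
 * Example devargs matching the parameter string above (illustrative; the PCI
 * addresses are placeholders):
 *
 *	--vdev 'net_bonding0,mode=1,slave=0000:00:01.0,slave=0000:00:02.0,primary=0000:00:01.0'
 */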
3787
3788/* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for
3789 * this library, see meson.build.
3790 */
3791RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);
3792