1
2
3
4#include <stdlib.h>
5#include <stdbool.h>
6#include <netinet/in.h>
7
8#include <rte_mbuf.h>
9#include <rte_malloc.h>
10#include <ethdev_driver.h>
11#include <ethdev_vdev.h>
12#include <rte_tcp.h>
13#include <rte_udp.h>
14#include <rte_ip.h>
15#include <rte_ip_frag.h>
16#include <rte_devargs.h>
17#include <rte_kvargs.h>
18#include <rte_bus_vdev.h>
19#include <rte_alarm.h>
20#include <rte_cycles.h>
21#include <rte_string_fns.h>
22
23#include "rte_eth_bond.h"
24#include "eth_bond_private.h"
25#include "eth_bond_8023ad_private.h"
26
/* Interval (in ms) between TLB slave re-ordering runs; also the period of
 * the re-arming alarm in bond_ethdev_update_tlb_slave_cb(). */
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
/* Maximum number of MAC addresses per bonded device. */
#define BOND_MAX_MAC_ADDRS 16

/* XOR of L4 source and destination ports; used by the l34 TX hash policy. */
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)


/* Last sampled TX byte counter per slave port.  TLB mode subtracts this
 * from the current obytes to estimate per-interval transmit load. */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
35
36static inline size_t
37get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
38{
39 size_t vlan_offset = 0;
40
41 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 struct rte_vlan_hdr *vlan_hdr =
44 (struct rte_vlan_hdr *)(eth_hdr + 1);
45
46 vlan_offset = sizeof(struct rte_vlan_hdr);
47 *proto = vlan_hdr->eth_proto;
48
49 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 vlan_hdr = vlan_hdr + 1;
51 *proto = vlan_hdr->eth_proto;
52 vlan_offset += sizeof(struct rte_vlan_hdr);
53 }
54 }
55 return vlan_offset;
56}
57
/*
 * Round-robin RX burst (mode 0): poll each active slave in turn, starting
 * from the queue's saved rotation position, until nb_pkts mbufs are
 * gathered or every slave has been polled once.
 * Returns the number of packets written to bufs.
 */
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = bd_rx_q->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* The write offset into bufs advances as packets arrive from
		 * earlier slaves in the rotation. */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	/* Advance the saved position by one so successive bursts start on
	 * different slaves, keeping the polling fair. */
	if (++bd_rx_q->active_slave >= slave_count)
		bd_rx_q->active_slave = 0;
	return num_rx_total;
}
93
94static uint16_t
95bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
96 uint16_t nb_pkts)
97{
98 struct bond_dev_private *internals;
99
100
101 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
102
103 internals = bd_rx_q->dev_private;
104
105 return rte_eth_rx_burst(internals->current_primary_port,
106 bd_rx_q->queue_id, bufs, nb_pkts);
107}
108
109static inline uint8_t
110is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
111{
112 const uint16_t ether_type_slow_be =
113 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
114
115 return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 (ethertype == ether_type_slow_be &&
117 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
118}
119
120
121
122
123
/* Flow spec matching 802.3ad slow-protocol frames: any MAC addresses,
 * EtherType == RTE_ETHER_TYPE_SLOW. */
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

/* Matching mask: ignore MACs, compare the EtherType exactly. */
static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

/* Pattern used to steer LACP/marker frames to the dedicated RX queue. */
static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

/* Flow attributes for the slow-frame rule: ingress only, default group
 * and priority. */
const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};
158
/*
 * Verify that a slave port can support dedicated-queue LACP filtering:
 * validate the slow-frame flow rule on the slave and check that it has
 * capacity for one extra RX and TX queue beyond the bonded device's
 * currently configured queue counts.
 * Returns 0 on success, -1 or a negative errno-style value on failure.
 */
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = bond_dev->data->dev_private;

	/* Queue index is irrelevant for validation; use 0. */
	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	ret = rte_eth_dev_info_get(slave_port, &slave_info);
	if (ret != 0) {
		RTE_BOND_LOG(ERR,
			"%s: Error during getting device (port %u) info: %s\n",
			__func__, slave_port, strerror(-ret));

		return ret;
	}

	/* The dedicated queue is allocated past the configured queues, so the
	 * slave must support at least one more queue in each direction. */
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities doesn't allow to allocate additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}
208
/*
 * Determine whether hardware filtering of 802.3ad slow packets is usable
 * on this bonded device: record the dedicated queue ids (placed just past
 * the currently configured RX/TX queue counts) and validate the flow rule
 * on every current slave.
 * Returns 0 when supported (or when there are no slaves yet), negative on
 * failure.
 */
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct rte_eth_dev_info bond_info;
	uint16_t idx;
	int ret;

	/* Verify if all slaves in bonding supports flow director and */
	if (internals->slave_count > 0) {
		ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
		if (ret != 0) {
			RTE_BOND_LOG(ERR,
				"%s: Error during getting device (port %u) info: %s\n",
				__func__, bond_dev->data->port_id,
				strerror(-ret));

			return ret;
		}

		/* The dedicated queues take the first index past the
		 * currently configured queues. */
		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
241
/*
 * Install the flow rule on a slave that redirects 802.3ad slow frames to
 * the bonded device's dedicated LACP RX queue.  The created flow handle
 * is stored per slave port so it can be destroyed later.
 * Returns 0 on success, -1 on failure.
 */
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
273
/*
 * Shared RX path for 802.3ad (mode 4).  Polls active slaves round-robin
 * starting at the queue's saved position.  When dedicated_rxq is false,
 * LACP/marker frames are pulled out of the burst and handed to the mode-4
 * state machine.  Frames the bonded port should not deliver (slave not in
 * COLLECTING state, or not addressed to us while promiscuous/allmulticast
 * is off) are removed and freed.
 * Returns the number of data packets left in bufs.
 */
static inline uint16_t
rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
		bool dedicated_rxq)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
					&rte_eth_devices[internals->port_id];
	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct rte_ether_hdr *hdr;

	const uint16_t ether_type_slow_be =
		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
	const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	/* Copy the active slave list so concurrent slave add/remove cannot
	 * change it under us during the burst. */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = bd_rx_q->active_slave;
	if (idx >= slave_count) {
		/* Saved rotation position is stale (slaves were removed). */
		bd_rx_q->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		/* Prefetch the first couple of newly received packets. */
		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets */
		while (j < num_rx_total) {
			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if:
			 * - it is a LACP/marker frame and this is not the
			 *   dedicated slow-frame queue, or
			 * - the slave is not collecting, or
			 * - it is a unicast frame for another MAC, or a
			 *   multicast frame while allmulticast is off,
			 *   and promiscuous mode is off.
			 */
			if (unlikely(
				(!dedicated_rxq &&
				 is_lacp_packets(hdr->ether_type, subtype,
						 bufs[j])) ||
				!collecting ||
				(!promisc &&
				 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
				   !rte_is_same_ether_addr(bond_mac,
						       &hdr->d_addr)) ||
				  (!allmulti &&
				   rte_is_multicast_ether_addr(&hdr->d_addr)))))) {

				if (hdr->ether_type == ether_type_slow_be) {
					/* Slow frame: hand off to the 802.3ad
					 * state machine (it owns the mbuf). */
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Close the gap left by the removed packet. */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	/* Rotate the starting slave for the next burst. */
	if (++bd_rx_q->active_slave >= slave_count)
		bd_rx_q->active_slave = 0;

	return num_rx_total;
}
375
/* Mode-4 RX when slow frames share the data queues: LACP/marker frames
 * are filtered out of the burst in software. */
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
}
382
/* Mode-4 RX with a dedicated hardware LACP queue: slow frames never reach
 * the data queues, so the software LACP filter is skipped. */
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
}
389
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
/* Running RX/TX burst counters for the ALB (mode 6) debug logging; also
 * used as direction tags by update_client_stats(). */
uint32_t burstnumberRX;
uint32_t burstnumberTX;
393
394#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
395
396static void
397arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
398{
399 switch (arp_op) {
400 case RTE_ARP_OP_REQUEST:
401 strlcpy(buf, "ARP Request", buf_len);
402 return;
403 case RTE_ARP_OP_REPLY:
404 strlcpy(buf, "ARP Reply", buf_len);
405 return;
406 case RTE_ARP_OP_REVREQUEST:
407 strlcpy(buf, "Reverse ARP Request", buf_len);
408 return;
409 case RTE_ARP_OP_REVREPLY:
410 strlcpy(buf, "Reverse ARP Reply", buf_len);
411 return;
412 case RTE_ARP_OP_INVREQUEST:
413 strlcpy(buf, "Peer Identify Request", buf_len);
414 return;
415 case RTE_ARP_OP_INVREPLY:
416 strlcpy(buf, "Peer Identify Reply", buf_len);
417 return;
418 default:
419 break;
420 }
421 strlcpy(buf, "Unknown", buf_len);
422 return;
423}
424#endif
/* Buffer size for a dotted-quad IPv4 string ("255.255.255.255" + NUL). */
#define MaxIPv4String 16
/*
 * Render a big-endian IPv4 address as dotted-quad text into buf (at most
 * buf_size bytes; snprintf guarantees NUL termination when buf_size > 0).
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t host_addr = rte_be_to_cpu_32(be_ipv4_addr);
	int oct3 = (host_addr >> 24) & 0xFF;
	int oct2 = (host_addr >> 16) & 0xFF;
	int oct1 = (host_addr >> 8) & 0xFF;
	int oct0 = host_addr & 0xFF;

	snprintf(buf, buf_size, "%d.%d.%d.%d", oct3, oct2, oct1, oct0);
}
436
/* Capacity of the ALB debug per-client statistics table. */
#define MAX_CLIENTS_NUMBER 128
/* Number of entries currently in use in client_stats. */
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;			/* slave port the client was seen on */
	uint32_t ipv4_addr;		/* client IPv4 address (as carried in the packet) */
	uint32_t ipv4_rx_packets;	/* IPv4 packets received from this client */
	uint32_t ipv4_tx_packets;	/* IPv4 packets sent to this client */
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
446
447static void
448update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
449{
450 int i = 0;
451
452 for (; i < MAX_CLIENTS_NUMBER; i++) {
453 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
454
455 if (TXorRXindicator == &burstnumberRX)
456 client_stats[i].ipv4_rx_packets++;
457 else
458 client_stats[i].ipv4_tx_packets++;
459 return;
460 }
461 }
462
463 if (TXorRXindicator == &burstnumberRX)
464 client_stats[active_clients].ipv4_rx_packets++;
465 else
466 client_stats[active_clients].ipv4_tx_packets++;
467 client_stats[active_clients].ipv4_addr = addr;
468 client_stats[active_clients].port = port;
469 active_clients++;
470
471}
472
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
/* Log one mode-6 (ALB) packet: direction tag, src/dst MAC and IP, ARP
 * opcode name (empty string for plain IPv4), slave port, and a burst
 * counter which this macro increments as a side effect. */
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif
490
491static void
492mode6_debug(const char __rte_unused *info,
493 struct rte_ether_hdr *eth_h, uint16_t port,
494 uint32_t __rte_unused *burstnumber)
495{
496 struct rte_ipv4_hdr *ipv4_h;
497#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
498 struct rte_arp_hdr *arp_h;
499 char dst_ip[16];
500 char ArpOp[24];
501 char buf[16];
502#endif
503 char src_ip[16];
504
505 uint16_t ether_type = eth_h->ether_type;
506 uint16_t offset = get_vlan_offset(eth_h, ðer_type);
507
508#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
509 strlcpy(buf, info, 16);
510#endif
511
512 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
513 ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
514 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
515#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
516 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
517 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
518#endif
519 update_client_stats(ipv4_h->src_addr, port, burstnumber);
520 }
521#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
522 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
523 arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
524 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
525 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
526 arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
527 ArpOp, sizeof(ArpOp));
528 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
529 }
530#endif
531}
532#endif
533
534static uint16_t
535bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
536{
537 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
538 struct bond_dev_private *internals = bd_rx_q->dev_private;
539 struct rte_ether_hdr *eth_h;
540 uint16_t ether_type, offset;
541 uint16_t nb_recv_pkts;
542 int i;
543
544 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
545
546 for (i = 0; i < nb_recv_pkts; i++) {
547 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
548 ether_type = eth_h->ether_type;
549 offset = get_vlan_offset(eth_h, ðer_type);
550
551 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
552#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
553 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
554#endif
555 bond_mode_alb_arp_recv(eth_h, offset, internals);
556 }
557#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
558 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
559 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
560#endif
561 }
562
563 return nb_recv_pkts;
564}
565
/*
 * Round-robin (mode 0) TX burst: distribute the burst one packet at a time
 * across the active slaves, then transmit each slave's share.  Packets a
 * slave could not send are compacted to the tail of bufs so the caller can
 * retry or free them.
 *
 * NOTE(review): slave_idx is function-static, so the rotation position is
 * shared by every TX queue (and lcore) using this mode; concurrent callers
 * race on it.  This only skews distribution fairness, not the correctness
 * of individual sends -- confirm this is intentional.
 */
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	/* Per-slave staging arrays; worst case one slave takes the whole burst. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with which packets are to be sent on it  */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* Remember where the rotation stopped so the next burst continues
	 * from the following slave. */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
628
629static uint16_t
630bond_ethdev_tx_burst_active_backup(void *queue,
631 struct rte_mbuf **bufs, uint16_t nb_pkts)
632{
633 struct bond_dev_private *internals;
634 struct bond_tx_queue *bd_tx_q;
635
636 bd_tx_q = (struct bond_tx_queue *)queue;
637 internals = bd_tx_q->dev_private;
638
639 if (internals->active_slave_count < 1)
640 return 0;
641
642 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
643 bufs, nb_pkts);
644}
645
646static inline uint16_t
647ether_hash(struct rte_ether_hdr *eth_hdr)
648{
649 unaligned_uint16_t *word_src_addr =
650 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
651 unaligned_uint16_t *word_dst_addr =
652 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
653
654 return (word_src_addr[0] ^ word_dst_addr[0]) ^
655 (word_src_addr[1] ^ word_dst_addr[1]) ^
656 (word_src_addr[2] ^ word_dst_addr[2]);
657}
658
/* Fold an IPv4 header into 32 bits: XOR of source and destination address
 * (byte order is irrelevant since both sides hash identically). */
static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}
664
665static inline uint32_t
666ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
667{
668 unaligned_uint32_t *word_src_addr =
669 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
670 unaligned_uint32_t *word_dst_addr =
671 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
672
673 return (word_src_addr[0] ^ word_dst_addr[0]) ^
674 (word_src_addr[1] ^ word_dst_addr[1]) ^
675 (word_src_addr[2] ^ word_dst_addr[2]) ^
676 (word_src_addr[3] ^ word_dst_addr[3]);
677}
678
679
680void
681burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
682 uint16_t slave_count, uint16_t *slaves)
683{
684 struct rte_ether_hdr *eth_hdr;
685 uint32_t hash;
686 int i;
687
688 for (i = 0; i < nb_pkts; i++) {
689 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
690
691 hash = ether_hash(eth_hdr);
692
693 slaves[i] = (hash ^= hash >> 8) % slave_count;
694 }
695}
696
697void
698burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
699 uint16_t slave_count, uint16_t *slaves)
700{
701 uint16_t i;
702 struct rte_ether_hdr *eth_hdr;
703 uint16_t proto;
704 size_t vlan_offset;
705 uint32_t hash, l3hash;
706
707 for (i = 0; i < nb_pkts; i++) {
708 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
709 l3hash = 0;
710
711 proto = eth_hdr->ether_type;
712 hash = ether_hash(eth_hdr);
713
714 vlan_offset = get_vlan_offset(eth_hdr, &proto);
715
716 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
717 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
718 ((char *)(eth_hdr + 1) + vlan_offset);
719 l3hash = ipv4_hash(ipv4_hdr);
720
721 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
722 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
723 ((char *)(eth_hdr + 1) + vlan_offset);
724 l3hash = ipv6_hash(ipv6_hdr);
725 }
726
727 hash = hash ^ l3hash;
728 hash ^= hash >> 16;
729 hash ^= hash >> 8;
730
731 slaves[i] = hash % slave_count;
732 }
733}
734
/*
 * L3+L4 TX hash policy: combine IPv4/IPv6 address hash with TCP/UDP port
 * hash (when the transport header is present and in-bounds) and reduce to
 * a slave index per packet.  Fragmented IPv4 packets hash on addresses
 * only so all fragments follow the same slave.
 *
 * NOTE(review): the IPv6 branch reads the TCP/UDP header without the
 * pkt_end bounds check that the IPv4 branch performs -- confirm truncated
 * IPv6 packets cannot reach this path.
 */
void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct rte_udp_hdr *udp_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		/* One past the last valid byte of this segment's data. */
		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				/* IHL gives the IPv4 header length in 32-bit words. */
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& RTE_IPV4_HDR_IHL_MASK) *
					RTE_IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct rte_tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					/* Only hash ports that lie inside the segment. */
					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct rte_udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)udp_hdr + sizeof(*udp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if  (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		/* Fold L3 and L4 contributions, then mix high bits down. */
		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
808
/* Per-slave remaining-bandwidth sample used to order slaves in TLB mode;
 * the two "left" fields are the quotient and remainder computed by
 * bandwidth_left(). */
struct bwg_slave {
	uint64_t bwg_left_int;		/* quotient part of bandwidth left */
	uint64_t bwg_left_remainder;	/* remainder part, used as tie breaker */
	uint16_t slave;			/* slave port id */
};
814
815void
816bond_tlb_activate_slave(struct bond_dev_private *internals) {
817 int i;
818
819 for (i = 0; i < internals->active_slave_count; i++) {
820 tlb_last_obytets[internals->active_slaves[i]] = 0;
821 }
822}
823
824static int
825bandwidth_cmp(const void *a, const void *b)
826{
827 const struct bwg_slave *bwg_a = a;
828 const struct bwg_slave *bwg_b = b;
829 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
830 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
831 (int64_t)bwg_a->bwg_left_remainder;
832 if (diff > 0)
833 return 1;
834 else if (diff < 0)
835 return -1;
836 else if (diff2 > 0)
837 return 1;
838 else if (diff2 < 0)
839 return -1;
840 else
841 return 0;
842}
843
/*
 * Estimate the bandwidth a slave has left, given the bytes it transmitted
 * (load) since the last sample.  Fills bwg_slave with the quotient and
 * remainder of (capacity - scaled load) / capacity; on link-query failure
 * or zero link speed the output is left untouched.
 *
 * NOTE(review): link_bwg and 1000*load are unsigned; if the scaled load
 * exceeds the interval capacity the subtraction wraps around -- confirm
 * callers guarantee load stays below capacity.
 */
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;
	int ret;

	ret = rte_eth_link_get_nowait(port_id, &link_status);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
			     port_id, rte_strerror(-ret));
		return;
	}
	/* Link speed is in Mbit/s; convert to bytes per second. */
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	/* Scale capacity to the elapsed number of reorder intervals. */
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
864
/*
 * Periodic alarm callback for TLB mode: sample each active slave's TX byte
 * counter, estimate its remaining bandwidth, and re-sort
 * internals->tlb_slaves_order so the least-loaded slave is tried first by
 * the TX path.  Re-arms itself every REORDER_PERIOD_MS milliseconds.
 */
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;

	/* Refresh the byte-counter baselines once per full reorder period. */
	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		/* Bytes sent since the last baseline. */
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	/* Most available bandwidth first. */
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
908
909static uint16_t
910bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
911{
912 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
913 struct bond_dev_private *internals = bd_tx_q->dev_private;
914
915 struct rte_eth_dev *primary_port =
916 &rte_eth_devices[internals->primary_port];
917 uint16_t num_tx_total = 0;
918 uint16_t i, j;
919
920 uint16_t num_of_slaves = internals->active_slave_count;
921 uint16_t slaves[RTE_MAX_ETHPORTS];
922
923 struct rte_ether_hdr *ether_hdr;
924 struct rte_ether_addr primary_slave_addr;
925 struct rte_ether_addr active_slave_addr;
926
927 if (num_of_slaves < 1)
928 return num_tx_total;
929
930 memcpy(slaves, internals->tlb_slaves_order,
931 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
932
933
934 rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
935
936 if (nb_pkts > 3) {
937 for (i = 0; i < 3; i++)
938 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
939 }
940
941 for (i = 0; i < num_of_slaves; i++) {
942 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
943 for (j = num_tx_total; j < nb_pkts; j++) {
944 if (j + 3 < nb_pkts)
945 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
946
947 ether_hdr = rte_pktmbuf_mtod(bufs[j],
948 struct rte_ether_hdr *);
949 if (rte_is_same_ether_addr(ðer_hdr->s_addr,
950 &primary_slave_addr))
951 rte_ether_addr_copy(&active_slave_addr,
952 ðer_hdr->s_addr);
953#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
954 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
955#endif
956 }
957
958 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
959 bufs + num_tx_total, nb_pkts - num_tx_total);
960
961 if (num_tx_total == nb_pkts)
962 break;
963 }
964
965 return num_tx_total;
966}
967
/* Stop the periodic TLB slave-reordering alarm. */
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}
973
/* Start TLB slave reordering by running the callback once; it re-arms
 * itself via rte_eal_alarm_set afterwards. */
void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
979
980static uint16_t
981bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
982{
983 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
984 struct bond_dev_private *internals = bd_tx_q->dev_private;
985
986 struct rte_ether_hdr *eth_h;
987 uint16_t ether_type, offset;
988
989 struct client_data *client_info;
990
991
992
993
994
995 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
996 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
997
998
999
1000
1001
1002 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1003 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1004
1005 struct rte_mbuf *upd_pkt;
1006 size_t pkt_size;
1007
1008 uint16_t num_send, num_not_send = 0;
1009 uint16_t num_tx_total = 0;
1010 uint16_t slave_idx;
1011
1012 int i, j;
1013
1014
1015 for (i = 0; i < nb_pkts; i++) {
1016 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1017 ether_type = eth_h->ether_type;
1018 offset = get_vlan_offset(eth_h, ðer_type);
1019
1020 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1021 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1022
1023
1024 rte_eth_macaddr_get(slave_idx, ð_h->s_addr);
1025
1026
1027 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1028 slave_bufs_pkts[slave_idx]++;
1029 } else {
1030
1031 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1032 bufs[i];
1033 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1034 }
1035 }
1036
1037
1038 if (internals->mode6.ntt) {
1039 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1040 client_info = &internals->mode6.client_table[i];
1041
1042 if (client_info->in_use) {
1043
1044 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1045 if (upd_pkt == NULL) {
1046 RTE_BOND_LOG(ERR,
1047 "Failed to allocate ARP packet from pool");
1048 continue;
1049 }
1050 pkt_size = sizeof(struct rte_ether_hdr) +
1051 sizeof(struct rte_arp_hdr) +
1052 client_info->vlan_count *
1053 sizeof(struct rte_vlan_hdr);
1054 upd_pkt->data_len = pkt_size;
1055 upd_pkt->pkt_len = pkt_size;
1056
1057 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1058 internals);
1059
1060
1061 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1062 update_bufs_pkts[slave_idx]++;
1063 }
1064 }
1065 internals->mode6.ntt = 0;
1066 }
1067
1068
1069 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1070 if (slave_bufs_pkts[i] > 0) {
1071 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1072 slave_bufs[i], slave_bufs_pkts[i]);
1073 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1074 bufs[nb_pkts - 1 - num_not_send - j] =
1075 slave_bufs[i][nb_pkts - 1 - j];
1076 }
1077
1078 num_tx_total += num_send;
1079 num_not_send += slave_bufs_pkts[i] - num_send;
1080
1081#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1082
1083 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1084 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1085 struct rte_ether_hdr *);
1086 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1087 }
1088#endif
1089 }
1090 }
1091
1092
1093 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1094 if (update_bufs_pkts[i] > 0) {
1095 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1096 update_bufs_pkts[i]);
1097 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1098 rte_pktmbuf_free(update_bufs[i][j]);
1099 }
1100#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1101 for (j = 0; j < update_bufs_pkts[i]; j++) {
1102 eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1103 struct rte_ether_hdr *);
1104 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1105 }
1106#endif
1107 }
1108 }
1109
1110
1111 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1112 num_send = bond_ethdev_tx_burst_tlb(queue,
1113 slave_bufs[RTE_MAX_ETHPORTS],
1114 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1115
1116 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1117 bufs[nb_pkts - 1 - num_not_send - j] =
1118 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1119 }
1120
1121 num_tx_total += num_send;
1122 }
1123
1124 return num_tx_total;
1125}
1126
/*
 * Distribute a burst of packets across a set of slave ports using the
 * configured transmit hash policy.
 *
 * Packets that a slave cannot accept are moved back to the tail of
 * bufs[] (per the tx_burst contract the caller owns unsent mbufs).
 * Returns the number of packets actually transmitted.
 */
static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
		uint16_t *slave_port_ids, uint16_t slave_count)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	/* Per-slave transmit batches (VLA sized by this burst). */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of packets queued for each slave. */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Hash-selected slave index (0..slave_count-1) for each packet. */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	/*
	 * Compute the destination slave for every packet in a single call;
	 * burst_xmit_hash is selected according to the xmit policy.
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	/* Scatter the burst into per-slave batches. */
	for (i = 0; i < nb_bufs; i++) {
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Transmit each non-empty batch on its slave port. */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/*
		 * If the slave did not accept its whole batch, copy the
		 * unsent mbufs back into the tail of bufs[], growing the
		 * failed region from the end toward the front.
		 */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
					&slave_bufs[i][slave_tx_count],
					slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
1184
1185static uint16_t
1186bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1187 uint16_t nb_bufs)
1188{
1189 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1190 struct bond_dev_private *internals = bd_tx_q->dev_private;
1191
1192 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1193 uint16_t slave_count;
1194
1195 if (unlikely(nb_bufs == 0))
1196 return 0;
1197
1198
1199
1200
1201 slave_count = internals->active_slave_count;
1202 if (unlikely(slave_count < 1))
1203 return 0;
1204
1205 memcpy(slave_port_ids, internals->active_slaves,
1206 sizeof(slave_port_ids[0]) * slave_count);
1207 return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1208 slave_count);
1209}
1210
/*
 * 802.3ad (LACP) transmit path.  First drains any pending LACP control
 * frames from the per-port software tx_ring (unless a dedicated control
 * queue is in use), then balances the data burst over the slaves that
 * are currently in the DISTRIBUTING state.
 */
static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
		bool dedicated_txq)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Subset of slaves allowed to carry data traffic. */
	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	uint16_t slave_tx_count;

	uint16_t i;

	/* Snapshot the active slave list for this burst. */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * With a dedicated control-plane TX queue, LACPDUs are transmitted
	 * elsewhere, so skip draining the software tx_ring.
	 */
	if (dedicated_txq)
		goto skip_tx_ring;

	/* Check for LACP control packets and transmit them if present. */
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
		struct rte_mbuf *ctrl_pkt = NULL;

		if (likely(rte_ring_empty(port->tx_ring)))
			continue;

		if (rte_ring_dequeue(port->tx_ring,
				(void **)&ctrl_pkt) != -ENOENT) {
			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
					bd_tx_q->queue_id, &ctrl_pkt, 1);
			/*
			 * Re-enqueue the LACPDU into the buffering ring if
			 * transmission failed, so it is retried on the next
			 * burst rather than lost.
			 */
			if (slave_tx_count != 1)
				rte_ring_enqueue(port->tx_ring, ctrl_pkt);
		}
	}

skip_tx_ring:
	if (unlikely(nb_bufs == 0))
		return 0;

	/* Only slaves whose actor state is DISTRIBUTING may carry data. */
	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
			dist_slave_count);
}
1280
/* 802.3ad TX entry point when no dedicated control queue is configured. */
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}
1287
/* 802.3ad TX entry point when a dedicated control queue handles LACPDUs. */
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}
1294
/*
 * Broadcast-mode transmit: clone each packet (by reference count) to
 * every active slave.  Returns the best per-slave transmit count; any
 * extra references held for slaves that failed to send are released.
 */
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint8_t tx_failed_flag = 0;
	uint16_t num_of_slaves;

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Snapshot the active slave list for this burst. */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/*
	 * Bump each mbuf's refcnt by (slaves - 1): every slave's TX path
	 * will drop one reference when the packet is eventually freed.
	 */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit the whole burst on every slave, recording per-slave counts. */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* Track the slave that transmitted the most packets. */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/*
	 * Release the references held for every slave that did not send
	 * the full burst.  The most successful slave is skipped: its
	 * unsent packets keep one reference so the caller can retry them.
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
1355
1356static void
1357link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1358{
1359 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1360
1361 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1362
1363
1364
1365
1366 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1367
1368 bond_link->link_autoneg = slave_link->link_autoneg;
1369 bond_link->link_duplex = slave_link->link_duplex;
1370 bond_link->link_speed = slave_link->link_speed;
1371 } else {
1372
1373
1374
1375
1376 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1377 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1378 }
1379}
1380
1381static int
1382link_properties_valid(struct rte_eth_dev *ethdev,
1383 struct rte_eth_link *slave_link)
1384{
1385 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1386
1387 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1388 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1389
1390 if (bond_link->link_duplex != slave_link->link_duplex ||
1391 bond_link->link_autoneg != slave_link->link_autoneg ||
1392 bond_link->link_speed != slave_link->link_speed)
1393 return -1;
1394 }
1395
1396 return 0;
1397}
1398
1399int
1400mac_address_get(struct rte_eth_dev *eth_dev,
1401 struct rte_ether_addr *dst_mac_addr)
1402{
1403 struct rte_ether_addr *mac_addr;
1404
1405 if (eth_dev == NULL) {
1406 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1407 return -1;
1408 }
1409
1410 if (dst_mac_addr == NULL) {
1411 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1412 return -1;
1413 }
1414
1415 mac_addr = eth_dev->data->mac_addrs;
1416
1417 rte_ether_addr_copy(mac_addr, dst_mac_addr);
1418 return 0;
1419}
1420
1421int
1422mac_address_set(struct rte_eth_dev *eth_dev,
1423 struct rte_ether_addr *new_mac_addr)
1424{
1425 struct rte_ether_addr *mac_addr;
1426
1427 if (eth_dev == NULL) {
1428 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1429 return -1;
1430 }
1431
1432 if (new_mac_addr == NULL) {
1433 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1434 return -1;
1435 }
1436
1437 mac_addr = eth_dev->data->mac_addrs;
1438
1439
1440 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1441 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1442
1443 return 0;
1444}
1445
1446static const struct rte_ether_addr null_mac_addr;
1447
1448
1449
1450
/*
 * Program the bonded device's secondary MAC addresses (slots 1..N,
 * up to the first all-zero entry) onto the given slave port.
 * On failure, roll back the addresses added so far.
 * Returns 0 on success, or the negative error from
 * rte_eth_dev_mac_addr_add() on failure.
 */
int
slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, ret;
	struct rte_ether_addr *mac_addr;

	/* Slot 0 is the primary address and is handled elsewhere. */
	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		/* An all-zero entry marks the end of the configured list. */
		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
		if (ret < 0) {
			/* Roll back every address added in this loop so far. */
			for (i--; i > 0; i--)
				rte_eth_dev_mac_addr_remove(slave_port_id,
					&bonded_eth_dev->data->mac_addrs[i]);
			return ret;
		}
	}

	return 0;
}
1475
1476
1477
1478
1479int
1480slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1481 uint16_t slave_port_id)
1482{
1483 int i, rc, ret;
1484 struct rte_ether_addr *mac_addr;
1485
1486 rc = 0;
1487 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1488 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1489 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1490 break;
1491
1492 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1493
1494 if (ret < 0 && rc == 0)
1495 rc = ret;
1496 }
1497
1498 return rc;
1499}
1500
/*
 * Push the appropriate MAC address to every slave according to the
 * bonding mode: all slaves share the bonded MAC (RR/balance/broadcast),
 * mode 4 delegates to its own helper, and in the remaining modes only
 * the primary carries the bonded MAC while the others keep their
 * original (persisted) addresses.  Returns 0 on success, -1 on failure.
 */
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	bool set;
	int i;

	/* Nothing to update without slaves. */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		/* Every slave carries the bonded device's MAC. */
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/*
		 * Primary gets the bonded MAC; every other slave is
		 * restored to the MAC it had before being enslaved.
		 */
		set = true;
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->current_primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					set = false;
				}
			} else {
				/* Non-primary failures are logged but non-fatal. */
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
				}
			}
		}
		if (!set)
			return -1;
	}

	return 0;
}
1559
/*
 * Install the RX/TX burst handlers matching the requested bonding mode
 * and record the mode in the device private data.  Modes with extra
 * state (802.3ad, ALB) are enabled first and may fail.
 * Returns 0 on success, -1 on unknown mode or mode-enable failure.
 */
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			/* Software path: LACPDUs share the data queues. */
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_BOND_LOG(WARNING,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.");
		} else {
			/* Dedicated control queues carry the LACPDUs. */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
1621
1622
1623static int
1624slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1625 struct rte_eth_dev *slave_eth_dev)
1626{
1627 int errval = 0;
1628 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1629 struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1630
1631 if (port->slow_pool == NULL) {
1632 char mem_name[256];
1633 int slave_id = slave_eth_dev->data->port_id;
1634
1635 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1636 slave_id);
1637 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1638 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1639 slave_eth_dev->data->numa_node);
1640
1641
1642
1643 if (port->slow_pool == NULL) {
1644 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1645 slave_id, mem_name, rte_strerror(rte_errno));
1646 }
1647 }
1648
1649 if (internals->mode4.dedicated_queues.enabled == 1) {
1650
1651
1652 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1653 internals->mode4.dedicated_queues.rx_qid, 128,
1654 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1655 NULL, port->slow_pool);
1656 if (errval != 0) {
1657 RTE_BOND_LOG(ERR,
1658 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1659 slave_eth_dev->data->port_id,
1660 internals->mode4.dedicated_queues.rx_qid,
1661 errval);
1662 return errval;
1663 }
1664
1665 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1666 internals->mode4.dedicated_queues.tx_qid, 512,
1667 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1668 NULL);
1669 if (errval != 0) {
1670 RTE_BOND_LOG(ERR,
1671 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1672 slave_eth_dev->data->port_id,
1673 internals->mode4.dedicated_queues.tx_qid,
1674 errval);
1675 return errval;
1676 }
1677 }
1678 return 0;
1679}
1680
1681int
1682slave_configure(struct rte_eth_dev *bonded_eth_dev,
1683 struct rte_eth_dev *slave_eth_dev)
1684{
1685 struct bond_rx_queue *bd_rx_q;
1686 struct bond_tx_queue *bd_tx_q;
1687 uint16_t nb_rx_queues;
1688 uint16_t nb_tx_queues;
1689
1690 int errval;
1691 uint16_t q_id;
1692 struct rte_flow_error flow_error;
1693
1694 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1695
1696
1697 errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1698 if (errval != 0)
1699 RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1700 slave_eth_dev->data->port_id, errval);
1701
1702
1703 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1704 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1705
1706
1707 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1708 if (internals->rss_key_len != 0) {
1709 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1710 internals->rss_key_len;
1711 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1712 internals->rss_key;
1713 } else {
1714 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1715 }
1716
1717 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1718 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1719 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1720 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1721 }
1722
1723 if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1724 DEV_RX_OFFLOAD_VLAN_FILTER)
1725 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1726 DEV_RX_OFFLOAD_VLAN_FILTER;
1727 else
1728 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1729 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1730
1731 slave_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1732 bonded_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1733
1734 if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1735 DEV_RX_OFFLOAD_JUMBO_FRAME)
1736 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1737 DEV_RX_OFFLOAD_JUMBO_FRAME;
1738 else
1739 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1740 ~DEV_RX_OFFLOAD_JUMBO_FRAME;
1741
1742 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1743 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1744
1745 if (internals->mode == BONDING_MODE_8023AD) {
1746 if (internals->mode4.dedicated_queues.enabled == 1) {
1747 nb_rx_queues++;
1748 nb_tx_queues++;
1749 }
1750 }
1751
1752 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1753 bonded_eth_dev->data->mtu);
1754 if (errval != 0 && errval != -ENOTSUP) {
1755 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1756 slave_eth_dev->data->port_id, errval);
1757 return errval;
1758 }
1759
1760
1761 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1762 nb_rx_queues, nb_tx_queues,
1763 &(slave_eth_dev->data->dev_conf));
1764 if (errval != 0) {
1765 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1766 slave_eth_dev->data->port_id, errval);
1767 return errval;
1768 }
1769
1770
1771 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1772 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1773
1774 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1775 bd_rx_q->nb_rx_desc,
1776 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1777 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1778 if (errval != 0) {
1779 RTE_BOND_LOG(ERR,
1780 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1781 slave_eth_dev->data->port_id, q_id, errval);
1782 return errval;
1783 }
1784 }
1785
1786
1787 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1788 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1789
1790 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1791 bd_tx_q->nb_tx_desc,
1792 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1793 &bd_tx_q->tx_conf);
1794 if (errval != 0) {
1795 RTE_BOND_LOG(ERR,
1796 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1797 slave_eth_dev->data->port_id, q_id, errval);
1798 return errval;
1799 }
1800 }
1801
1802 if (internals->mode == BONDING_MODE_8023AD &&
1803 internals->mode4.dedicated_queues.enabled == 1) {
1804 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1805 != 0)
1806 return errval;
1807
1808 errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1809 slave_eth_dev->data->port_id);
1810 if (errval != 0) {
1811 RTE_BOND_LOG(ERR,
1812 "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1813 slave_eth_dev->data->port_id, errval);
1814 return errval;
1815 }
1816
1817 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1818 rte_flow_destroy(slave_eth_dev->data->port_id,
1819 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1820 &flow_error);
1821
1822 errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1823 slave_eth_dev->data->port_id);
1824 if (errval != 0) {
1825 RTE_BOND_LOG(ERR,
1826 "bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
1827 slave_eth_dev->data->port_id, errval);
1828 return errval;
1829 }
1830 }
1831
1832
1833 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1834 if (errval != 0) {
1835 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1836 slave_eth_dev->data->port_id, errval);
1837 return -1;
1838 }
1839
1840
1841 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1842 int i;
1843 struct bond_dev_private *internals;
1844
1845 internals = bonded_eth_dev->data->dev_private;
1846
1847 for (i = 0; i < internals->slave_count; i++) {
1848 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1849 errval = rte_eth_dev_rss_reta_update(
1850 slave_eth_dev->data->port_id,
1851 &internals->reta_conf[0],
1852 internals->slaves[i].reta_size);
1853 if (errval != 0) {
1854 RTE_BOND_LOG(WARNING,
1855 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1856 " RSS Configuration for bonding may be inconsistent.",
1857 slave_eth_dev->data->port_id, errval);
1858 }
1859 break;
1860 }
1861 }
1862 }
1863
1864
1865 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1866 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1867 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1868 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1869 NULL);
1870 }
1871
1872 return 0;
1873}
1874
/*
 * Remove a slave from the internals->slaves array: locate it, compact
 * the array (and each flow's per-slave handle array) over it, shrink
 * the count, and reset the slave's ethdev state.
 * NOTE(review): assumes the slave is present; if it is not found, the
 * count is still decremented — callers appear to guarantee membership.
 */
void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	/* Find the slave's index in the slave table. */
	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1)) {
		struct rte_flow *flow;

		/* Shift the remaining slaves down over the removed entry. */
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));
		/* Keep each flow's per-slave handle array in step. */
		TAILQ_FOREACH(flow, &internals->flow_list, next) {
			memmove(&flow->flows[i], &flow->flows[i + 1],
				sizeof(flow->flows[0]) *
				(internals->slave_count - i - 1));
			flow->flows[internals->slave_count - 1] = NULL;
		}
	}

	internals->slave_count--;

	/* Force the removed slave back to a pristine ethdev state. */
	rte_eth_dev_internal_reset(slave_eth_dev);
}
1905
1906static void
1907bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1908
1909void
1910slave_add(struct bond_dev_private *internals,
1911 struct rte_eth_dev *slave_eth_dev)
1912{
1913 struct bond_slave_details *slave_details =
1914 &internals->slaves[internals->slave_count];
1915
1916 slave_details->port_id = slave_eth_dev->data->port_id;
1917 slave_details->last_link_status = 0;
1918
1919
1920
1921
1922 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1923 slave_details->link_status_poll_enabled = 1;
1924 }
1925
1926 slave_details->link_status_wait_to_complete = 0;
1927
1928 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1929 sizeof(struct rte_ether_addr));
1930}
1931
1932void
1933bond_ethdev_primary_set(struct bond_dev_private *internals,
1934 uint16_t slave_port_id)
1935{
1936 int i;
1937
1938 if (internals->active_slave_count < 1)
1939 internals->current_primary_port = slave_port_id;
1940 else
1941
1942 for (i = 0; i < internals->active_slave_count; i++) {
1943 if (internals->active_slaves[i] == slave_port_id)
1944 internals->current_primary_port = slave_port_id;
1945 }
1946}
1947
1948static int
1949bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1950
/*
 * Start the bonded device: derive the MAC from the primary slave when
 * the user did not set one, reconfigure and start every slave, arm the
 * link-status polling alarm if any slave needs polling, push MACs to
 * the slaves, and kick off mode-specific machinery (802.3ad state
 * machine, TLB/ALB balancing).  Returns 0 on success, -1 on failure
 * (dev_started is cleared again on the error path).
 */
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		goto out_err;
	}

	/* No user-set MAC: inherit the primary slave's original MAC. */
	if (internals->user_defined_mac == 0) {
		struct rte_ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			goto out_err;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			goto out_err;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD) {
		/* Dedicated control queues are placed after the data queues. */
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			goto out_err;
		}
		/* Any slave without LSC interrupts forces polling mode on. */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* Arm the periodic link-status polling alarm if required. */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	/* Update all slave devices MACs*/
	if (mac_address_slaves_update(eth_dev) != 0)
		goto out_err;

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;

out_err:
	eth_dev->data->dev_started = 0;
	return -1;
}
2047
2048static void
2049bond_ethdev_free_queues(struct rte_eth_dev *dev)
2050{
2051 uint16_t i;
2052
2053 if (dev->data->rx_queues != NULL) {
2054 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2055 rte_free(dev->data->rx_queues[i]);
2056 dev->data->rx_queues[i] = NULL;
2057 }
2058 dev->data->nb_rx_queues = 0;
2059 }
2060
2061 if (dev->data->tx_queues != NULL) {
2062 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2063 rte_free(dev->data->tx_queues[i]);
2064 dev->data->tx_queues[i] = NULL;
2065 }
2066 dev->data->nb_tx_queues = 0;
2067 }
2068}
2069
/*
 * Stop the bonded device: halt the mode-specific machinery (802.3ad
 * state machine and its buffered control frames, TLB/ALB balancing),
 * mark the link down, and stop/deactivate every slave that is still
 * in the active list.  Returns 0 on success or the first slave-stop
 * error encountered.
 */
int
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint16_t i;
	int ret;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all buffered LACP control frames on every active slave. */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		/* Reset the TLB byte counters used for load measurement. */
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;

	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++) {
		uint16_t slave_id = internals->slaves[i].port_id;
		/* Only slaves currently in the active list are stopped. */
		if (find_slave_by_id(internals->active_slaves,
				internals->active_slave_count, slave_id) !=
						internals->active_slave_count) {
			internals->slaves[i].last_link_status = 0;
			ret = rte_eth_dev_stop(slave_id);
			/* NOTE(review): a stop failure aborts the loop, leaving
			 * later slaves running — confirm this is intended. */
			if (ret != 0) {
				RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
					     slave_id);
				return ret;
			}
			deactivate_slave(eth_dev, slave_id);
		}
	}

	return 0;
}
2126
/*
 * Close the bonded device (primary process only): stop and remove every
 * slave, flush flow rules, free the queues, and release the VLAN-filter
 * bitmap and mode-6 mempool.  Slaves that fail to stop or be removed
 * are skipped (counted) so the loop always terminates.
 */
int
bond_ethdev_close(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint16_t bond_port_id = internals->port_id;
	/* Number of slaves left at the front of the list after failures. */
	int skipped = 0;
	struct rte_flow_error ferror;

	/* Device teardown is owned by the primary process. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
	while (internals->slave_count != skipped) {
		uint16_t port_id = internals->slaves[skipped].port_id;

		if (rte_eth_dev_stop(port_id) != 0) {
			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
				     port_id);
			skipped++;
		}

		/* Successful removal compacts the list, so 'skipped' stays put. */
		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to remove port %d from bonded device %s",
				     port_id, dev->device->name);
			skipped++;
		}
	}
	bond_flow_ops.flush(dev, &ferror);
	bond_ethdev_free_queues(dev);
	rte_bitmap_reset(internals->vlan_filter_bmp);
	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);

	/* Mode-6 (ALB) ARP mempool; rte_mempool_free(NULL) is a no-op. */
	rte_mempool_free(internals->mode6.mempool);

	return 0;
}
2168
2169
2170static int bond_ethdev_configure(struct rte_eth_dev *dev);
2171
/*
 * Report device info for the bonded port.  Queue and descriptor limits
 * are the minimum over all current slaves (or UINT16_MAX with no
 * slaves); offload capabilities come from the precomputed intersection
 * stored in internals.  With mode-4 dedicated queues enabled, one
 * RX/TX queue pair is reserved for control traffic and subtracted from
 * the advertised maximums.
 */
static int
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct bond_slave_details slave;
	int ret;

	/* Start from "unlimited" and take the minimum over the slaves. */
	uint16_t max_nb_rx_queues = UINT16_MAX;
	uint16_t max_nb_tx_queues = UINT16_MAX;
	uint16_t max_rx_desc_lim = UINT16_MAX;
	uint16_t max_tx_desc_lim = UINT16_MAX;

	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;

	/* Use the candidate value negotiated from the slaves if present. */
	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
			internals->candidate_max_rx_pktlen :
			RTE_ETHER_MAX_JUMBO_FRAME_LEN;

	if (internals->slave_count > 0) {
		struct rte_eth_dev_info slave_info;
		uint16_t idx;

		for (idx = 0; idx < internals->slave_count; idx++) {
			slave = internals->slaves[idx];
			ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
			if (ret != 0) {
				RTE_BOND_LOG(ERR,
					"%s: Error during getting device (port %u) info: %s\n",
					__func__,
					slave.port_id,
					strerror(-ret));

				return ret;
			}

			if (slave_info.max_rx_queues < max_nb_rx_queues)
				max_nb_rx_queues = slave_info.max_rx_queues;

			if (slave_info.max_tx_queues < max_nb_tx_queues)
				max_nb_tx_queues = slave_info.max_tx_queues;

			if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
				max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;

			if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
				max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
		}
	}

	dev_info->max_rx_queues = max_nb_rx_queues;
	dev_info->max_tx_queues = max_nb_tx_queues;

	memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
	       sizeof(dev_info->default_rxconf));
	memcpy(&dev_info->default_txconf, &internals->default_txconf,
	       sizeof(dev_info->default_txconf));

	dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
	dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;

	/*
	 * With dedicated 802.3ad control queues, one queue pair per slave
	 * is consumed by LACP traffic, so it is not available to the app.
	 */
	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		dev_info->max_rx_queues--;
		dev_info->max_tx_queues--;
	}

	dev_info->min_rx_bufsize = 0;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;

	return 0;
}
2258
2259static int
2260bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2261{
2262 int res;
2263 uint16_t i;
2264 struct bond_dev_private *internals = dev->data->dev_private;
2265
2266
2267 rte_spinlock_lock(&internals->lock);
2268
2269 if (on)
2270 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2271 else
2272 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2273
2274 for (i = 0; i < internals->slave_count; i++) {
2275 uint16_t port_id = internals->slaves[i].port_id;
2276
2277 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2278 if (res == ENOTSUP)
2279 RTE_BOND_LOG(WARNING,
2280 "Setting VLAN filter on slave port %u not supported.",
2281 port_id);
2282 }
2283
2284 rte_spinlock_unlock(&internals->lock);
2285 return 0;
2286}
2287
2288static int
2289bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2290 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2291 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2292{
2293 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2294 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2295 0, dev->data->numa_node);
2296 if (bd_rx_q == NULL)
2297 return -1;
2298
2299 bd_rx_q->queue_id = rx_queue_id;
2300 bd_rx_q->dev_private = dev->data->dev_private;
2301
2302 bd_rx_q->nb_rx_desc = nb_rx_desc;
2303
2304 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2305 bd_rx_q->mb_pool = mb_pool;
2306
2307 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2308
2309 return 0;
2310}
2311
2312static int
2313bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2314 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2315 const struct rte_eth_txconf *tx_conf)
2316{
2317 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2318 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2319 0, dev->data->numa_node);
2320
2321 if (bd_tx_q == NULL)
2322 return -1;
2323
2324 bd_tx_q->queue_id = tx_queue_id;
2325 bd_tx_q->dev_private = dev->data->dev_private;
2326
2327 bd_tx_q->nb_tx_desc = nb_tx_desc;
2328 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2329
2330 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2331
2332 return 0;
2333}
2334
static void
bond_ethdev_rx_queue_release(void *queue)
{
	/* rte_free() is documented as a no-op for NULL, so the explicit
	 * NULL guard was redundant.
	 */
	rte_free(queue);
}
2343
static void
bond_ethdev_tx_queue_release(void *queue)
{
	/* rte_free() is documented as a no-op for NULL, so the explicit
	 * NULL guard was redundant.
	 */
	rte_free(queue);
}
2352
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Periodic EAL-alarm callback: poll link status of every slave with
	 * polling enabled and raise an LSC event for any slave whose status
	 * changed since the last poll. cb_arg is the bonded rte_eth_dev.
	 */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = cb_arg;
	internals = bonded_ethdev->data->dev_private;

	/* Stop rescheduling once the device is stopped or polling disabled. */
	if (!bonded_ethdev->data->dev_started ||
		!internals->link_status_polling_enabled)
		return;

	/* If the lock is contended, skip this round instead of blocking in
	 * the alarm context; polling_slave_found stays 1 so the alarm is
	 * re-armed below and the slaves are retried on the next tick.
	 */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Ask the slave driver directly for its link state. */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
				internals->slaves[i].link_status_wait_to_complete);

			/* Fire the LSC callback only on an actual transition
			 * relative to the last recorded status.
			 */
			if (slave_ethdev->data->dev_link.link_status !=
				internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
					slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
					RTE_ETH_EVENT_INTR_LSC,
					&bonded_ethdev->data->port_id,
					NULL);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Re-arm the alarm for the next polling interval (ms -> us). */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
2411
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
	int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

	struct bond_dev_private *bond_ctx;
	struct rte_eth_link slave_link;

	bool one_link_update_succeeded;
	uint32_t idx;
	int ret;

	bond_ctx = ethdev->data->dev_private;

	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

	/* The bonded link is down while the device is stopped or no slave
	 * is active.
	 */
	if (ethdev->data->dev_started == 0 ||
			bond_ctx->active_slave_count == 0) {
		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
		return 0;
	}

	ethdev->data->dev_link.link_status = ETH_LINK_UP;

	/* Select the blocking or non-blocking slave link query. */
	if (wait_to_complete)
		link_update = rte_eth_link_get;
	else
		link_update = rte_eth_link_get_nowait;

	switch (bond_ctx->mode) {
	case BONDING_MODE_BROADCAST:
		/* Every packet is sent on all slaves, so the bonded speed is
		 * the minimum active-slave speed. Start from UINT32_MAX so
		 * the first slave always lowers it.
		 */
		ethdev->data->dev_link.link_speed = UINT32_MAX;

		/* Any slave failing the query forces speed back to NONE and
		 * ends the computation.
		 */
		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			ret = link_update(bond_ctx->active_slaves[idx],
					&slave_link);
			if (ret < 0) {
				ethdev->data->dev_link.link_speed =
					ETH_SPEED_NUM_NONE;
				RTE_BOND_LOG(ERR,
					"Slave (port %u) link get failed: %s",
					bond_ctx->active_slaves[idx],
					rte_strerror(-ret));
				return 0;
			}

			if (slave_link.link_speed <
					ethdev->data->dev_link.link_speed)
				ethdev->data->dev_link.link_speed =
						slave_link.link_speed;
		}
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		/* Only the current primary carries traffic; mirror its speed. */
		ret = link_update(bond_ctx->current_primary_port, &slave_link);
		if (ret < 0) {
			RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
				bond_ctx->current_primary_port,
				rte_strerror(-ret));
			return 0;
		}

		ethdev->data->dev_link.link_speed = slave_link.link_speed;
		break;
	case BONDING_MODE_8023AD:
		/* Take autoneg/duplex from the mode 4 reference link, then
		 * fall through to accumulate speed like the other
		 * load-sharing modes.
		 */
		ethdev->data->dev_link.link_autoneg =
			bond_ctx->mode4.slave_link.link_autoneg;
		ethdev->data->dev_link.link_duplex =
			bond_ctx->mode4.slave_link.link_duplex;
		/* fall through */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/* Traffic is spread across slaves: report the sum of the
		 * active slaves' speeds; a slave whose query fails is
		 * skipped as long as at least one slave succeeds.
		 */
		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
		one_link_update_succeeded = false;

		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			ret = link_update(bond_ctx->active_slaves[idx],
					&slave_link);
			if (ret < 0) {
				RTE_BOND_LOG(ERR,
					"Slave (port %u) link get failed: %s",
					bond_ctx->active_slaves[idx],
					rte_strerror(-ret));
				continue;
			}

			one_link_update_succeeded = true;
			ethdev->data->dev_link.link_speed +=
					slave_link.link_speed;
		}

		if (!one_link_update_succeeded) {
			RTE_BOND_LOG(ERR, "All slaves link get failed");
			return 0;
		}
	}

	/* Always reports success; errors above only affect reported speed. */
	return 0;
}
2529
2530
2531static int
2532bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2533{
2534 struct bond_dev_private *internals = dev->data->dev_private;
2535 struct rte_eth_stats slave_stats;
2536 int i, j;
2537
2538 for (i = 0; i < internals->slave_count; i++) {
2539 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2540
2541 stats->ipackets += slave_stats.ipackets;
2542 stats->opackets += slave_stats.opackets;
2543 stats->ibytes += slave_stats.ibytes;
2544 stats->obytes += slave_stats.obytes;
2545 stats->imissed += slave_stats.imissed;
2546 stats->ierrors += slave_stats.ierrors;
2547 stats->oerrors += slave_stats.oerrors;
2548 stats->rx_nombuf += slave_stats.rx_nombuf;
2549
2550 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2551 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2552 stats->q_opackets[j] += slave_stats.q_opackets[j];
2553 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2554 stats->q_obytes[j] += slave_stats.q_obytes[j];
2555 stats->q_errors[j] += slave_stats.q_errors[j];
2556 }
2557
2558 }
2559
2560 return 0;
2561}
2562
2563static int
2564bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2565{
2566 struct bond_dev_private *internals = dev->data->dev_private;
2567 int i;
2568 int err;
2569 int ret;
2570
2571 for (i = 0, err = 0; i < internals->slave_count; i++) {
2572 ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2573 if (ret != 0)
2574 err = ret;
2575 }
2576
2577 return err;
2578}
2579
2580static int
2581bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2582{
2583 struct bond_dev_private *internals = eth_dev->data->dev_private;
2584 int i;
2585 int ret = 0;
2586 uint16_t port_id;
2587
2588 switch (internals->mode) {
2589
2590 case BONDING_MODE_ROUND_ROBIN:
2591 case BONDING_MODE_BALANCE:
2592 case BONDING_MODE_BROADCAST:
2593 case BONDING_MODE_8023AD: {
2594 unsigned int slave_ok = 0;
2595
2596 for (i = 0; i < internals->slave_count; i++) {
2597 port_id = internals->slaves[i].port_id;
2598
2599 ret = rte_eth_promiscuous_enable(port_id);
2600 if (ret != 0)
2601 RTE_BOND_LOG(ERR,
2602 "Failed to enable promiscuous mode for port %u: %s",
2603 port_id, rte_strerror(-ret));
2604 else
2605 slave_ok++;
2606 }
2607
2608
2609
2610
2611 if (slave_ok > 0)
2612 ret = 0;
2613 break;
2614 }
2615
2616 case BONDING_MODE_ACTIVE_BACKUP:
2617 case BONDING_MODE_TLB:
2618 case BONDING_MODE_ALB:
2619 default:
2620
2621 if (internals->slave_count == 0)
2622 break;
2623 port_id = internals->current_primary_port;
2624 ret = rte_eth_promiscuous_enable(port_id);
2625 if (ret != 0)
2626 RTE_BOND_LOG(ERR,
2627 "Failed to enable promiscuous mode for port %u: %s",
2628 port_id, rte_strerror(-ret));
2629 }
2630
2631 return ret;
2632}
2633
2634static int
2635bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2636{
2637 struct bond_dev_private *internals = dev->data->dev_private;
2638 int i;
2639 int ret = 0;
2640 uint16_t port_id;
2641
2642 switch (internals->mode) {
2643
2644 case BONDING_MODE_ROUND_ROBIN:
2645 case BONDING_MODE_BALANCE:
2646 case BONDING_MODE_BROADCAST:
2647 case BONDING_MODE_8023AD: {
2648 unsigned int slave_ok = 0;
2649
2650 for (i = 0; i < internals->slave_count; i++) {
2651 port_id = internals->slaves[i].port_id;
2652
2653 if (internals->mode == BONDING_MODE_8023AD &&
2654 bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2655 BOND_8023AD_FORCED_PROMISC) {
2656 slave_ok++;
2657 continue;
2658 }
2659 ret = rte_eth_promiscuous_disable(port_id);
2660 if (ret != 0)
2661 RTE_BOND_LOG(ERR,
2662 "Failed to disable promiscuous mode for port %u: %s",
2663 port_id, rte_strerror(-ret));
2664 else
2665 slave_ok++;
2666 }
2667
2668
2669
2670
2671 if (slave_ok > 0)
2672 ret = 0;
2673 break;
2674 }
2675
2676 case BONDING_MODE_ACTIVE_BACKUP:
2677 case BONDING_MODE_TLB:
2678 case BONDING_MODE_ALB:
2679 default:
2680
2681 if (internals->slave_count == 0)
2682 break;
2683 port_id = internals->current_primary_port;
2684 ret = rte_eth_promiscuous_disable(port_id);
2685 if (ret != 0)
2686 RTE_BOND_LOG(ERR,
2687 "Failed to disable promiscuous mode for port %u: %s",
2688 port_id, rte_strerror(-ret));
2689 }
2690
2691 return ret;
2692}
2693
2694static int
2695bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2696{
2697 struct bond_dev_private *internals = eth_dev->data->dev_private;
2698 int i;
2699 int ret = 0;
2700 uint16_t port_id;
2701
2702 switch (internals->mode) {
2703
2704 case BONDING_MODE_ROUND_ROBIN:
2705 case BONDING_MODE_BALANCE:
2706 case BONDING_MODE_BROADCAST:
2707 case BONDING_MODE_8023AD: {
2708 unsigned int slave_ok = 0;
2709
2710 for (i = 0; i < internals->slave_count; i++) {
2711 port_id = internals->slaves[i].port_id;
2712
2713 ret = rte_eth_allmulticast_enable(port_id);
2714 if (ret != 0)
2715 RTE_BOND_LOG(ERR,
2716 "Failed to enable allmulti mode for port %u: %s",
2717 port_id, rte_strerror(-ret));
2718 else
2719 slave_ok++;
2720 }
2721
2722
2723
2724
2725 if (slave_ok > 0)
2726 ret = 0;
2727 break;
2728 }
2729
2730 case BONDING_MODE_ACTIVE_BACKUP:
2731 case BONDING_MODE_TLB:
2732 case BONDING_MODE_ALB:
2733 default:
2734
2735 if (internals->slave_count == 0)
2736 break;
2737 port_id = internals->current_primary_port;
2738 ret = rte_eth_allmulticast_enable(port_id);
2739 if (ret != 0)
2740 RTE_BOND_LOG(ERR,
2741 "Failed to enable allmulti mode for port %u: %s",
2742 port_id, rte_strerror(-ret));
2743 }
2744
2745 return ret;
2746}
2747
2748static int
2749bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2750{
2751 struct bond_dev_private *internals = eth_dev->data->dev_private;
2752 int i;
2753 int ret = 0;
2754 uint16_t port_id;
2755
2756 switch (internals->mode) {
2757
2758 case BONDING_MODE_ROUND_ROBIN:
2759 case BONDING_MODE_BALANCE:
2760 case BONDING_MODE_BROADCAST:
2761 case BONDING_MODE_8023AD: {
2762 unsigned int slave_ok = 0;
2763
2764 for (i = 0; i < internals->slave_count; i++) {
2765 uint16_t port_id = internals->slaves[i].port_id;
2766
2767 if (internals->mode == BONDING_MODE_8023AD &&
2768 bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2769 BOND_8023AD_FORCED_ALLMULTI)
2770 continue;
2771
2772 ret = rte_eth_allmulticast_disable(port_id);
2773 if (ret != 0)
2774 RTE_BOND_LOG(ERR,
2775 "Failed to disable allmulti mode for port %u: %s",
2776 port_id, rte_strerror(-ret));
2777 else
2778 slave_ok++;
2779 }
2780
2781
2782
2783
2784 if (slave_ok > 0)
2785 ret = 0;
2786 break;
2787 }
2788
2789 case BONDING_MODE_ACTIVE_BACKUP:
2790 case BONDING_MODE_TLB:
2791 case BONDING_MODE_ALB:
2792 default:
2793
2794 if (internals->slave_count == 0)
2795 break;
2796 port_id = internals->current_primary_port;
2797 ret = rte_eth_allmulticast_disable(port_id);
2798 if (ret != 0)
2799 RTE_BOND_LOG(ERR,
2800 "Failed to disable allmulti mode for port %u: %s",
2801 port_id, rte_strerror(-ret));
2802 }
2803
2804 return ret;
2805}
2806
2807static void
2808bond_ethdev_delayed_lsc_propagation(void *arg)
2809{
2810 if (arg == NULL)
2811 return;
2812
2813 rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2814 RTE_ETH_EVENT_INTR_LSC, NULL);
2815}
2816
int
bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;
	int rc = -1;
	int ret;

	uint8_t lsc_flag = 0;
	int valid_slave = 0;
	uint16_t active_pos;
	uint16_t i;

	/* LSC callback registered on slave ports. port_id is the slave that
	 * changed state; param carries the bonded device's port id.
	 */
	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return rc;

	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return rc;

	internals = bonded_eth_dev->data->dev_private;

	/* Ignore events while the bonded device is not started. */
	if (!bonded_eth_dev->data->dev_started)
		return rc;

	/* Verify the reporting port really is one of our slaves. */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return rc;

	/* Serialize concurrent LSC notifications (interrupt callbacks and
	 * the polling monitor both end up here).
	 */
	rte_spinlock_lock(&internals->lsc_lock);

	/* Position of the slave in the active list; equals
	 * active_slave_count when it is not currently active.
	 */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	ret = rte_eth_link_get_nowait(port_id, &link);
	if (ret < 0)
		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);

	if (ret == 0 && link.link_status) {
		/* Slave reports link up. Already active: just refresh. */
		if (active_pos < internals->active_slave_count)
			goto link_update;

		/* Validate (bonded link already up) or adopt (first link)
		 * the slave's link properties.
		 */
		if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
			if (link_properties_valid(bonded_eth_dev, &link) != 0)
				RTE_BOND_LOG(ERR, "Invalid link properties "
					     "for slave %d in bonding mode %d",
					     port_id, internals->mode);
		} else {
			/* Inherit this slave's link properties. */
			link_properties_set(bonded_eth_dev, &link);
		}

		/* First slave to come up: bring the bonded link up, make
		 * this slave primary and flag LSC propagation below.
		 */
		if (internals->active_slave_count < 1) {
			bonded_eth_dev->data->dev_link.link_status =
								ETH_LINK_UP;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);
		}

		activate_slave(bonded_eth_dev, port_id);

		/* Restore the user-configured primary port if this is it. */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		/* Link down (or query failed). Not active: nothing to do. */
		if (active_pos == internals->active_slave_count)
			goto link_update;

		deactivate_slave(bonded_eth_dev, port_id);

		/* Last active slave gone: flag LSC propagation below. */
		if (internals->active_slave_count < 1)
			lsc_flag = 1;

		/* Elect a new primary when the current one went down. */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
			mac_address_slaves_update(bonded_eth_dev);
		}
	}

link_update:
	/* Recompute the bonded device's aggregate link state. */
	bond_ethdev_link_update(bonded_eth_dev, 0);

	if (lsc_flag) {
		/* Cancel any pending delayed propagation before deciding how
		 * to deliver this transition.
		 */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		/* Deliver the LSC event immediately, or defer it by the
		 * configured up/down delay (ms converted to us).
		 */
		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL);

		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL);
		}
	}

	rte_spinlock_unlock(&internals->lsc_lock);

	/* NOTE(review): rc is always -1 here, even on the handled paths —
	 * callers appear to ignore the return value; confirm before relying
	 * on it.
	 */
	return rc;
}
2968
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Number of 64-entry groups covering reta_size, rounded up. */
	reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) /
			RTE_RETA_GROUP_SIZE;

	/* Copy the entries selected by each group's mask into the bonded
	 * device's cached redirection table.
	 */
	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Tile the first reta_count groups over the remainder of the cached
	 * table, so slaves with larger tables get a complete configuration.
	 */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate the cached table to every slave, each with its own
	 * table size; abort on the first failure.
	 */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
3009
3010static int
3011bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3012 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3013{
3014 int i, j;
3015 struct bond_dev_private *internals = dev->data->dev_private;
3016
3017 if (reta_size != internals->reta_size)
3018 return -EINVAL;
3019
3020
3021 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
3022 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3023 if ((reta_conf[i].mask >> j) & 0x01)
3024 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3025
3026 return 0;
3027}
3028
3029static int
3030bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3031 struct rte_eth_rss_conf *rss_conf)
3032{
3033 int i, result = 0;
3034 struct bond_dev_private *internals = dev->data->dev_private;
3035 struct rte_eth_rss_conf bond_rss_conf;
3036
3037 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3038
3039 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3040
3041 if (bond_rss_conf.rss_hf != 0)
3042 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3043
3044 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
3045 sizeof(internals->rss_key)) {
3046 if (bond_rss_conf.rss_key_len == 0)
3047 bond_rss_conf.rss_key_len = 40;
3048 internals->rss_key_len = bond_rss_conf.rss_key_len;
3049 memcpy(internals->rss_key, bond_rss_conf.rss_key,
3050 internals->rss_key_len);
3051 }
3052
3053 for (i = 0; i < internals->slave_count; i++) {
3054 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3055 &bond_rss_conf);
3056 if (result < 0)
3057 return result;
3058 }
3059
3060 return 0;
3061}
3062
3063static int
3064bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3065 struct rte_eth_rss_conf *rss_conf)
3066{
3067 struct bond_dev_private *internals = dev->data->dev_private;
3068
3069 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3070 rss_conf->rss_key_len = internals->rss_key_len;
3071 if (rss_conf->rss_key)
3072 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3073
3074 return 0;
3075}
3076
3077static int
3078bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3079{
3080 struct rte_eth_dev *slave_eth_dev;
3081 struct bond_dev_private *internals = dev->data->dev_private;
3082 int ret, i;
3083
3084 rte_spinlock_lock(&internals->lock);
3085
3086 for (i = 0; i < internals->slave_count; i++) {
3087 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3088 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3089 rte_spinlock_unlock(&internals->lock);
3090 return -ENOTSUP;
3091 }
3092 }
3093 for (i = 0; i < internals->slave_count; i++) {
3094 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3095 if (ret < 0) {
3096 rte_spinlock_unlock(&internals->lock);
3097 return ret;
3098 }
3099 }
3100
3101 rte_spinlock_unlock(&internals->lock);
3102 return 0;
3103}
3104
3105static int
3106bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3107 struct rte_ether_addr *addr)
3108{
3109 if (mac_address_set(dev, addr)) {
3110 RTE_BOND_LOG(ERR, "Failed to update MAC address");
3111 return -EINVAL;
3112 }
3113
3114 return 0;
3115}
3116
3117static int
3118bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3119 const struct rte_flow_ops **ops)
3120{
3121 *ops = &bond_flow_ops;
3122 return 0;
3123}
3124
3125static int
3126bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3127 struct rte_ether_addr *mac_addr,
3128 __rte_unused uint32_t index, uint32_t vmdq)
3129{
3130 struct rte_eth_dev *slave_eth_dev;
3131 struct bond_dev_private *internals = dev->data->dev_private;
3132 int ret, i;
3133
3134 rte_spinlock_lock(&internals->lock);
3135
3136 for (i = 0; i < internals->slave_count; i++) {
3137 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3138 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3139 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3140 ret = -ENOTSUP;
3141 goto end;
3142 }
3143 }
3144
3145 for (i = 0; i < internals->slave_count; i++) {
3146 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3147 mac_addr, vmdq);
3148 if (ret < 0) {
3149
3150 for (i--; i >= 0; i--)
3151 rte_eth_dev_mac_addr_remove(
3152 internals->slaves[i].port_id, mac_addr);
3153 goto end;
3154 }
3155 }
3156
3157 ret = 0;
3158end:
3159 rte_spinlock_unlock(&internals->lock);
3160 return ret;
3161}
3162
3163static void
3164bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3165{
3166 struct rte_eth_dev *slave_eth_dev;
3167 struct bond_dev_private *internals = dev->data->dev_private;
3168 int i;
3169
3170 rte_spinlock_lock(&internals->lock);
3171
3172 for (i = 0; i < internals->slave_count; i++) {
3173 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3174 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3175 goto end;
3176 }
3177
3178 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3179
3180 for (i = 0; i < internals->slave_count; i++)
3181 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3182 mac_addr);
3183
3184end:
3185 rte_spinlock_unlock(&internals->lock);
3186}
3187
/* Ethdev operations table installed on every bonded device instance
 * (see bond_alloc() and the secondary-process path in bond_probe()).
 */
const struct eth_dev_ops default_dev_ops = {
	.dev_start = bond_ethdev_start,
	.dev_stop = bond_ethdev_stop,
	.dev_close = bond_ethdev_close,
	.dev_configure = bond_ethdev_configure,
	.dev_infos_get = bond_ethdev_info,
	.vlan_filter_set = bond_ethdev_vlan_filter_set,
	.rx_queue_setup = bond_ethdev_rx_queue_setup,
	.tx_queue_setup = bond_ethdev_tx_queue_setup,
	.rx_queue_release = bond_ethdev_rx_queue_release,
	.tx_queue_release = bond_ethdev_tx_queue_release,
	.link_update = bond_ethdev_link_update,
	.stats_get = bond_ethdev_stats_get,
	.stats_reset = bond_ethdev_stats_reset,
	.promiscuous_enable = bond_ethdev_promiscuous_enable,
	.promiscuous_disable = bond_ethdev_promiscuous_disable,
	.allmulticast_enable = bond_ethdev_allmulticast_enable,
	.allmulticast_disable = bond_ethdev_allmulticast_disable,
	.reta_update = bond_ethdev_rss_reta_update,
	.reta_query = bond_ethdev_rss_reta_query,
	.rss_hash_update = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
	.mtu_set = bond_ethdev_mtu_set,
	.mac_addr_set = bond_ethdev_mac_address_set,
	.mac_addr_add = bond_ethdev_mac_addr_add,
	.mac_addr_remove = bond_ethdev_mac_addr_remove,
	.flow_ops_get = bond_flow_ops_get
};
3216
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* Create a new bonded ethdev in the given mode on the device's
	 * socket; returns the new port id on success, -1 on failure.
	 * The ethdev is allocated together with its private data.
	 */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	/* Storage for up to BOND_MAX_MAC_ADDRS MAC addresses. */
	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
			BOND_MAX_MAC_ADDRS, 0, socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate %u bytes needed to store MAC addresses",
			     RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
		RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;

	rte_spinlock_init(&internals->lock);
	rte_spinlock_init(&internals->lsc_lock);

	/* Default private state before any slave is attached. */
	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	/* Out-of-range value marks "no primary chosen yet". */
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->burst_xmit_hash = burst_xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
		DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->rx_queue_offload_capa = 0;
	internals->tx_queue_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Start by advertising every RSS hash function. */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(&internals->default_rxconf, 0,
	       sizeof(internals->default_rxconf));
	memset(&internals->default_txconf, 0,
	       sizeof(internals->default_txconf));

	memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
	memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	TAILQ_INIT(&internals->flow_list);
	internals->flow_isolated_valid = 0;

	/* Initialize mode 4 state, then switch to the requested mode. */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
				 eth_dev->data->port_id, mode);
		goto err;
	}

	/* Bitmap recording configured VLAN filters
	 * (see bond_ethdev_vlan_filter_set()).
	 */
	vlan_filter_bmp_size =
		rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
						   RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate vlan bitmap for bonded device %u",
			     eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to init vlan bitmap for bonded device %u",
			     eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	/* Free the private data ourselves and clear dev_private so
	 * rte_eth_dev_release_port() does not free it a second time.
	 */
	rte_free(internals);
	if (eth_dev != NULL)
		eth_dev->data->dev_private = NULL;
	rte_eth_dev_release_port(eth_dev);
	return -1;
}
3336
static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode;
	int arg_count, port_id;
	int socket_id;
	uint8_t agg_mode;
	struct rte_eth_dev *eth_dev;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);

	/* Secondary processes attach to the port created by the primary
	 * and only install the (local) ops pointers.
	 */
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}

		eth_dev->dev_ops = &default_dev_ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
		pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* The bonding mode argument is mandatory and must appear once. */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
					name);
			goto parse_error;
		}
	} else {
		RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
				"device %s", name);
		goto parse_error;
	}

	/* Optional socket id; defaults to the calling lcore's socket. */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
					"bonded device %s", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
				"bonded device %s", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create the bonded ethdev in the requested mode. */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
				"socket %u.", name, bonding_mode, socket_id);
		goto parse_error;
	}
	/* kvlist ownership passes to the device; it is freed on remove. */
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	/* Optional 802.3ad aggregator selection mode (default: stable).
	 * NOTE(review): a parse failure here frees kvlist while
	 * internals->kvlist still points at it and the port stays
	 * allocated — verify the error-path cleanup expectations.
	 */
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse agg selection mode for bonded device %s",
					name);
			goto parse_error;
		}

		if (internals->mode == BONDING_MODE_8023AD)
			internals->mode4.agg_selection = agg_mode;
	} else {
		internals->mode4.agg_selection = AGG_STABLE;
	}

	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
	RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
			"socket %u.", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);

	return -1;
}
3445
static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;
	int ret = 0;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);

	/* Nothing to do if no ethdev was created for this vdev name. */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return 0;

	/* Secondary processes only release their local port reference. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return rte_eth_dev_release_port(eth_dev);

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	/* Refuse removal while slaves are still attached. */
	if (internals->slave_count != 0)
		return -EBUSY;

	/* Stop and close the device before releasing the port. */
	if (eth_dev->data->dev_started == 1) {
		ret = bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}
	/* Free the kvargs handed over by bond_probe(). */
	if (internals->kvlist != NULL)
		rte_kvargs_free(internals->kvlist);
	rte_eth_dev_release_port(eth_dev);

	return ret;
}
3484
3485
3486
3487static int
3488bond_ethdev_configure(struct rte_eth_dev *dev)
3489{
3490 const char *name = dev->device->name;
3491 struct bond_dev_private *internals = dev->data->dev_private;
3492 struct rte_kvargs *kvlist = internals->kvlist;
3493 int arg_count;
3494 uint16_t port_id = dev - rte_eth_devices;
3495 uint8_t agg_mode;
3496
3497 static const uint8_t default_rss_key[40] = {
3498 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3499 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3500 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3501 0xBE, 0xAC, 0x01, 0xFA
3502 };
3503
3504 unsigned i, j;
3505
3506
3507
3508
3509
3510
3511 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3512 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3513 internals->rss_key_len =
3514 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3515 memcpy(internals->rss_key,
3516 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3517 internals->rss_key_len);
3518 } else {
3519 internals->rss_key_len = sizeof(default_rss_key);
3520 memcpy(internals->rss_key, default_rss_key,
3521 internals->rss_key_len);
3522 }
3523
3524 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3525 internals->reta_conf[i].mask = ~0LL;
3526 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3527 internals->reta_conf[i].reta[j] =
3528 (i * RTE_RETA_GROUP_SIZE + j) %
3529 dev->data->nb_rx_queues;
3530 }
3531 }
3532
3533
3534 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3535
3536
3537
3538
3539
3540 if (!kvlist)
3541 return 0;
3542
3543
3544 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3545 if (arg_count == 1) {
3546 struct rte_ether_addr bond_mac;
3547
3548 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3549 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3550 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3551 name);
3552 return -1;
3553 }
3554
3555
3556 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3557 RTE_BOND_LOG(ERR,
3558 "Failed to set mac address on bonded device %s",
3559 name);
3560 return -1;
3561 }
3562 } else if (arg_count > 1) {
3563 RTE_BOND_LOG(ERR,
3564 "MAC address can be specified only once for bonded device %s",
3565 name);
3566 return -1;
3567 }
3568
3569
3570 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3571 if (arg_count == 1) {
3572 uint8_t xmit_policy;
3573
3574 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3575 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3576 0) {
3577 RTE_BOND_LOG(INFO,
3578 "Invalid xmit policy specified for bonded device %s",
3579 name);
3580 return -1;
3581 }
3582
3583
3584 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3585 RTE_BOND_LOG(ERR,
3586 "Failed to set balance xmit policy on bonded device %s",
3587 name);
3588 return -1;
3589 }
3590 } else if (arg_count > 1) {
3591 RTE_BOND_LOG(ERR,
3592 "Transmit policy can be specified only once for bonded device %s",
3593 name);
3594 return -1;
3595 }
3596
3597 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3598 if (rte_kvargs_process(kvlist,
3599 PMD_BOND_AGG_MODE_KVARG,
3600 &bond_ethdev_parse_slave_agg_mode_kvarg,
3601 &agg_mode) != 0) {
3602 RTE_BOND_LOG(ERR,
3603 "Failed to parse agg selection mode for bonded device %s",
3604 name);
3605 }
3606 if (internals->mode == BONDING_MODE_8023AD) {
3607 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3608 agg_mode);
3609 if (ret < 0) {
3610 RTE_BOND_LOG(ERR,
3611 "Invalid args for agg selection set for bonded device %s",
3612 name);
3613 return -1;
3614 }
3615 }
3616 }
3617
3618
3619 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3620 struct bond_ethdev_slave_ports slave_ports;
3621 unsigned i;
3622
3623 memset(&slave_ports, 0, sizeof(slave_ports));
3624
3625 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3626 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3627 RTE_BOND_LOG(ERR,
3628 "Failed to parse slave ports for bonded device %s",
3629 name);
3630 return -1;
3631 }
3632
3633 for (i = 0; i < slave_ports.slave_count; i++) {
3634 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3635 RTE_BOND_LOG(ERR,
3636 "Failed to add port %d as slave to bonded device %s",
3637 slave_ports.slaves[i], name);
3638 }
3639 }
3640
3641 } else {
3642 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3643 return -1;
3644 }
3645
3646
3647 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3648 if (arg_count == 1) {
3649 uint16_t primary_slave_port_id;
3650
3651 if (rte_kvargs_process(kvlist,
3652 PMD_BOND_PRIMARY_SLAVE_KVARG,
3653 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3654 &primary_slave_port_id) < 0) {
3655 RTE_BOND_LOG(INFO,
3656 "Invalid primary slave port id specified for bonded device %s",
3657 name);
3658 return -1;
3659 }
3660
3661
3662 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3663 != 0) {
3664 RTE_BOND_LOG(ERR,
3665 "Failed to set primary slave port %d on bonded device %s",
3666 primary_slave_port_id, name);
3667 return -1;
3668 }
3669 } else if (arg_count > 1) {
3670 RTE_BOND_LOG(INFO,
3671 "Primary slave can be specified only once for bonded device %s",
3672 name);
3673 return -1;
3674 }
3675
3676
3677 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3678 if (arg_count == 1) {
3679 uint32_t lsc_poll_interval_ms;
3680
3681 if (rte_kvargs_process(kvlist,
3682 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3683 &bond_ethdev_parse_time_ms_kvarg,
3684 &lsc_poll_interval_ms) < 0) {
3685 RTE_BOND_LOG(INFO,
3686 "Invalid lsc polling interval value specified for bonded"
3687 " device %s", name);
3688 return -1;
3689 }
3690
3691 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3692 != 0) {
3693 RTE_BOND_LOG(ERR,
3694 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3695 lsc_poll_interval_ms, name);
3696 return -1;
3697 }
3698 } else if (arg_count > 1) {
3699 RTE_BOND_LOG(INFO,
3700 "LSC polling interval can be specified only once for bonded"
3701 " device %s", name);
3702 return -1;
3703 }
3704
3705
3706 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3707 if (arg_count == 1) {
3708 uint32_t link_up_delay_ms;
3709
3710 if (rte_kvargs_process(kvlist,
3711 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3712 &bond_ethdev_parse_time_ms_kvarg,
3713 &link_up_delay_ms) < 0) {
3714 RTE_BOND_LOG(INFO,
3715 "Invalid link up propagation delay value specified for"
3716 " bonded device %s", name);
3717 return -1;
3718 }
3719
3720
3721 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3722 != 0) {
3723 RTE_BOND_LOG(ERR,
3724 "Failed to set link up propagation delay (%u ms) on bonded"
3725 " device %s", link_up_delay_ms, name);
3726 return -1;
3727 }
3728 } else if (arg_count > 1) {
3729 RTE_BOND_LOG(INFO,
3730 "Link up propagation delay can be specified only once for"
3731 " bonded device %s", name);
3732 return -1;
3733 }
3734
3735
3736 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3737 if (arg_count == 1) {
3738 uint32_t link_down_delay_ms;
3739
3740 if (rte_kvargs_process(kvlist,
3741 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3742 &bond_ethdev_parse_time_ms_kvarg,
3743 &link_down_delay_ms) < 0) {
3744 RTE_BOND_LOG(INFO,
3745 "Invalid link down propagation delay value specified for"
3746 " bonded device %s", name);
3747 return -1;
3748 }
3749
3750
3751 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3752 != 0) {
3753 RTE_BOND_LOG(ERR,
3754 "Failed to set link down propagation delay (%u ms) on bonded device %s",
3755 link_down_delay_ms, name);
3756 return -1;
3757 }
3758 } else if (arg_count > 1) {
3759 RTE_BOND_LOG(INFO,
3760 "Link down propagation delay can be specified only once for bonded device %s",
3761 name);
3762 return -1;
3763 }
3764
3765 return 0;
3766}
3767
3768struct rte_vdev_driver pmd_bond_drv = {
3769 .probe = bond_probe,
3770 .remove = bond_remove,
3771};
3772
/* Register the driver under "net_bonding" with legacy alias "eth_bond". */
RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

/* Advertise the devargs accepted by bond_probe()/bond_ethdev_configure(). */
RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");




/* Dynamic log type used by RTE_BOND_LOG, default level NOTICE. */
RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);
3792