#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/queue.h>
#include <sys/resource.h>

#include <rte_byteorder.h>
#include <rte_jhash.h>
#include <rte_malloc.h>
#include <rte_eth_tap.h>
#include <tap_flow.h>
#include <tap_autoconf.h>
#include <tap_tcmsgs.h>
#include <tap_rss.h>

#ifndef HAVE_TC_FLOWER
/*
 * Compat: if the kernel headers do not provide the flower classifier API,
 * define the TCA_FLOWER_* attributes locally. Unsupported kernels will
 * still reject the TC requests at runtime.
 */
enum {
	TCA_FLOWER_UNSPEC,
	TCA_FLOWER_CLASSID,
	TCA_FLOWER_INDEV,
	TCA_FLOWER_ACT,
	TCA_FLOWER_KEY_ETH_DST,
	TCA_FLOWER_KEY_ETH_DST_MASK,
	TCA_FLOWER_KEY_ETH_SRC,
	TCA_FLOWER_KEY_ETH_SRC_MASK,
	TCA_FLOWER_KEY_ETH_TYPE,
	TCA_FLOWER_KEY_IP_PROTO,
	TCA_FLOWER_KEY_IPV4_SRC,
	TCA_FLOWER_KEY_IPV4_SRC_MASK,
	TCA_FLOWER_KEY_IPV4_DST,
	TCA_FLOWER_KEY_IPV4_DST_MASK,
	TCA_FLOWER_KEY_IPV6_SRC,
	TCA_FLOWER_KEY_IPV6_SRC_MASK,
	TCA_FLOWER_KEY_IPV6_DST,
	TCA_FLOWER_KEY_IPV6_DST_MASK,
	TCA_FLOWER_KEY_TCP_SRC,
	TCA_FLOWER_KEY_TCP_DST,
	TCA_FLOWER_KEY_UDP_SRC,
	TCA_FLOWER_KEY_UDP_DST,
};
#endif
#ifndef HAVE_TC_VLAN_ID
enum {
	/* TCA_FLOWER_FLAGS comes in between: hence the "+ 2" below. */
	TCA_FLOWER_KEY_VLAN_ID = TCA_FLOWER_KEY_UDP_DST + 2,
	TCA_FLOWER_KEY_VLAN_PRIO,
	TCA_FLOWER_KEY_VLAN_ETH_TYPE,
};
#endif
/*
 * Compat: BPF-related TC enums and structures may be missing from older
 * kernel headers. Define them here so the code compiles; if the running
 * kernel does not support BPF classifiers or actions, the corresponding
 * TC requests will simply be rejected at runtime.
 */
#ifndef HAVE_TC_BPF
enum {
	TCA_BPF_UNSPEC,
	TCA_BPF_ACT,
	TCA_BPF_POLICE,
	TCA_BPF_CLASSID,
	TCA_BPF_OPS_LEN,
	TCA_BPF_OPS,
};
#endif
#ifndef HAVE_TC_BPF_FD
enum {
	TCA_BPF_FD = TCA_BPF_OPS + 1,
	TCA_BPF_NAME,
};
#endif
#ifndef HAVE_TC_ACT_BPF
#define tc_gen \
	__u32 index; \
	__u32 capab; \
	int action; \
	int refcnt; \
	int bindcnt

struct tc_act_bpf {
	tc_gen;
};

enum {
	TCA_ACT_BPF_UNSPEC,
	TCA_ACT_BPF_TM,
	TCA_ACT_BPF_PARMS,
	TCA_ACT_BPF_OPS_LEN,
	TCA_ACT_BPF_OPS,
};

#endif
#ifndef HAVE_TC_ACT_BPF_FD
enum {
	TCA_ACT_BPF_FD = TCA_ACT_BPF_OPS + 1,
	TCA_ACT_BPF_NAME,
};
#endif

/* RSS key management commands, handled by bpf_rss_key(). */
enum bpf_rss_key_e {
	KEY_CMD_GET = 1,
	KEY_CMD_RELEASE,
	KEY_CMD_INIT,
	KEY_CMD_DEINIT,
};

enum key_status_e {
	KEY_STAT_UNSPEC,
	KEY_STAT_USED,
	KEY_STAT_AVAILABLE,
};

/* Static handles for implicit rules that must be unique per device. */
#define ISOLATE_HANDLE 1
#define REMOTE_PROMISCUOUS_HANDLE 2

struct rte_flow {
	LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure. */
	struct rte_flow *remote_flow; /* Associated remote rule, if any. */
	int bpf_fd[SEC_MAX]; /* BPF program file descriptors, per section. */
	uint32_t key_idx; /* RSS rule key index into the BPF map. */
	struct nlmsg msg; /* Netlink message for this TC rule. */
};

/* Context shared between conversion functions while parsing items. */
struct convert_data {
	uint16_t eth_type;
	uint16_t ip_proto;
	uint8_t vlan;
	struct rte_flow *flow;
};

/* Definition of an implicit rule applied on the remote netdevice. */
struct remote_rule {
	struct rte_flow_attr attr;
	struct rte_flow_item items[2];
	struct rte_flow_action actions[2];
	int mirred;
};

/* Flow action data: action id and the matching tc parameters. */
struct action_data {
	char id[16];

	union {
		struct tc_gact gact;
		struct tc_mirred mirred;
		struct skbedit {
			struct tc_skbedit skbedit;
			uint16_t queue;
		} skbedit;
		struct bpf {
			struct tc_act_bpf bpf;
			int bpf_fd;
			const char *annotation;
		} bpf;
	};
};

static int tap_flow_create_eth(const struct rte_flow_item *item, void *data);
static int tap_flow_create_vlan(const struct rte_flow_item *item, void *data);
static int tap_flow_create_ipv4(const struct rte_flow_item *item, void *data);
static int tap_flow_create_ipv6(const struct rte_flow_item *item, void *data);
static int tap_flow_create_udp(const struct rte_flow_item *item, void *data);
static int tap_flow_create_tcp(const struct rte_flow_item *item, void *data);
static int
tap_flow_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error);

static struct rte_flow *
tap_flow_create(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error);

static void
tap_flow_free(struct pmd_internals *pmd,
	      struct rte_flow *flow);

static int
tap_flow_destroy(struct rte_eth_dev *dev,
		 struct rte_flow *flow,
		 struct rte_flow_error *error);

static int
tap_flow_isolate(struct rte_eth_dev *dev,
		 int set,
		 struct rte_flow_error *error);

static int bpf_rss_key(enum bpf_rss_key_e cmd, __u32 *key_idx);
static int rss_enable(struct pmd_internals *pmd,
		      const struct rte_flow_attr *attr,
		      struct rte_flow_error *error);
static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,
			   const struct rte_flow_action_rss *rss,
			   struct rte_flow_error *error);

static const struct rte_flow_ops tap_flow_ops = {
	.validate = tap_flow_validate,
	.create = tap_flow_create,
	.destroy = tap_flow_destroy,
	.flush = tap_flow_flush,
	.isolate = tap_flow_isolate,
};

/* Static initializer for a list of subsequent item types. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}

/* Structure to generate a simple graph of layers supported by the NIC. */
struct tap_flow_items {
	/* Bit-mask corresponding to what is supported for this item. */
	const void *mask;
	const unsigned int mask_sz; /* Bit-mask size in bytes. */
	/*
	 * Bit-mask applied to the item when no mask is provided along
	 * with it.
	 */
	const void *default_mask;
	/**
	 * Conversion function from rte_flow to netlink attributes.
	 *
	 * @param[in] item
	 *   rte_flow item to convert.
	 * @param[in, out] data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item, void *data);
	/** List of possible subsequent items. */
	const enum rte_flow_item_type *const items;
};

/* Graph of supported items and their conversion functions. */
static const struct tap_flow_items tap_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(
			RTE_FLOW_ITEM_TYPE_VLAN,
			RTE_FLOW_ITEM_TYPE_IPV4,
			RTE_FLOW_ITEM_TYPE_IPV6),
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.default_mask = &rte_flow_item_eth_mask,
		.convert = tap_flow_create_eth,
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.mask = &(const struct rte_flow_item_vlan){
			/* DEI matching is not supported */
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
			.tci = 0xffef,
#else
			.tci = 0xefff,
#endif
			.inner_type = -1,
		},
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.default_mask = &rte_flow_item_vlan_mask,
		.convert = tap_flow_create_vlan,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.next_proto_id = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.default_mask = &rte_flow_item_ipv4_mask,
		.convert = tap_flow_create_ipv4,
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					"\xff\xff\xff\xff\xff\xff\xff\xff"
					"\xff\xff\xff\xff\xff\xff\xff\xff",
				},
				.dst_addr = {
					"\xff\xff\xff\xff\xff\xff\xff\xff"
					"\xff\xff\xff\xff\xff\xff\xff\xff",
				},
				.proto = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.default_mask = &rte_flow_item_ipv6_mask,
		.convert = tap_flow_create_ipv6,
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.default_mask = &rte_flow_item_udp_mask,
		.convert = tap_flow_create_udp,
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.default_mask = &rte_flow_item_tcp_mask,
		.convert = tap_flow_create_tcp,
	},
};
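
/*
 * Implicit rules, automatically installed when a remote netdevice is
 * configured (summary reconstructed from the table below):
 * - TAP_REMOTE_LOCAL_MAC redirects remote ingress frames whose destination
 *   is the tap MAC address to the tap netdevice.
 * - TAP_REMOTE_BROADCAST and TAP_REMOTE_BROADCASTV6 mirror Ethernet
 *   broadcast and IPv6 multicast frames.
 * - TAP_REMOTE_PROMISC and TAP_REMOTE_ALLMULTI mirror every frame (resp.
 *   every multicast frame) when the corresponding mode is enabled.
 * - TAP_REMOTE_TX mirrors frames sent by the DPDK application back out
 *   through the remote netdevice.
 * - TAP_ISOLATE handles every frame not matching an explicit rule; its
 *   action is DROP when flow isolation is enabled, PASSTHRU otherwise.
 */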
static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
	[TAP_REMOTE_LOCAL_MAC] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_LOCAL_MAC,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_REDIR,
	},
	[TAP_REMOTE_BROADCAST] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCAST,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			},
			.spec = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_BROADCASTV6] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCASTV6,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
			},
			.spec = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_PROMISC] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_PROMISC,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_ALLMULTI] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_ALLMULTI,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			},
			.spec = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_TX] = {
		.attr = {
			.group = 0,
			.priority = TAP_REMOTE_TX,
			.egress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_ISOLATE] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_ISOLATE,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	},
};
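
/**
 * Check an Ethernet item as much as possible and, if a flow is provided,
 * fill it with the corresponding flower netlink attributes.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Conversion context (struct convert_data).
 *
 * @return
 *   0 if the item is valid, -1 otherwise.
 */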
static int
tap_flow_create_eth(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_ETH].default_mask;
	/* TC does not support eth_type masking. Only accept if exact match. */
	if (mask->type && mask->type != 0xffff)
		return -1;
	if (!spec)
		return 0;
	/* store eth_type for consistency checks on subsequent items */
	if (spec->type & mask->type)
		info->eth_type = spec->type;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!rte_is_zero_ether_addr(&mask->dst)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST,
			       RTE_ETHER_ADDR_LEN,
			       &spec->dst.addr_bytes);
		tap_nlattr_add(&msg->nh,
			       TCA_FLOWER_KEY_ETH_DST_MASK, RTE_ETHER_ADDR_LEN,
			       &mask->dst.addr_bytes);
	}
	if (!rte_is_zero_ether_addr(&mask->src)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_SRC,
			       RTE_ETHER_ADDR_LEN,
			       &spec->src.addr_bytes);
		tap_nlattr_add(&msg->nh,
			       TCA_FLOWER_KEY_ETH_SRC_MASK, RTE_ETHER_ADDR_LEN,
			       &mask->src.addr_bytes);
	}
	return 0;
}
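
/**
 * Check a VLAN item as much as possible and, if a flow is provided, fill
 * it with the corresponding flower netlink attributes.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Conversion context (struct convert_data).
 *
 * @return
 *   0 if the item is valid, -1 otherwise.
 */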
static int
tap_flow_create_vlan(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_VLAN].default_mask;
	/* Outer TPID cannot be matched. */
	if (info->eth_type)
		return -1;
	/* Double-tagging not supported. */
	if (info->vlan)
		return -1;
	info->vlan = 1;
	if (mask->inner_type) {
		/* TC does not support partial eth_type masking */
		if (mask->inner_type != RTE_BE16(0xffff))
			return -1;
		info->eth_type = spec->inner_type;
	}
	if (!flow)
		return 0;
	msg = &flow->msg;
	msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_8021Q));
#define VLAN_PRIO(tci) ((tci) >> 13)
#define VLAN_ID(tci) ((tci) & 0xfff)
	if (!spec)
		return 0;
	if (spec->tci) {
		uint16_t tci = ntohs(spec->tci) & mask->tci;
		uint16_t prio = VLAN_PRIO(tci);
		/* VLAN IDs are 12-bit wide: uint8_t would truncate them. */
		uint16_t vid = VLAN_ID(tci);

		if (prio)
			tap_nlattr_add8(&msg->nh,
					TCA_FLOWER_KEY_VLAN_PRIO, prio);
		if (vid)
			tap_nlattr_add16(&msg->nh,
					 TCA_FLOWER_KEY_VLAN_ID, vid);
	}
	return 0;
}
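
/**
 * Check an IPv4 item as much as possible and, if a flow is provided, fill
 * it with the corresponding flower netlink attributes.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Conversion context (struct convert_data).
 *
 * @return
 *   0 if the item is valid, -1 otherwise.
 */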
static int
tap_flow_create_ipv4(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV4].default_mask;
	/* check that previous eth type is compatible with ipv4 */
	if (info->eth_type && info->eth_type != htons(ETH_P_IP))
		return -1;
	/* store ip_proto for consistency checks on subsequent items */
	if (spec)
		info->ip_proto = spec->hdr.next_proto_id;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!info->eth_type)
		info->eth_type = htons(ETH_P_IP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_addr) {
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST,
				 spec->hdr.dst_addr);
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK,
				 mask->hdr.dst_addr);
	}
	if (mask->hdr.src_addr) {
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC,
				 spec->hdr.src_addr);
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK,
				 mask->hdr.src_addr);
	}
	if (spec->hdr.next_proto_id)
		tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO,
				spec->hdr.next_proto_id);
	return 0;
}
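
/**
 * Check an IPv6 item as much as possible and, if a flow is provided, fill
 * it with the corresponding flower netlink attributes.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Conversion context (struct convert_data).
 *
 * @return
 *   0 if the item is valid, -1 otherwise.
 */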
static int
tap_flow_create_ipv6(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct rte_flow *flow = info->flow;
	uint8_t empty_addr[16] = { 0 };
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV6].default_mask;
	/* check that previous eth type is compatible with ipv6 */
	if (info->eth_type && info->eth_type != htons(ETH_P_IPV6))
		return -1;
	/* store ip_proto for consistency checks on subsequent items */
	if (spec)
		info->ip_proto = spec->hdr.proto;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!info->eth_type)
		info->eth_type = htons(ETH_P_IPV6);
	if (!spec)
		return 0;
	if (memcmp(mask->hdr.dst_addr, empty_addr, 16)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST,
			       sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr);
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr);
	}
	if (memcmp(mask->hdr.src_addr, empty_addr, 16)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC,
			       sizeof(spec->hdr.src_addr), &spec->hdr.src_addr);
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(mask->hdr.src_addr), &mask->hdr.src_addr);
	}
	if (spec->hdr.proto)
		tap_nlattr_add8(&msg->nh,
				TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto);
	return 0;
}
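
/**
 * Check a UDP item as much as possible and, if a flow is provided, fill it
 * with the corresponding flower netlink attributes.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Conversion context (struct convert_data).
 *
 * @return
 *   0 if the item is valid, -1 otherwise.
 */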
static int
tap_flow_create_udp(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_UDP].default_mask;
	/* check that previous ip_proto is compatible with udp */
	if (info->ip_proto && info->ip_proto != IPPROTO_UDP)
		return -1;
	/* TC does not support UDP port masking. Only accept if exact match. */
	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
		return -1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST,
				 spec->hdr.dst_port);
	if (mask->hdr.src_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC,
				 spec->hdr.src_port);
	return 0;
}
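
/**
 * Check a TCP item as much as possible and, if a flow is provided, fill it
 * with the corresponding flower netlink attributes.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Conversion context (struct convert_data).
 *
 * @return
 *   0 if the item is valid, -1 otherwise.
 */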
static int
tap_flow_create_tcp(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_TCP].default_mask;
	/* check that previous ip_proto is compatible with tcp */
	if (info->ip_proto && info->ip_proto != IPPROTO_TCP)
		return -1;
	/* TC does not support TCP port masking. Only accept if exact match. */
	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
		return -1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST,
				 spec->hdr.dst_port);
	if (mask->hdr.src_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC,
				 spec->hdr.src_port);
	return 0;
}
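
/**
 * Check support for a given item: its spec, last and mask fields are
 * compared against the supported and default bit-masks.
 *
 * @param[in] item
 *   Item specification.
 * @param size
 *   Bit-mask size in bytes.
 * @param[in] supported_mask
 *   Bit-mask covering supported fields to compare with spec, last and mask
 *   in item.
 * @param[in] default_mask
 *   Bit-mask to use when no mask is provided in item.
 *
 * @return
 *   0 on success, -1 otherwise.
 */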
static int
tap_flow_item_validate(const struct rte_flow_item *item,
		       unsigned int size,
		       const uint8_t *supported_mask,
		       const uint8_t *default_mask)
{
	int ret = 0;

	/* An empty layer is allowed, as long as all fields are NULL */
	if (!item->spec && (item->mask || item->last))
		return -1;
	/* Is the item spec compatible with what the NIC supports? */
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
		/* Is the default mask compatible as well? */
		for (i = 0; i < size; i++)
			if ((default_mask[i] | supported_mask[i]) !=
			    supported_mask[i])
				return -1;
	}
	/* Is the item last compatible with what the NIC supports? */
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
	}
	/* Is the item mask compatible with what the NIC supports? */
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
	}
	/*
	 * Once masked, spec and last must be equal: TC does not support
	 * ranges.
	 */
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = default_mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
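
/**
 * Transpose a single flow action to a netlink message attribute.
 *
 * @param[in, out] flow
 *   Flow containing the netlink message being filled.
 * @param[in, out] act_index
 *   Index of the nested attribute for this action; incremented on use.
 * @param[in] adata
 *   Action metadata: action id ("gact", "mirred", "skbedit" or "bpf")
 *   and the matching TC parameters.
 *
 * @return
 *   0 on success, -1 otherwise.
 */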
static int
add_action(struct rte_flow *flow, size_t *act_index, struct action_data *adata)
{
	struct nlmsg *msg = &flow->msg;

	if (tap_nlattr_nested_start(msg, (*act_index)++) < 0)
		return -1;

	tap_nlattr_add(&msg->nh, TCA_ACT_KIND,
		       strlen(adata->id) + 1, adata->id);
	if (tap_nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
		return -1;
	if (strcmp("gact", adata->id) == 0) {
		tap_nlattr_add(&msg->nh, TCA_GACT_PARMS, sizeof(adata->gact),
			       &adata->gact);
	} else if (strcmp("mirred", adata->id) == 0) {
		if (adata->mirred.eaction == TCA_EGRESS_MIRROR)
			adata->mirred.action = TC_ACT_PIPE;
		else /* REDIRECT */
			adata->mirred.action = TC_ACT_STOLEN;
		tap_nlattr_add(&msg->nh, TCA_MIRRED_PARMS,
			       sizeof(adata->mirred),
			       &adata->mirred);
	} else if (strcmp("skbedit", adata->id) == 0) {
		tap_nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS,
			       sizeof(adata->skbedit.skbedit),
			       &adata->skbedit.skbedit);
		tap_nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING,
				 adata->skbedit.queue);
	} else if (strcmp("bpf", adata->id) == 0) {
		tap_nlattr_add32(&msg->nh, TCA_ACT_BPF_FD, adata->bpf.bpf_fd);
		tap_nlattr_add(&msg->nh, TCA_ACT_BPF_NAME,
			       strlen(adata->bpf.annotation) + 1,
			       adata->bpf.annotation);
		tap_nlattr_add(&msg->nh, TCA_ACT_BPF_PARMS,
			       sizeof(adata->bpf.bpf),
			       &adata->bpf.bpf);
	} else {
		return -1;
	}
	tap_nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
	tap_nlattr_nested_finish(msg); /* nested act_index */
	return 0;
}
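
/**
 * Transpose flow actions to a netlink message attribute, wrapped in the
 * classifier-specific nested attribute (e.g. TCA_FLOWER_ACT).
 *
 * @param[in, out] flow
 *   Flow containing the netlink message being filled.
 * @param[in] nb_actions
 *   Number of actions in the data array.
 * @param[in] data
 *   Array of action metadata.
 * @param[in] classifier_action
 *   Nested attribute identifying which classifier the actions belong to.
 *
 * @return
 *   0 on success, -1 otherwise.
 */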
static int
add_actions(struct rte_flow *flow, int nb_actions, struct action_data *data,
	    int classifier_action)
{
	struct nlmsg *msg = &flow->msg;
	size_t act_index = 1;
	int i;

	if (tap_nlattr_nested_start(msg, classifier_action) < 0)
		return -1;
	for (i = 0; i < nb_actions; i++)
		if (add_action(flow, &act_index, data + i) < 0)
			return -1;
	tap_nlattr_nested_finish(msg);
	return 0;
}
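
/**
 * Validate a flow supported by TC and, if a flow is provided, transpose it
 * to a netlink message for the kernel.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update, or NULL to validate only.
 * @param[in] mirred
 *   If nonzero, provided actions are ignored and a mirred action
 *   (TCA_EGRESS_REDIR or TCA_EGRESS_MIRROR) is added instead.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */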
static int
priv_flow_process(struct pmd_internals *pmd,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct rte_flow *flow,
		  int mirred)
{
	const struct tap_flow_items *cur_item = tap_flow_items;
	struct convert_data data = {
		.eth_type = 0,
		.ip_proto = 0,
		.flow = flow,
	};
	int action = 0; /* Only one action authorized for now */

	if (attr->transfer) {
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
			NULL, "transfer is not supported");
		return -rte_errno;
	}
	if (attr->group > MAX_GROUP) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
			NULL, "group value too big: cannot exceed 15");
		return -rte_errno;
	}
	if (attr->priority > MAX_PRIORITY) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			NULL, "priority value too big");
		return -rte_errno;
	} else if (flow) {
		uint16_t group = attr->group << GROUP_SHIFT;
		uint16_t prio = group | (attr->priority +
					 RSS_PRIORITY_OFFSET + PRIORITY_OFFSET);
		flow->msg.t.tcm_info = TC_H_MAKE(prio << 16,
						 flow->msg.t.tcm_info);
	}
	if (flow) {
		if (mirred) {
			/*
			 * If attr->ingress, the rule applies on remote
			 * ingress to match incoming packets.
			 * If attr->egress, the rule applies on tap ingress
			 * (as seen from the kernel) to deal with packets
			 * going out from the DPDK app.
			 */
			flow->msg.t.tcm_parent = TC_H_MAKE(TC_H_INGRESS, 0);
		} else {
			/* Standard rule on tap egress (kernel standpoint). */
			flow->msg.t.tcm_parent =
				TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
		}
		/* use flower filter type */
		tap_nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower");
		if (tap_nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0)
			goto exit_item_not_supported;
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct tap_flow_items *token = NULL;
		unsigned int i;
		int err = 0;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &tap_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = tap_flow_item_validate(
			items, cur_item->mask_sz,
			(const uint8_t *)cur_item->mask,
			(const uint8_t *)cur_item->default_mask);
		if (err)
			goto exit_item_not_supported;
		if (flow && cur_item->convert) {
			err = cur_item->convert(items, &data);
			if (err)
				goto exit_item_not_supported;
		}
	}
	if (flow) {
		if (data.vlan) {
			tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
					 htons(ETH_P_8021Q));
			tap_nlattr_add16(&flow->msg.nh,
					 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
					 data.eth_type ?
					 data.eth_type : htons(ETH_P_ALL));
		} else if (data.eth_type) {
			tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
					 data.eth_type);
		}
	}
	if (mirred && flow) {
		struct action_data adata = {
			.id = "mirred",
			.mirred = {
				.eaction = mirred,
			},
		};

		/*
		 * If attr->egress && mirred, then this is a special
		 * case where the rule must be applied on the tap, to
		 * redirect packets coming from the DPDK App, out
		 * through the remote netdevice.
		 */
		adata.mirred.ifindex = attr->ingress ? pmd->if_index :
			pmd->remote_if_index;
		if (mirred == TCA_EGRESS_MIRROR)
			adata.mirred.action = TC_ACT_PIPE;
		else
			adata.mirred.action = TC_ACT_STOLEN;
		if (add_actions(flow, 1, &adata, TCA_FLOWER_ACT) < 0)
			goto exit_action_not_supported;
		else
			goto end;
	}
actions:
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		int err = 0;

		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (flow) {
				struct action_data adata = {
					.id = "gact",
					.gact = {
						.action = TC_ACT_SHOT,
					},
				};

				err = add_actions(flow, 1, &adata,
						  TCA_FLOWER_ACT);
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_PASSTHRU) {
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (flow) {
				struct action_data adata = {
					.id = "gact",
					.gact = {
						/* continue */
						.action = TC_ACT_UNSPEC,
					},
				};

				err = add_actions(flow, 1, &adata,
						  TCA_FLOWER_ACT);
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;

			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (!queue ||
			    (queue->index > pmd->dev->data->nb_rx_queues - 1))
				goto exit_action_not_supported;
			if (flow) {
				struct action_data adata = {
					.id = "skbedit",
					.skbedit = {
						.skbedit = {
							.action = TC_ACT_PIPE,
						},
						.queue = queue->index,
					},
				};

				err = add_actions(flow, 1, &adata,
						  TCA_FLOWER_ACT);
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;

			if (action++)
				goto exit_action_not_supported;

			if (!pmd->rss_enabled) {
				err = rss_enable(pmd, attr, error);
				if (err)
					goto exit_action_not_supported;
			}
			if (flow)
				err = rss_add_actions(flow, pmd, rss, error);
		} else {
			goto exit_action_not_supported;
		}
		if (err)
			goto exit_action_not_supported;
	}
	/* When fate is unknown, drop traffic. */
	if (!action) {
		static const struct rte_flow_action drop[] = {
			{ .type = RTE_FLOW_ACTION_TYPE_DROP, },
			{ .type = RTE_FLOW_ACTION_TYPE_END, },
		};

		actions = drop;
		goto actions;
	}
end:
	if (flow)
		tap_nlattr_nested_finish(&flow->msg);
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}
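
/**
 * Validate a flow.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */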
static int
tap_flow_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	return priv_flow_process(pmd, attr, items, actions, error, NULL, 0);
}
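
/**
 * Set a unique handle in a flow structure.
 * The kernel supports TC rules with equal priority, as long as they use the
 * same matching fields (e.g.: dst mac and ipv4) with different values (and
 * full bitmasks). Those rules, sharing a priority, can coexist only if they
 * have the same handle.
 *
 * @param[in, out] flow
 *   The flow to set the handle in.
 */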
static void
tap_flow_set_handle(struct rte_flow *flow)
{
	union {
		struct rte_flow *flow;
		const void *key;
	} tmp;
	uint32_t handle = 0;

	tmp.flow = flow;

	if (sizeof(flow) > 4)
		handle = rte_jhash(tmp.key, sizeof(flow), 1);
	else
		handle = (uintptr_t)flow;
	/* must be at least 1 to avoid letting the kernel choose one for us */
	if (!handle)
		handle = 1;
	flow->msg.t.tcm_handle = handle;
}
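
/**
 * Free the flow's opened file descriptors and allocated memory.
 *
 * @param[in] pmd
 *   Pointer to private structure.
 * @param[in] flow
 *   Pointer to the flow to release fds and free memory for.
 */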
static void
tap_flow_free(struct pmd_internals *pmd, struct rte_flow *flow)
{
	int i;

	if (!flow)
		return;

	if (pmd->rss_enabled) {
		/* Close flow BPF file descriptors */
		for (i = 0; i < SEC_MAX; i++)
			if (flow->bpf_fd[i] != 0) {
				close(flow->bpf_fd[i]);
				flow->bpf_fd[i] = 0;
			}

		/* Release the map key for this flow */
		bpf_rss_key(KEY_CMD_RELEASE, &flow->key_idx);
		flow->key_idx = 0;
	}

	/* Free flow allocated memory */
	rte_free(flow);
}
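
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */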
static struct rte_flow *
tap_flow_create(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct rte_flow *remote_flow = NULL;
	struct rte_flow *flow = NULL;
	struct nlmsg *msg = NULL;
	int err;

	if (!pmd->if_index) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL,
				   "can't create rule, ifindex not found");
		goto fail;
	}
	/*
	 * No rules configured through standard rte_flow should be set on the
	 * priorities used by implicit rules.
	 */
	if ((attr->group == MAX_GROUP) &&
	    attr->priority > (MAX_PRIORITY - TAP_REMOTE_MAX_IDX)) {
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			NULL, "priority value too big");
		goto fail;
	}
	flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate memory for rte_flow");
		goto fail;
	}
	msg = &flow->msg;
	tc_init_msg(msg, pmd->if_index, RTM_NEWTFILTER,
		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
	tap_flow_set_handle(flow);
	if (priv_flow_process(pmd, attr, items, actions, error, flow, 0))
		goto fail;
	err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
	if (err < 0) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "couldn't send request to kernel");
		goto fail;
	}
	err = tap_nl_recv_ack(pmd->nlsk_fd);
	if (err < 0) {
		TAP_LOG(ERR,
			"Kernel refused TC filter rule creation (%d): %s",
			errno, strerror(errno));
		rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL,
				   "overlapping rules or Kernel too old for flower support");
		goto fail;
	}
	LIST_INSERT_HEAD(&pmd->flows, flow, next);
	/*
	 * If a remote device is configured, a TC rule with identical items
	 * for matching must be set on that device, with a single action:
	 * redirect packets to the tap netdevice.
	 */
	if (pmd->remote_if_index) {
		remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
		if (!remote_flow) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				"cannot allocate memory for rte_flow");
			goto fail;
		}
		msg = &remote_flow->msg;
		/* set the rule if_index for the remote netdevice */
		tc_init_msg(
			msg, pmd->remote_if_index, RTM_NEWTFILTER,
			NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
		msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
		tap_flow_set_handle(remote_flow);
		if (priv_flow_process(pmd, attr, items, NULL,
				      error, remote_flow, TCA_EGRESS_REDIR)) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "rte flow rule validation failed");
			goto fail;
		}
		err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
		if (err < 0) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure sending nl request");
			goto fail;
		}
		err = tap_nl_recv_ack(pmd->nlsk_fd);
		if (err < 0) {
			TAP_LOG(ERR,
				"Kernel refused TC filter rule creation (%d): %s",
				errno, strerror(errno));
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL,
				"overlapping rules or Kernel too old for flower support");
			goto fail;
		}
		flow->remote_flow = remote_flow;
	}
	return flow;
fail:
	if (remote_flow)
		rte_free(remote_flow);
	if (flow)
		tap_flow_free(pmd, flow);
	return NULL;
}
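
/**
 * Destroy a flow using pointer to pmd_internal.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] flow
 *   Pointer to the flow to destroy.
 * @param[in, out] error
 *   Pointer to the flow error handler.
 *
 * @return 0 if the flow could be destroyed, -1 otherwise.
 */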
static int
tap_flow_destroy_pmd(struct pmd_internals *pmd,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	struct rte_flow *remote_flow = flow->remote_flow;
	int ret = 0;

	LIST_REMOVE(flow, next);
	flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	flow->msg.nh.nlmsg_type = RTM_DELTFILTER;

	ret = tap_nl_send(pmd->nlsk_fd, &flow->msg.nh);
	if (ret < 0) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "couldn't send request to kernel");
		goto end;
	}
	ret = tap_nl_recv_ack(pmd->nlsk_fd);
	/* If errno is ENOENT, the rule is already no longer in the kernel. */
	if (ret < 0 && errno == ENOENT)
		ret = 0;
	if (ret < 0) {
		TAP_LOG(ERR,
			"Kernel refused TC filter rule deletion (%d): %s",
			errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"couldn't receive kernel ack to our request");
		goto end;
	}

	if (remote_flow) {
		remote_flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		remote_flow->msg.nh.nlmsg_type = RTM_DELTFILTER;

		ret = tap_nl_send(pmd->nlsk_fd, &remote_flow->msg.nh);
		if (ret < 0) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure sending nl request");
			goto end;
		}
		ret = tap_nl_recv_ack(pmd->nlsk_fd);
		if (ret < 0 && errno == ENOENT)
			ret = 0;
		if (ret < 0) {
			TAP_LOG(ERR,
				"Kernel refused TC filter rule deletion (%d): %s",
				errno, strerror(errno));
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure trying to receive nl ack");
			goto end;
		}
	}
end:
	if (remote_flow)
		rte_free(remote_flow);
	tap_flow_free(pmd, flow);
	return ret;
}
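
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */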
static int
tap_flow_destroy(struct rte_eth_dev *dev,
		 struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	return tap_flow_destroy_pmd(pmd, flow, error);
}
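
/**
 * Enable/disable flow isolation.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */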
static int
tap_flow_isolate(struct rte_eth_dev *dev,
		 int set,
		 struct rte_flow_error *error __rte_unused)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct pmd_process_private *process_private = dev->process_private;

	/* normalize 'set' variable to contain 0 or 1 values */
	if (set)
		set = 1;
	/* if already in the right isolation mode, nothing to do */
	if ((set ^ pmd->flow_isolate) == 0)
		return 0;
	/* mark the isolation mode for tap_flow_implicit_create() */
	pmd->flow_isolate = set;
	/*
	 * If the netdevice is there, set up the appropriate flow rules
	 * immediately. Otherwise they will be applied when the netdevice
	 * is brought up.
	 */
	if (!process_private->rxq_fds[0])
		return 0;
	if (set) {
		struct rte_flow *remote_flow;

		while (1) {
			remote_flow = LIST_FIRST(&pmd->implicit_flows);
			if (!remote_flow)
				break;
			/*
			 * Remove all implicit rules on the remote.
			 * Keep the local rule to redirect packets on TX.
			 * Keep also the last implicit local rule: ISOLATE.
			 */
			if (remote_flow->msg.t.tcm_ifindex == pmd->if_index)
				break;
			if (tap_flow_destroy_pmd(pmd, remote_flow, NULL) < 0)
				goto error;
		}
		/* Switch the TC rule according to pmd->flow_isolate */
		if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
			goto error;
	} else {
		/* Switch the TC rule according to pmd->flow_isolate */
		if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
			goto error;
		if (!pmd->remote_if_index)
			return 0;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0)
			goto error;
		if (dev->data->promiscuous &&
		    tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC) < 0)
			goto error;
		if (dev->data->all_multicast &&
		    tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI) < 0)
			goto error;
	}
	return 0;
error:
	pmd->flow_isolate = 0;
	return rte_flow_error_set(
		error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
		"TC rule creation failed");
}
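
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */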
int
tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct rte_flow *flow;

	while (!LIST_EMPTY(&pmd->flows)) {
		flow = LIST_FIRST(&pmd->flows);
		if (tap_flow_destroy(dev, flow, error) < 0)
			return -1;
	}
	return 0;
}
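
/**
 * Add an implicit flow rule on the remote device to make sure traffic gets
 * to the tap netdevice from there.
 *
 * @param pmd
 *   Pointer to private structure.
 * @param[in] idx
 *   The idx in the implicit_rte_flows array specifying which rule to apply.
 *
 * @return -1 if the rule couldn't be applied, 0 otherwise.
 */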
int tap_flow_implicit_create(struct pmd_internals *pmd,
			     enum implicit_rule_index idx)
{
	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
	struct rte_flow_action *actions = implicit_rte_flows[idx].actions;
	struct rte_flow_action isolate_actions[2] = {
		[1] = {
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow_item *items = implicit_rte_flows[idx].items;
	struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
	struct rte_flow_item_eth eth_local = { .type = 0 };
	uint16_t if_index = pmd->remote_if_index;
	struct rte_flow *remote_flow = NULL;
	struct nlmsg *msg = NULL;
	int err = 0;
	struct rte_flow_item items_local[2] = {
		[0] = {
			.type = items[0].type,
			.spec = &eth_local,
			.mask = items[0].mask,
		},
		[1] = {
			.type = items[1].type,
		}
	};

	remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
	if (!remote_flow) {
		TAP_LOG(ERR, "Cannot allocate memory for rte_flow");
		goto fail;
	}
	msg = &remote_flow->msg;
	if (idx == TAP_REMOTE_TX) {
		if_index = pmd->if_index;
	} else if (idx == TAP_ISOLATE) {
		if_index = pmd->if_index;
		/* Don't be exclusive for this rule, it can be changed later. */
		flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
		isolate_actions[0].type = pmd->flow_isolate ?
			RTE_FLOW_ACTION_TYPE_DROP :
			RTE_FLOW_ACTION_TYPE_PASSTHRU;
		actions = isolate_actions;
	} else if (idx == TAP_REMOTE_LOCAL_MAC) {
		/*
		 * eth addr couldn't be set in implicit_rte_flows[] as it is
		 * not known at compile time.
		 */
		memcpy(&eth_local.dst, &pmd->eth_addr, sizeof(pmd->eth_addr));
		items = items_local;
	}
	tc_init_msg(msg, if_index, RTM_NEWTFILTER, flags);
	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
	/*
	 * The ISOLATE rule is always present and must have a static handle,
	 * as the action is changed whether the feature is enabled (DROP) or
	 * disabled (PASSTHRU).
	 * There is just one REMOTE_PROMISCUOUS rule in all cases. It should
	 * have a static handle such that adding it twice will fail with
	 * EEXIST with any kernel version.
	 */
	if (idx == TAP_ISOLATE)
		remote_flow->msg.t.tcm_handle = ISOLATE_HANDLE;
	else if (idx == TAP_REMOTE_PROMISC)
		remote_flow->msg.t.tcm_handle = REMOTE_PROMISCUOUS_HANDLE;
	else
		tap_flow_set_handle(remote_flow);
	if (priv_flow_process(pmd, attr, items, actions, NULL,
			      remote_flow, implicit_rte_flows[idx].mirred)) {
		TAP_LOG(ERR, "rte flow rule validation failed");
		goto fail;
	}
	err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
	if (err < 0) {
		TAP_LOG(ERR, "Failure sending nl request");
		goto fail;
	}
	err = tap_nl_recv_ack(pmd->nlsk_fd);
	if (err < 0) {
		/* Silently ignore re-entering existing rule */
		if (errno == EEXIST)
			goto success;
		TAP_LOG(ERR,
			"Kernel refused TC filter rule creation (%d): %s",
			errno, strerror(errno));
		goto fail;
	}
	LIST_INSERT_HEAD(&pmd->implicit_flows, remote_flow, next);
success:
	return 0;
fail:
	if (remote_flow)
		rte_free(remote_flow);
	return -1;
}
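
/**
 * Remove a specific implicit flow rule on the remote device.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] idx
 *   The idx in the implicit_rte_flows array specifying which rule to remove.
 *
 * @return -1 if the rule couldn't be destroyed, 0 otherwise.
 */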
int tap_flow_implicit_destroy(struct pmd_internals *pmd,
			      enum implicit_rule_index idx)
{
	struct rte_flow *remote_flow;
	int cur_prio = -1;
	int idx_prio = implicit_rte_flows[idx].attr.priority + PRIORITY_OFFSET;

	for (remote_flow = LIST_FIRST(&pmd->implicit_flows);
	     remote_flow;
	     remote_flow = LIST_NEXT(remote_flow, next)) {
		cur_prio = (remote_flow->msg.t.tcm_info >> 16) & PRIORITY_MASK;
		if (cur_prio != idx_prio)
			continue;
		return tap_flow_destroy_pmd(pmd, remote_flow, NULL);
	}
	return 0;
}
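
/**
 * Destroy all implicit flows.
 *
 * @see rte_flow_flush()
 */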
int
tap_flow_implicit_flush(struct pmd_internals *pmd, struct rte_flow_error *error)
{
	struct rte_flow *remote_flow;

	while (!LIST_EMPTY(&pmd->implicit_flows)) {
		remote_flow = LIST_FIRST(&pmd->implicit_flows);
		if (tap_flow_destroy_pmd(pmd, remote_flow, error) < 0)
			return -1;
	}
	return 0;
}

#define MAX_RSS_KEYS 256
#define KEY_IDX_OFFSET (3 * MAX_RSS_KEYS)
#define SEC_NAME_CLS_Q "cls_q"

static const char *sec_name[SEC_MAX] = {
	[SEC_L3_L4] = "l3_l4",
};
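
/**
 * Enable RSS on tap: create per-queue TC rules for the BPF classifier.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] attr
 *   Pointer to rte_flow attributes to get the flow group.
 * @param[out] error
 *   Pointer to error reporting if not NULL.
 *
 * @return 0 on success, negative value on failure.
 */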
static int rss_enable(struct pmd_internals *pmd,
		      const struct rte_flow_attr *attr,
		      struct rte_flow_error *error)
{
	struct rte_flow *rss_flow = NULL;
	struct nlmsg *msg = NULL;
	/* buffer for the BPF section annotation, e.g. "[cls_q0]" */
	char annotation[64];
	int i;
	int err = 0;

	/* unlimit locked memory, required to load the BPF programs */
	struct rlimit memlock_limit = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};
	setrlimit(RLIMIT_MEMLOCK, &memlock_limit);

	/* Initialize RSS key management */
	err = bpf_rss_key(KEY_CMD_INIT, NULL);
	if (err < 0) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"Failed to initialize BPF RSS keys");

		return -1;
	}

	/*
	 * Create the BPF RSS map: one entry per RSS key index, holding the
	 * queues to spread traffic to.
	 */
	pmd->map_fd = tap_flow_bpf_rss_map_create(sizeof(__u32), /* key size */
						  sizeof(struct rss_key),
						  MAX_RSS_KEYS);
	if (pmd->map_fd < 0) {
		TAP_LOG(ERR,
			"Failed to create BPF map (%d): %s",
			errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"Kernel too old or not configured "
			"to support BPF maps");

		return -ENOTSUP;
	}

	/*
	 * Add a rule per queue to match reclassified packets and direct them
	 * to the correct queue.
	 */
	for (i = 0; i < pmd->dev->data->nb_rx_queues; i++) {
		pmd->bpf_fd[i] = tap_flow_bpf_cls_q(i);
		if (pmd->bpf_fd[i] < 0) {
			TAP_LOG(ERR,
				"Failed to load BPF section %s for queue %d",
				SEC_NAME_CLS_Q, i);
			rte_flow_error_set(
				error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL,
				"Kernel too old or not configured "
				"to support BPF programs loading");

			return -ENOTSUP;
		}

		rss_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
		if (!rss_flow) {
			TAP_LOG(ERR,
				"Cannot allocate memory for rte_flow");
			return -1;
		}
		msg = &rss_flow->msg;
		tc_init_msg(msg, pmd->if_index, RTM_NEWTFILTER, NLM_F_REQUEST |
			    NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
		msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
		tap_flow_set_handle(rss_flow);
		uint16_t group = attr->group << GROUP_SHIFT;
		uint16_t prio = group | (i + PRIORITY_OFFSET);
		msg->t.tcm_info = TC_H_MAKE(prio << 16, msg->t.tcm_info);
		msg->t.tcm_parent = TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);

		tap_nlattr_add(&msg->nh, TCA_KIND, sizeof("bpf"), "bpf");
		if (tap_nlattr_nested_start(msg, TCA_OPTIONS) < 0)
			return -1;
		tap_nlattr_add32(&msg->nh, TCA_BPF_FD, pmd->bpf_fd[i]);
		snprintf(annotation, sizeof(annotation), "[%s%d]",
			 SEC_NAME_CLS_Q, i);
		tap_nlattr_add(&msg->nh, TCA_BPF_NAME, strlen(annotation) + 1,
			       annotation);
		/* Actions */
		{
			struct action_data adata = {
				.id = "skbedit",
				.skbedit = {
					.skbedit = {
						.action = TC_ACT_PIPE,
					},
					.queue = i,
				},
			};
			if (add_actions(rss_flow, 1, &adata, TCA_BPF_ACT) < 0)
				return -1;
		}
		tap_nlattr_nested_finish(msg); /* nested TCA_OPTIONS */

		/* Netlink message is now ready to be sent */
		if (tap_nl_send(pmd->nlsk_fd, &msg->nh) < 0)
			return -1;
		err = tap_nl_recv_ack(pmd->nlsk_fd);
		if (err < 0) {
			TAP_LOG(ERR,
				"Kernel refused TC filter rule creation (%d): %s",
				errno, strerror(errno));
			return err;
		}
		LIST_INSERT_HEAD(&pmd->rss_flows, rss_flow, next);
	}

	pmd->rss_enabled = 1;
	return err;
}
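
/**
 * Manage BPF RSS keys.
 *
 * @param[in] cmd
 *   Command on RSS keys: init, get, release, deinit.
 * @param[in, out] key_idx
 *   Pointer to RSS key index (out for get command, in for release command).
 *
 * @return -1 if a failure occurred, 0 otherwise.
 */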
static int bpf_rss_key(enum bpf_rss_key_e cmd, __u32 *key_idx)
{
	__u32 i;
	int err = 0;
	static __u32 num_used_keys;
	static __u32 rss_keys[MAX_RSS_KEYS] = {KEY_STAT_UNSPEC};
	static __u32 rss_keys_initialized;
	__u32 key;

	switch (cmd) {
	case KEY_CMD_GET:
		if (!rss_keys_initialized) {
			err = -1;
			break;
		}

		if (num_used_keys == RTE_DIM(rss_keys)) {
			err = -1;
			break;
		}

		*key_idx = num_used_keys % RTE_DIM(rss_keys);
		while (rss_keys[*key_idx] == KEY_STAT_USED)
			*key_idx = (*key_idx + 1) % RTE_DIM(rss_keys);

		rss_keys[*key_idx] = KEY_STAT_USED;

		/*
		 * Add an offset to key_idx in order to handle a case of
		 * RSS and non RSS flows mixture.
		 * If a non RSS flow is destroyed it has an eBPF map
		 * index 0 (initialized on flow creation) and might
		 * unintentionally remove RSS flow entry with index 0.
		 * To avoid this issue, add an offset to the real index
		 * during a KEY_CMD_GET operation and subtract this offset
		 * during a KEY_CMD_RELEASE operation in order to restore
		 * the real index.
		 */
		*key_idx += KEY_IDX_OFFSET;
		num_used_keys++;
		break;

	case KEY_CMD_RELEASE:
		if (!rss_keys_initialized)
			break;

		/*
		 * Subtract the offset to restore the real key index.
		 * If a non RSS flow is falsely trying to release map
		 * entry 0, the offset subtraction will calculate the real
		 * map index as an out-of-range value and the release
		 * operation will be silently ignored.
		 */
		key = *key_idx - KEY_IDX_OFFSET;
		if (key >= RTE_DIM(rss_keys))
			break;

		if (rss_keys[key] == KEY_STAT_USED) {
			rss_keys[key] = KEY_STAT_AVAILABLE;
			num_used_keys--;
		}
		break;

	case KEY_CMD_INIT:
		for (i = 0; i < RTE_DIM(rss_keys); i++)
			rss_keys[i] = KEY_STAT_AVAILABLE;

		rss_keys_initialized = 1;
		num_used_keys = 0;
		break;

	case KEY_CMD_DEINIT:
		for (i = 0; i < RTE_DIM(rss_keys); i++)
			rss_keys[i] = KEY_STAT_UNSPEC;

		rss_keys_initialized = 0;
		num_used_keys = 0;
		break;

	default:
		break;
	}

	return err;
}
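
/**
 * Add RSS hash calculations and queue selection to a flow.
 *
 * @param[in, out] flow
 *   Flow to add the RSS actions to.
 * @param[in, out] pmd
 *   Pointer to internal structure. Used to set/get the RSS map fd.
 * @param[in] rss
 *   Pointer to RSS flow actions.
 * @param[out] error
 *   Pointer to error reporting if not NULL.
 *
 * @return 0 on success, negative value on failure.
 */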
static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,
			   const struct rte_flow_action_rss *rss,
			   struct rte_flow_error *error)
{
	unsigned int i;
	int err;
	struct rss_key rss_entry = { .hash_fields = 0,
				     .key_size = 0 };

	/* Check supported RSS features */
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			 "non-default RSS hash functions are not supported");
	if (rss->level)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			 "a nonzero RSS encapsulation level is not supported");

	/* Get a new map key for a new RSS rule */
	err = bpf_rss_key(KEY_CMD_GET, &flow->key_idx);
	if (err < 0) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"Failed to get BPF RSS key");

		return -1;
	}

	/* Update RSS map entry with queues */
	rss_entry.nb_queues = rss->queue_num;
	for (i = 0; i < rss->queue_num; i++)
		rss_entry.queues[i] = rss->queue[i];
	rss_entry.hash_fields =
		(1 << HASH_FIELD_IPV4_L3_L4) | (1 << HASH_FIELD_IPV6_L3_L4);

	/* Add this RSS entry to the map */
	err = tap_flow_bpf_update_rss_elem(pmd->map_fd,
					   &flow->key_idx, &rss_entry);

	if (err) {
		TAP_LOG(ERR,
			"Failed to update BPF map entry #%u (%d): %s",
			flow->key_idx, errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"Kernel too old or not configured "
			"to support BPF maps updates");

		return -ENOTSUP;
	}

	/*
	 * Load the BPF program that calculates the hash for this key_idx
	 */
	flow->bpf_fd[SEC_L3_L4] =
		tap_flow_bpf_calc_l3_l4_hash(flow->key_idx, pmd->map_fd);
	if (flow->bpf_fd[SEC_L3_L4] < 0) {
		TAP_LOG(ERR,
			"Failed to load BPF section %s (%d): %s",
			sec_name[SEC_L3_L4], errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"Kernel too old or not configured "
			"to support BPF program loading");

		return -ENOTSUP;
	}

	/* Actions */
	{
		struct action_data adata[] = {
			{
				.id = "bpf",
				.bpf = {
					.bpf_fd = flow->bpf_fd[SEC_L3_L4],
					.annotation = sec_name[SEC_L3_L4],
					.bpf = {
						.action = TC_ACT_PIPE,
					},
				},
			},
		};

		if (add_actions(flow, RTE_DIM(adata), adata,
				TCA_FLOWER_ACT) < 0)
			return -1;
	}

	return 0;
}
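
/**
 * Get rte_flow operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param ops
 *   Pointer to the flow operations structure to return.
 *
 * @return 0 on success.
 */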
int
tap_dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
		     const struct rte_flow_ops **ops)
{
	*ops = &tap_flow_ops;
	return 0;
}