1
2
3
4#include <stdbool.h>
5#include <stddef.h>
6#include <stdint.h>
7#include <string.h>
8
9#include <linux/bpf.h>
10#include <linux/icmp.h>
11#include <linux/icmpv6.h>
12#include <linux/if_ether.h>
13#include <linux/in.h>
14#include <linux/ip.h>
15#include <linux/ipv6.h>
16#include <linux/pkt_cls.h>
17#include <linux/tcp.h>
18#include <linux/udp.h>
19
20#include <bpf/bpf_helpers.h>
21#include <bpf/bpf_endian.h>
22
23#include "test_cls_redirect.h"
24
/* When SUBPROGS is defined, helpers become separate non-inlined BPF
 * subprograms; otherwise everything is inlined into the entry point.
 */
#ifdef SUBPROGS
#define INLINING __noinline
#else
#define INLINING __always_inline
#endif

/* Offset of the first byte past MEMBER within TYPE. */
#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))

/* Fragment-offset and more-fragments bits of iphdr.frag_off (host order;
 * compare against bpf_htons()-converted constants).
 */
#define IP_OFFSET_MASK (0x1FFF)
#define IP_MF (0x2000)

char _license[] SEC("license") = "Dual BSD/GPL";

/* Filled in by userspace before load: the UDP destination port and IPv4
 * address on which this program accepts encapsulated packets.
 */
volatile const __be16 ENCAPSULATION_PORT;
volatile const __be32 ENCAPSULATION_IP;
44
/* Per-CPU counters exported to userspace via metrics_map. The first group
 * counts packets by disposition; the errors_* group counts drop reasons.
 */
typedef struct {
	uint64_t processed_packets_total;
	uint64_t l3_protocol_packets_total_ipv4;
	uint64_t l3_protocol_packets_total_ipv6;
	uint64_t l4_protocol_packets_total_tcp;
	uint64_t l4_protocol_packets_total_udp;
	uint64_t accepted_packets_total_syn;
	uint64_t accepted_packets_total_syn_cookies;
	uint64_t accepted_packets_total_last_hop;
	uint64_t accepted_packets_total_icmp_echo_request;
	uint64_t accepted_packets_total_established;
	uint64_t forwarded_packets_total_gue;
	uint64_t forwarded_packets_total_gre;

	/* Reasons a packet was dropped (TC_ACT_SHOT) or rejected. */
	uint64_t errors_total_unknown_l3_proto;
	uint64_t errors_total_unknown_l4_proto;
	uint64_t errors_total_malformed_ip;
	uint64_t errors_total_fragmented_ip;
	uint64_t errors_total_malformed_icmp;
	uint64_t errors_total_unwanted_icmp;
	uint64_t errors_total_malformed_icmp_pkt_too_big;
	uint64_t errors_total_malformed_tcp;
	uint64_t errors_total_malformed_udp;
	uint64_t errors_total_icmp_echo_replies;
	uint64_t errors_total_malformed_encapsulation;
	uint64_t errors_total_encap_adjust_failed;
	uint64_t errors_total_encap_buffer_too_small;
	uint64_t errors_total_redirect_loop;
	uint64_t errors_total_encap_mtu_violate;
} metrics_t;
75
/* Classification result for a decapsulated packet. */
typedef enum {
	INVALID = 0,	/* malformed or unwanted: drop */
	UNKNOWN,	/* no local state: forward to the next hop */
	ECHO_REQUEST,	/* ICMP(v6) echo request: accept locally */
	SYN,		/* TCP SYN: accept locally unless forward_syn is set */
	SYN_COOKIE,	/* ACK carrying a valid SYN cookie: accept locally */
	ESTABLISHED,	/* matches a local established socket: accept locally */
} verdict_t;
84
/* L4 source/destination port pair. Copied directly over the sport/dport
 * members of struct bpf_sock_tuple, so it must be layout-compatible with
 * them (checked by the _Static_asserts below).
 */
typedef struct {
	uint16_t src, dst;
} flow_ports_t;
88
89_Static_assert(
90 sizeof(flow_ports_t) !=
91 offsetofend(struct bpf_sock_tuple, ipv4.dport) -
92 offsetof(struct bpf_sock_tuple, ipv4.sport) - 1,
93 "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
94_Static_assert(
95 sizeof(flow_ports_t) !=
96 offsetofend(struct bpf_sock_tuple, ipv6.dport) -
97 offsetof(struct bpf_sock_tuple, ipv6.sport) - 1,
98 "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
99
/* Result of a processing step: either a TC_ACT_* verdict to return from
 * the program, or CONTINUE_PROCESSING.
 */
typedef int ret_t;

/* Sentinel that is never a valid TC_ACT_* value: the step reached no
 * verdict and processing should continue.
 */
static const ret_t CONTINUE_PROCESSING = -1;

/* Evaluate x; if it produced a real verdict, return it from the
 * enclosing function, otherwise fall through.
 */
#define MAYBE_RETURN(x) \
	do { \
		ret_t __ret = x; \
		if (__ret != CONTINUE_PROCESSING) \
			return __ret; \
	} while (0)
116
117
118
119
120
121
122
123
/* Pointer into packet data, annotated as 8-byte aligned. */
typedef uint8_t *net_ptr __attribute__((align_value(8)));

/* Parsing cursor over a packet. head advances as data is consumed; tail
 * marks the end of the linear data (skb->data_end). Bytes between head
 * and tail can be accessed directly; anything past tail must go through
 * bpf_skb_load_bytes (see buf_copy / buf_assign).
 */
typedef struct buf {
	struct __sk_buff *skb;
	net_ptr head;
	/* NOTE: tail is deliberately NOT declared with extra alignment,
	 * so the compiler can't assume alignment and "optimize" accesses.
	 */
	uint8_t *const tail;
} buf_t;
134
/* Current cursor offset from the start of the linear packet data.
 *
 * The subtraction is done in inline asm on purpose: subtracting two
 * packet pointers in C lets the compiler/verifier track the result as a
 * packet pointer, which is not what we want here — we need a plain
 * scalar offset to feed into bpf_skb_load_bytes. The asm forces a
 * scalar subtraction the verifier accepts.
 */
static __always_inline size_t buf_off(const buf_t *buf)
{
	size_t off = (size_t)buf->head;
	asm("%0 -= %1" : "+r"(off) : "r"(buf->skb->data));
	return off;
}
154
155static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
156{
157 if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
158 return false;
159 }
160
161 buf->head += len;
162 return true;
163}
164
165static __always_inline bool buf_skip(buf_t *buf, const size_t len)
166{
167
168 if (buf_off(buf) + len > buf->skb->len) {
169 return false;
170 }
171
172 buf->head += len;
173 return true;
174}
175
176
177
178
179
180
181
182
/* Return a pointer to len bytes at the cursor and advance past them.
 *
 * If the bytes lie within the linear portion (head..tail), the returned
 * pointer aliases the packet directly; otherwise they are copied into
 * scratch and scratch is returned. Passing scratch == NULL makes direct
 * access mandatory. Returns NULL on failure.
 *
 * NOTE: writes through the returned pointer only reach the packet in
 * the direct-access case, not when scratch was used.
 */
static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
{
	if (buf->head + len > buf->tail) {
		if (scratch == NULL) {
			return NULL;
		}

		return buf_copy(buf, scratch, len) ? scratch : NULL;
	}

	void *ptr = buf->head;
	buf->head += len;
	return ptr;
}
197
198static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
199{
200 if (ipv4->ihl <= 5) {
201 return true;
202 }
203
204 return buf_skip(buf, (ipv4->ihl - 5) * 4);
205}
206
207static INLINING bool ipv4_is_fragment(const struct iphdr *ip)
208{
209 uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
210 return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
211}
212
213static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
214{
215 struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
216 if (ipv4 == NULL) {
217 return NULL;
218 }
219
220 if (ipv4->ihl < 5) {
221 return NULL;
222 }
223
224 if (!pkt_skip_ipv4_options(pkt, ipv4)) {
225 return NULL;
226 }
227
228 return ipv4;
229}
230
231
232static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
233{
234 if (!buf_copy(pkt, ports, sizeof(*ports))) {
235 return false;
236 }
237
238
239
240
241 uint16_t dst = ports->src;
242 ports->src = ports->dst;
243 ports->dst = dst;
244 return true;
245}
246
/* Fold a 32-bit ones'-complement sum into a 16-bit Internet checksum.
 * Two folds suffice: after the first, the carry is at most 1. The second
 * fold may leave set bits above bit 15; the final cast discards them.
 */
static INLINING uint16_t pkt_checksum_fold(uint32_t csum)
{
	csum = (csum >> 16) + (csum & 0xffff);
	csum += csum >> 16;
	return (uint16_t)~csum;
}
256
/* Recompute the IPv4 header checksum in place.
 *
 * Only handles option-less 20-byte headers (enforced by the
 * _Static_assert below); callers use this on encap headers they built
 * themselves.
 */
static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
{
	/* The checksum field itself must be zero while summing. */
	iph->check = 0;

	/* Sum the header as ten 16-bit words. Full unrolling keeps the
	 * loop verifier-friendly and lets the compiler fold the offsets.
	 */
	_Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
	uint32_t acc = 0;
	uint16_t *ipw = (uint16_t *)iph;

#pragma clang loop unroll(full)
	for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
		acc += ipw[i];
	}

	iph->check = pkt_checksum_fold(acc);
}
277
/* Walk the IPv6 extension header chain at the cursor, leaving the cursor
 * on the upper-layer payload. On success, *upper_proto holds the
 * upper-layer protocol number and *is_fragment records whether a
 * fragment header was seen. Returns false on truncated input or when
 * more than six extension headers are chained.
 */
static INLINING
bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
				     const struct ipv6hdr *ipv6,
				     uint8_t *upper_proto,
				     bool *is_fragment)
{
	/* We recognize five extension header types. RFC 8200 §4.1 says
	 * each should occur at most once (Destination Options may occur
	 * twice), so give up after six iterations. The struct mirrors the
	 * common next-header/length prefix shared by all of them.
	 */
	struct {
		uint8_t next;
		uint8_t len;
	} exthdr = {
		.next = ipv6->nexthdr,
	};
	*is_fragment = false;

#pragma clang loop unroll(full)
	for (int i = 0; i < 6; i++) {
		switch (exthdr.next) {
		case IPPROTO_FRAGMENT:
			*is_fragment = true;
			/* Intentional fallthrough: a fragment header is
			 * skipped like any other extension header.
			 */
		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
		case IPPROTO_MH:
			if (!buf_copy(pkt, &exthdr, sizeof(exthdr))) {
				return false;
			}

			/* len is in 8-octet units, not counting the first
			 * 8 octets; buf_copy already consumed two bytes.
			 */
			if (!buf_skip(pkt,
				      (exthdr.len + 1) * 8 - sizeof(exthdr))) {
				return false;
			}

			/* Loop again with the freshly read next header. */
			break;

		default:
			/* Not an extension header we recognize: treat it
			 * as the upper-layer protocol and stop. Callers
			 * reject protocol numbers they don't handle.
			 */
			*upper_proto = exthdr.next;
			return true;
		}
	}

	/* Too many extension headers: never found the upper layer. */
	return false;
}
341
342
343
344
345
346
347static __always_inline struct ipv6hdr *
348pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
349 bool *is_fragment)
350{
351 struct ipv6hdr *ipv6 = buf_assign(pkt, sizeof(*ipv6), scratch);
352 if (ipv6 == NULL) {
353 return NULL;
354 }
355
356 if (!pkt_skip_ipv6_extension_headers(pkt, ipv6, proto, is_fragment)) {
357 return NULL;
358 }
359
360 return ipv6;
361}
362
363
364
/* Single-slot per-CPU array holding this CPU's metrics_t. */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, unsigned int);
	__type(value, metrics_t);
} metrics_map SEC(".maps");
371
372static INLINING metrics_t *get_global_metrics(void)
373{
374 uint64_t key = 0;
375 return bpf_map_lookup_elem(&metrics_map, &key);
376}
377
/* Strip the encapsulation and re-inject the inner packet into the local
 * stack via the ingress path of the interface it arrived on.
 */
static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
{
	/* Bytes up to and including the variable-length hop list. */
	const int payload_off =
		sizeof(*encap) +
		sizeof(struct in_addr) * encap->unigue.hop_count;
	int32_t encap_overhead = payload_off - sizeof(struct ethhdr);

	/* Fix up the ethertype if the inner packet is IPv6. */
	if (encap->gue.proto_ctype == IPPROTO_IPV6) {
		encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
	}

	/* Shrink away the encapsulation and drop one checksum level,
	 * since the inner packet becomes the outermost one.
	 */
	if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
				BPF_F_ADJ_ROOM_FIXED_GSO |
				BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
	    bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
		return TC_ACT_SHOT;

	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
}
398
/* Rewrite the packet into an IP/GRE forward to next_hop: shrink the
 * GUE/unigue encapsulation down to the outer IP header plus a GRE base
 * header, decrement the inner TTL / hop limit, and redirect the frame
 * back out the same interface.
 */
static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
				       struct in_addr *next_hop, metrics_t *metrics)
{
	metrics->forwarded_packets_total_gre++;

	/* Bytes up to and including the variable-length hop list. */
	const int payload_off =
		sizeof(*encap) +
		sizeof(struct in_addr) * encap->unigue.hop_count;
	int32_t encap_overhead =
		payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
	int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
	uint16_t proto = ETH_P_IP;
	uint32_t mtu_len = 0;

	/* Decrement the inner TTL / hop limit before forwarding so
	 * routing loops eventually terminate. Done via load/store at a
	 * fixed offset because the header may lie outside the linear
	 * data.
	 */
	if (encap->gue.proto_ctype == IPPROTO_IPV6) {
		proto = ETH_P_IPV6;
		uint8_t ttl;
		int rc;

		rc = bpf_skb_load_bytes(
			skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
			&ttl, 1);
		if (rc != 0) {
			metrics->errors_total_malformed_encapsulation++;
			return TC_ACT_SHOT;
		}

		if (ttl == 0) {
			metrics->errors_total_redirect_loop++;
			return TC_ACT_SHOT;
		}

		ttl--;
		rc = bpf_skb_store_bytes(
			skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
			&ttl, 1, 0);
		if (rc != 0) {
			metrics->errors_total_malformed_encapsulation++;
			return TC_ACT_SHOT;
		}
	} else {
		uint8_t ttl;
		int rc;

		rc = bpf_skb_load_bytes(
			skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
			1);
		if (rc != 0) {
			metrics->errors_total_malformed_encapsulation++;
			return TC_ACT_SHOT;
		}

		if (ttl == 0) {
			metrics->errors_total_redirect_loop++;
			return TC_ACT_SHOT;
		}

		/* IPv4 also covers the TTL with its header checksum:
		 * patch the checksum for the ttl -> ttl - 1 change (the
		 * TTL lives in a 16-bit word, hence size 2) before
		 * storing the new value.
		 */
		rc = bpf_l3_csum_replace(
			skb, payload_off + offsetof(struct iphdr, check), ttl,
			ttl - 1, 2);
		if (rc != 0) {
			metrics->errors_total_malformed_encapsulation++;
			return TC_ACT_SHOT;
		}

		ttl--;
		rc = bpf_skb_store_bytes(
			skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
			0);
		if (rc != 0) {
			metrics->errors_total_malformed_encapsulation++;
			return TC_ACT_SHOT;
		}
	}

	/* Refuse to grow the packet beyond the egress MTU. */
	if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) {
		metrics->errors_total_encap_mtu_violate++;
		return TC_ACT_SHOT;
	}

	/* Resize the headroom between L3 and L4: delta is negative when
	 * the GUE headers were larger than the GRE header replacing them.
	 */
	if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
				BPF_F_ADJ_ROOM_FIXED_GSO |
				BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
	    bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
		metrics->errors_total_encap_adjust_failed++;
		return TC_ACT_SHOT;
	}

	/* adjust_room invalidates packet pointers; pull the new header
	 * area into the linear portion so it can be written directly.
	 */
	if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
		metrics->errors_total_encap_buffer_too_small++;
		return TC_ACT_SHOT;
	}

	buf_t pkt = {
		.skb = skb,
		.head = (uint8_t *)(long)skb->data,
		.tail = (uint8_t *)(long)skb->data_end,
	};

	encap_gre_t *encap_gre = buf_assign(&pkt, sizeof(encap_gre_t), NULL);
	if (encap_gre == NULL) {
		metrics->errors_total_encap_buffer_too_small++;
		return TC_ACT_SHOT;
	}

	/* Rewrite the outer IP header for GRE and fix its checksum. */
	encap_gre->ip.protocol = IPPROTO_GRE;
	encap_gre->ip.daddr = next_hop->s_addr;
	encap_gre->ip.saddr = ENCAPSULATION_IP;
	encap_gre->ip.tot_len =
		bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
	encap_gre->gre.flags = 0;
	encap_gre->gre.protocol = bpf_htons(proto);
	pkt_ipv4_checksum((void *)&encap_gre->ip);

	return bpf_redirect(skb->ifindex, 0);
}
525
/* Forward the still-encapsulated packet to next_hop, either via GUE as
 * is, or re-encapsulated with GRE when next_hop is the final hop and
 * requested GRE delivery.
 */
static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
					  struct in_addr *next_hop, metrics_t *metrics)
{
	/* Swap the L2 addresses so the frame goes back out the interface
	 * it arrived on, towards the node that sent it to us.
	 */
	unsigned char temp[ETH_ALEN];
	memcpy(temp, encap->eth.h_dest, sizeof(temp));
	memcpy(encap->eth.h_dest, encap->eth.h_source,
	       sizeof(encap->eth.h_dest));
	memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));

	if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
	    encap->unigue.last_hop_gre) {
		return forward_with_gre(skb, encap, next_hop, metrics);
	}

	metrics->forwarded_packets_total_gue++;
	uint32_t old_saddr = encap->ip.saddr;
	encap->ip.saddr = encap->ip.daddr;
	encap->ip.daddr = next_hop->s_addr;
	if (encap->unigue.next_hop < encap->unigue.hop_count) {
		encap->unigue.next_hop++;
	}

	/* Swapping saddr and daddr leaves the checksum unchanged (the sum
	 * is commutative); only the old_saddr -> next_hop replacement has
	 * to be folded into the header checksum.
	 */
	const uint64_t off = offsetof(typeof(*encap), ip.check);
	int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
	if (ret < 0) {
		return TC_ACT_SHOT;
	}

	return bpf_redirect(skb->ifindex, 0);
}
562
/* Skip over n IPv4 addresses in the hop list. Only n == 0 and n == 1 are
 * supported; anything else drops the packet.
 */
static INLINING ret_t skip_next_hops(buf_t *pkt, int n)
{
	switch (n) {
	case 1:
		if (!buf_skip(pkt, sizeof(struct in_addr)))
			return TC_ACT_SHOT;
		/* Intentional fallthrough. */
	case 0:
		return CONTINUE_PROCESSING;

	default:
		return TC_ACT_SHOT;
	}
}
576
577
578
579
580
581
582
/* Extract the next-hop address from the hop list that follows the
 * encapsulation headers, leaving the cursor at the start of the payload.
 * Sets next_hop->s_addr to 0 when this host is the final hop.
 */
static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
				   struct in_addr *next_hop)
{
	/* next_hop past hop_count is inconsistent: drop. */
	if (encap->unigue.next_hop > encap->unigue.hop_count) {
		return TC_ACT_SHOT;
	}

	/* Skip the hops that were already visited. */
	MAYBE_RETURN(skip_next_hops(pkt, encap->unigue.next_hop));

	if (encap->unigue.next_hop == encap->unigue.hop_count) {
		/* No more next hops: the packet is for us. */
		next_hop->s_addr = 0;
		return CONTINUE_PROCESSING;
	}

	if (!buf_copy(pkt, next_hop, sizeof(*next_hop))) {
		return TC_ACT_SHOT;
	}

	/* Skip the rest of the hop list so the cursor ends up on the
	 * encapsulated payload.
	 */
	return skip_next_hops(pkt, encap->unigue.hop_count -
				   encap->unigue.next_hop - 1);
}
607
608
609
610
611
612
613
614
615
616
617
618static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
619 uint64_t iphlen, uint16_t sport, uint16_t dport)
620{
621 switch (iphlen) {
622 case sizeof(struct iphdr): {
623 struct iphdr *ipv4 = (struct iphdr *)iph;
624 tuple->ipv4.daddr = ipv4->daddr;
625 tuple->ipv4.saddr = ipv4->saddr;
626 tuple->ipv4.sport = sport;
627 tuple->ipv4.dport = dport;
628 return sizeof(tuple->ipv4);
629 }
630
631 case sizeof(struct ipv6hdr): {
632 struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
633 memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
634 sizeof(tuple->ipv6.daddr));
635 memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
636 sizeof(tuple->ipv6.saddr));
637 tuple->ipv6.sport = sport;
638 tuple->ipv6.dport = dport;
639 return sizeof(tuple->ipv6);
640 }
641
642 default:
643 return 0;
644 }
645}
646
/* Classify a TCP flow via socket lookup.
 *
 * Returns ESTABLISHED when a non-listening socket exists for the tuple,
 * SYN_COOKIE when a listening socket accepts the segment's syncookie,
 * and UNKNOWN otherwise. iph/tcp may be NULL (classify_icmp calls it
 * that way), in which case the syncookie check is skipped. The socket
 * reference is released on every path.
 */
static INLINING verdict_t classify_tcp(struct __sk_buff *skb,
				       struct bpf_sock_tuple *tuple, uint64_t tuplen,
				       void *iph, struct tcphdr *tcp)
{
	struct bpf_sock *sk =
		bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
	if (sk == NULL) {
		return UNKNOWN;
	}

	if (sk->state != BPF_TCP_LISTEN) {
		bpf_sk_release(sk);
		return ESTABLISHED;
	}

	if (iph != NULL && tcp != NULL) {
		/* Derive the IP header length from the tuple length. */
		uint64_t iphlen = sizeof(struct iphdr);
		if (tuplen == sizeof(tuple->ipv6)) {
			iphlen = sizeof(struct ipv6hdr);
		}

		if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
					    sizeof(*tcp)) == 0) {
			bpf_sk_release(sk);
			return SYN_COOKIE;
		}
	}

	bpf_sk_release(sk);
	return UNKNOWN;
}
679
680static INLINING verdict_t classify_udp(struct __sk_buff *skb,
681 struct bpf_sock_tuple *tuple, uint64_t tuplen)
682{
683 struct bpf_sock *sk =
684 bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
685 if (sk == NULL) {
686 return UNKNOWN;
687 }
688
689 if (sk->state == BPF_TCP_ESTABLISHED) {
690 bpf_sk_release(sk);
691 return ESTABLISHED;
692 }
693
694 bpf_sk_release(sk);
695 return UNKNOWN;
696}
697
698static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
699 struct bpf_sock_tuple *tuple, uint64_t tuplen,
700 metrics_t *metrics)
701{
702 switch (proto) {
703 case IPPROTO_TCP:
704 return classify_tcp(skb, tuple, tuplen, NULL, NULL);
705
706 case IPPROTO_UDP:
707 return classify_udp(skb, tuple, tuplen);
708
709 default:
710 metrics->errors_total_malformed_icmp++;
711 return INVALID;
712 }
713}
714
/* Handle an ICMPv4 message inside the decapsulated packet.
 *
 * Echo requests are accepted locally. Of the error messages, only
 * "fragmentation needed" is examined further, by classifying the flow of
 * the offending packet embedded in the error payload; everything else is
 * dropped.
 */
static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
{
	struct icmphdr icmp;
	if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
		metrics->errors_total_malformed_icmp++;
		return INVALID;
	}

	/* We never expect to receive encapsulated echo replies. */
	if (icmp.type == ICMP_ECHOREPLY) {
		metrics->errors_total_icmp_echo_replies++;
		return INVALID;
	}

	if (icmp.type == ICMP_ECHO) {
		return ECHO_REQUEST;
	}

	if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
		metrics->errors_total_unwanted_icmp++;
		return INVALID;
	}

	struct iphdr _ip4;
	const struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
	if (ipv4 == NULL) {
		metrics->errors_total_malformed_icmp_pkt_too_big++;
		return INVALID;
	}

	/* The embedded packet is one that *we* sent, so swap source and
	 * destination to build the tuple of the original flow.
	 */
	struct bpf_sock_tuple tuple;
	tuple.ipv4.saddr = ipv4->daddr;
	tuple.ipv4.daddr = ipv4->saddr;

	/* pkt_parse_icmp_l4_ports swaps the ports for the same reason. */
	if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv4.sport)) {
		metrics->errors_total_malformed_icmp_pkt_too_big++;
		return INVALID;
	}

	return classify_icmp(pkt->skb, ipv4->protocol, &tuple,
			     sizeof(tuple.ipv4), metrics);
}
761
/* Handle an ICMPv6 message inside the decapsulated packet.
 *
 * Echo requests are accepted locally. Of the error messages, only
 * "packet too big" is examined further, by classifying the flow of the
 * embedded offending packet; everything else is dropped.
 */
static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
{
	struct icmp6hdr icmp6;
	if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
		metrics->errors_total_malformed_icmp++;
		return INVALID;
	}

	/* We never expect to receive encapsulated echo replies. */
	if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
		metrics->errors_total_icmp_echo_replies++;
		return INVALID;
	}

	if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
		return ECHO_REQUEST;
	}

	if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
		metrics->errors_total_unwanted_icmp++;
		return INVALID;
	}

	bool is_fragment;
	uint8_t l4_proto;
	struct ipv6hdr _ipv6;
	const struct ipv6hdr *ipv6 =
		pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
	if (ipv6 == NULL) {
		metrics->errors_total_malformed_icmp_pkt_too_big++;
		return INVALID;
	}

	if (is_fragment) {
		metrics->errors_total_fragmented_ip++;
		return INVALID;
	}

	/* The embedded packet is one we sent: swap src and dst. */
	struct bpf_sock_tuple tuple;
	memcpy(&tuple.ipv6.saddr, &ipv6->daddr, sizeof(tuple.ipv6.saddr));
	memcpy(&tuple.ipv6.daddr, &ipv6->saddr, sizeof(tuple.ipv6.daddr));

	/* pkt_parse_icmp_l4_ports swaps the ports for the same reason. */
	if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv6.sport)) {
		metrics->errors_total_malformed_icmp_pkt_too_big++;
		return INVALID;
	}

	return classify_icmp(pkt->skb, l4_proto, &tuple, sizeof(tuple.ipv6),
			     metrics);
}
813
814static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
815 metrics_t *metrics)
816{
817 metrics->l4_protocol_packets_total_tcp++;
818
819 struct tcphdr _tcp;
820 struct tcphdr *tcp = buf_assign(pkt, sizeof(_tcp), &_tcp);
821 if (tcp == NULL) {
822 metrics->errors_total_malformed_tcp++;
823 return INVALID;
824 }
825
826 if (tcp->syn) {
827 return SYN;
828 }
829
830 struct bpf_sock_tuple tuple;
831 uint64_t tuplen =
832 fill_tuple(&tuple, iph, iphlen, tcp->source, tcp->dest);
833 return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
834}
835
836static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
837 metrics_t *metrics)
838{
839 metrics->l4_protocol_packets_total_udp++;
840
841 struct udphdr _udp;
842 struct udphdr *udph = buf_assign(pkt, sizeof(_udp), &_udp);
843 if (udph == NULL) {
844 metrics->errors_total_malformed_udp++;
845 return INVALID;
846 }
847
848 struct bpf_sock_tuple tuple;
849 uint64_t tuplen =
850 fill_tuple(&tuple, iph, iphlen, udph->source, udph->dest);
851 return classify_udp(pkt->skb, &tuple, tuplen);
852}
853
/* Classify a decapsulated IPv4 packet: parse the header, reject
 * malformed packets and fragments, then dispatch on the L4 protocol.
 */
static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
{
	metrics->l3_protocol_packets_total_ipv4++;

	struct iphdr _ip4;
	struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
	if (ipv4 == NULL) {
		metrics->errors_total_malformed_ip++;
		return INVALID;
	}

	if (ipv4->version != 4) {
		metrics->errors_total_malformed_ip++;
		return INVALID;
	}

	/* Fragments can't be classified without reassembly: drop. */
	if (ipv4_is_fragment(ipv4)) {
		metrics->errors_total_fragmented_ip++;
		return INVALID;
	}

	switch (ipv4->protocol) {
	case IPPROTO_ICMP:
		return process_icmpv4(pkt, metrics);

	case IPPROTO_TCP:
		return process_tcp(pkt, ipv4, sizeof(*ipv4), metrics);

	case IPPROTO_UDP:
		return process_udp(pkt, ipv4, sizeof(*ipv4), metrics);

	default:
		metrics->errors_total_unknown_l4_proto++;
		return INVALID;
	}
}
890
/* Classify a decapsulated IPv6 packet: parse the header chain, reject
 * malformed packets and fragments, then dispatch on the L4 protocol.
 */
static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
{
	metrics->l3_protocol_packets_total_ipv6++;

	uint8_t l4_proto;
	bool is_fragment;
	struct ipv6hdr _ipv6;
	struct ipv6hdr *ipv6 =
		pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
	if (ipv6 == NULL) {
		metrics->errors_total_malformed_ip++;
		return INVALID;
	}

	if (ipv6->version != 6) {
		metrics->errors_total_malformed_ip++;
		return INVALID;
	}

	/* Fragments can't be classified without reassembly: drop. */
	if (is_fragment) {
		metrics->errors_total_fragmented_ip++;
		return INVALID;
	}

	switch (l4_proto) {
	case IPPROTO_ICMPV6:
		return process_icmpv6(pkt, metrics);

	case IPPROTO_TCP:
		return process_tcp(pkt, ipv6, sizeof(*ipv6), metrics);

	case IPPROTO_UDP:
		return process_udp(pkt, ipv6, sizeof(*ipv6), metrics);

	default:
		metrics->errors_total_unknown_l4_proto++;
		return INVALID;
	}
}
930
/* TC classifier entry point.
 *
 * Accepts GUE-encapsulated packets addressed to ENCAPSULATION_IP:
 * ENCAPSULATION_PORT, validates the encapsulation, and either forwards
 * the packet to the next hop in the embedded hop list or decapsulates
 * and accepts it locally depending on the classification of the inner
 * packet. Anything that is not ours is passed on with TC_ACT_OK.
 */
SEC("classifier/cls_redirect")
int cls_redirect(struct __sk_buff *skb)
{
	metrics_t *metrics = get_global_metrics();
	if (metrics == NULL) {
		return TC_ACT_SHOT;
	}

	metrics->processed_packets_total++;

	/* Pass the packet on if the outer protocol is not IPv4;
	 * skb->protocol holds the ethertype in network byte order.
	 */
	if (skb->protocol != bpf_htons(ETH_P_IP)) {
		return TC_ACT_OK;
	}

	encap_headers_t *encap;

	/* Make sure the full encapsulation header is in the linear part
	 * of the skb so it can be accessed (and modified) directly.
	 */
	if (bpf_skb_pull_data(skb, sizeof(*encap))) {
		return TC_ACT_OK;
	}

	buf_t pkt = {
		.skb = skb,
		.head = (uint8_t *)(long)skb->data,
		.tail = (uint8_t *)(long)skb->data_end,
	};

	encap = buf_assign(&pkt, sizeof(*encap), NULL);
	if (encap == NULL) {
		return TC_ACT_OK;
	}

	if (encap->ip.ihl != 5) {
		/* We don't expect IP options on our own encapsulation. */
		return TC_ACT_OK;
	}

	/* Not addressed to us, or not UDP: not our encapsulation. */
	if (encap->ip.daddr != ENCAPSULATION_IP ||
	    encap->ip.protocol != IPPROTO_UDP) {
		return TC_ACT_OK;
	}

	/* Wrong UDP destination port: not our encapsulation. */
	if (encap->udp.dest != ENCAPSULATION_PORT) {
		return TC_ACT_OK;
	}

	/* From here on the packet is ours; malformed encapsulation is
	 * dropped rather than passed on.
	 */
	if (ipv4_is_fragment((void *)&encap->ip)) {
		metrics->errors_total_fragmented_ip++;
		return TC_ACT_SHOT;
	}

	if (encap->gue.variant != 0) {
		metrics->errors_total_malformed_encapsulation++;
		return TC_ACT_SHOT;
	}

	if (encap->gue.control != 0) {
		metrics->errors_total_malformed_encapsulation++;
		return TC_ACT_SHOT;
	}

	if (encap->gue.flags != 0) {
		metrics->errors_total_malformed_encapsulation++;
		return TC_ACT_SHOT;
	}

	/* hlen is in 32-bit words and must cover the unigue header plus
	 * one word per hop in the hop list.
	 */
	if (encap->gue.hlen !=
	    sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
		metrics->errors_total_malformed_encapsulation++;
		return TC_ACT_SHOT;
	}

	if (encap->unigue.version != 0) {
		metrics->errors_total_malformed_encapsulation++;
		return TC_ACT_SHOT;
	}

	if (encap->unigue.reserved != 0) {
		return TC_ACT_SHOT;
	}

	struct in_addr next_hop;
	MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop));

	/* s_addr == 0 means we are the last hop: classify the inner
	 * packet below and accept it locally.
	 */
	if (next_hop.s_addr == 0) {
		metrics->accepted_packets_total_last_hop++;
		return accept_locally(skb, encap);
	}

	verdict_t verdict;
	switch (encap->gue.proto_ctype) {
	case IPPROTO_IPIP:
		verdict = process_ipv4(&pkt, metrics);
		break;

	case IPPROTO_IPV6:
		verdict = process_ipv6(&pkt, metrics);
		break;

	default:
		metrics->errors_total_unknown_l3_proto++;
		return TC_ACT_SHOT;
	}

	switch (verdict) {
	case INVALID:
		/* The specific error counter was already bumped. */
		return TC_ACT_SHOT;

	case UNKNOWN:
		return forward_to_next_hop(skb, encap, &next_hop, metrics);

	case ECHO_REQUEST:
		metrics->accepted_packets_total_icmp_echo_request++;
		break;

	case SYN:
		if (encap->unigue.forward_syn) {
			return forward_to_next_hop(skb, encap, &next_hop,
						   metrics);
		}

		metrics->accepted_packets_total_syn++;
		break;

	case SYN_COOKIE:
		metrics->accepted_packets_total_syn_cookies++;
		break;

	case ESTABLISHED:
		metrics->accepted_packets_total_established++;
		break;
	}

	return accept_locally(skb, encap);
}
1076