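/*
 * Functional test for SO_REUSEPORT socket groups steered by BPF programs.
 *
 * Groups of UDP or TCP sockets are bound to one port, a classic or extended
 * BPF program is attached with SO_ATTACH_REUSEPORT_CBPF or
 * SO_ATTACH_REUSEPORT_EBPF, and the test checks that each message arrives on
 * the socket whose index equals (source port % mod).  Further cases cover
 * attaching a program without SO_REUSEPORT, attaching before bind(), and
 * binding two sockets that each carry their own program.
 */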
10#include <errno.h>
11#include <error.h>
12#include <fcntl.h>
13#include <linux/bpf.h>
14#include <linux/filter.h>
15#include <linux/unistd.h>
16#include <netinet/in.h>
17#include <netinet/tcp.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include <sys/epoll.h>
22#include <sys/types.h>
23#include <sys/socket.h>
24#include <unistd.h>
25
/*
 * Element count of a true array (not a pointer).  The guard leaves any
 * earlier definition of ARRAY_SIZE in place.
 */
#if !defined(ARRAY_SIZE)
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))
#endif
29
/*
 * Parameters describing one test run.  Instances are passed by value to the
 * individual test functions.
 */
struct test_params {
	/* Address family used for the receiving sockets. */
	int recv_family;
	/* Address family used for the sending socket. */
	int send_family;
	/* Socket type handed to socket(): SOCK_DGRAM or SOCK_STREAM. */
	int protocol;
	/* Number of sockets in the receive group. */
	size_t recv_socks;
	/* Port the receive group binds to, in host byte order. */
	uint16_t recv_port;
	/* First source port used by senders, in host byte order. */
	uint16_t send_port_min;
};
38
/*
 * Length passed as the address size to bind() and sendto().  Every address in
 * this file is allocated as a full sockaddr_storage, so that size is used
 * regardless of family.
 */
static size_t sockaddr_size(void)
{
	const size_t storage_len = sizeof(struct sockaddr_storage);

	return storage_len;
}
43
/*
 * Allocate a zero-filled sockaddr_storage holding the wildcard ("any")
 * address of the given family together with the given port.
 *
 * family: AF_INET or AF_INET6; any other value terminates the program.
 * port:   port number in host byte order.
 *
 * Returns a heap pointer that the caller releases with free().  The program
 * exits through error() if the allocation fails.
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() zero-fills, so no separate memset() is needed. */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
71
/*
 * Allocate an address for the loopback interface of the given family.
 *
 * The address starts out as the wildcard address produced by
 * new_any_sockaddr() and then has its address field overwritten with the
 * family's loopback address.  The caller releases the result with free().
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *sa = new_any_sockaddr(family, port);

	if (family == AF_INET) {
		struct sockaddr_in *in4 = (struct sockaddr_in *)sa;

		in4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa;

		in6->sin6_addr = in6addr_loopback;
	} else {
		error(1, 0, "Unsupported family %d", family);
	}

	return sa;
}
92
93static void attach_ebpf(int fd, uint16_t mod)
94{
95 static char bpf_log_buf[65536];
96 static const char bpf_license[] = "GPL";
97
98 int bpf_fd;
99 const struct bpf_insn prog[] = {
100
101 { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
102
103 { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
104
105 { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
106
107 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
108 };
109 union bpf_attr attr;
110
111 memset(&attr, 0, sizeof(attr));
112 attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
113 attr.insn_cnt = ARRAY_SIZE(prog);
114 attr.insns = (uint64_t)prog;
115 attr.license = (uint64_t)bpf_license;
116 attr.log_buf = (uint64_t)bpf_log_buf;
117 attr.log_size = sizeof(bpf_log_buf);
118 attr.log_level = 1;
119 attr.kern_version = 0;
120
121 bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
122 if (bpf_fd < 0)
123 error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
124
125 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
126 sizeof(bpf_fd)))
127 error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
128
129 close(bpf_fd);
130}
131
132static void attach_cbpf(int fd, uint16_t mod)
133{
134 struct sock_filter code[] = {
135
136 { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
137
138 { BPF_ALU | BPF_MOD, 0, 0, mod },
139
140 { BPF_RET | BPF_A, 0, 0, 0 },
141 };
142 struct sock_fprog p = {
143 .len = ARRAY_SIZE(code),
144 .filter = code,
145 };
146
147 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
148 error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
149}
150
151static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
152 void (*attach_bpf)(int, uint16_t))
153{
154 struct sockaddr * const addr =
155 new_any_sockaddr(p.recv_family, p.recv_port);
156 int i, opt;
157
158 for (i = 0; i < p.recv_socks; ++i) {
159 fd[i] = socket(p.recv_family, p.protocol, 0);
160 if (fd[i] < 0)
161 error(1, errno, "failed to create recv %d", i);
162
163 opt = 1;
164 if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
165 sizeof(opt)))
166 error(1, errno, "failed to set SO_REUSEPORT on %d", i);
167
168 if (i == 0)
169 attach_bpf(fd[i], mod);
170
171 if (bind(fd[i], addr, sockaddr_size()))
172 error(1, errno, "failed to bind recv socket %d", i);
173
174 if (p.protocol == SOCK_STREAM) {
175 opt = 4;
176 if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
177 sizeof(opt)))
178 error(1, errno,
179 "failed to set TCP_FASTOPEN on %d", i);
180 if (listen(fd[i], p.recv_socks * 10))
181 error(1, errno, "failed to listen on socket");
182 }
183 }
184 free(addr);
185}
186
187static void send_from(struct test_params p, uint16_t sport, char *buf,
188 size_t len)
189{
190 struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
191 struct sockaddr * const daddr =
192 new_loopback_sockaddr(p.send_family, p.recv_port);
193 const int fd = socket(p.send_family, p.protocol, 0);
194
195 if (fd < 0)
196 error(1, errno, "failed to create send socket");
197
198 if (bind(fd, saddr, sockaddr_size()))
199 error(1, errno, "failed to bind send socket");
200
201 if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
202 error(1, errno, "failed to send message");
203
204 close(fd);
205 free(saddr);
206 free(daddr);
207}
208
209static void test_recv_order(const struct test_params p, int fd[], int mod)
210{
211 char recv_buf[8], send_buf[8];
212 struct msghdr msg;
213 struct iovec recv_io = { recv_buf, 8 };
214 struct epoll_event ev;
215 int epfd, conn, i, sport, expected;
216 uint32_t data, ndata;
217
218 epfd = epoll_create(1);
219 if (epfd < 0)
220 error(1, errno, "failed to create epoll");
221 for (i = 0; i < p.recv_socks; ++i) {
222 ev.events = EPOLLIN;
223 ev.data.fd = fd[i];
224 if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
225 error(1, errno, "failed to register sock %d epoll", i);
226 }
227
228 memset(&msg, 0, sizeof(msg));
229 msg.msg_iov = &recv_io;
230 msg.msg_iovlen = 1;
231
232 for (data = 0; data < p.recv_socks * 2; ++data) {
233 sport = p.send_port_min + data;
234 ndata = htonl(data);
235 memcpy(send_buf, &ndata, sizeof(ndata));
236 send_from(p, sport, send_buf, sizeof(ndata));
237
238 i = epoll_wait(epfd, &ev, 1, -1);
239 if (i < 0)
240 error(1, errno, "epoll wait failed");
241
242 if (p.protocol == SOCK_STREAM) {
243 conn = accept(ev.data.fd, NULL, NULL);
244 if (conn < 0)
245 error(1, errno, "error accepting");
246 i = recvmsg(conn, &msg, 0);
247 close(conn);
248 } else {
249 i = recvmsg(ev.data.fd, &msg, 0);
250 }
251 if (i < 0)
252 error(1, errno, "recvmsg error");
253 if (i != sizeof(ndata))
254 error(1, 0, "expected size %zd got %d",
255 sizeof(ndata), i);
256
257 for (i = 0; i < p.recv_socks; ++i)
258 if (ev.data.fd == fd[i])
259 break;
260 memcpy(&ndata, recv_buf, sizeof(ndata));
261 fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
262
263 expected = (sport % mod);
264 if (i != expected)
265 error(1, 0, "expected socket %d", expected);
266 }
267}
268
269static void test_reuseport_ebpf(struct test_params p)
270{
271 int i, fd[p.recv_socks];
272
273 fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
274 build_recv_group(p, fd, p.recv_socks, attach_ebpf);
275 test_recv_order(p, fd, p.recv_socks);
276
277 p.send_port_min += p.recv_socks * 2;
278 fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
279 attach_ebpf(fd[0], p.recv_socks / 2);
280 test_recv_order(p, fd, p.recv_socks / 2);
281
282 for (i = 0; i < p.recv_socks; ++i)
283 close(fd[i]);
284}
285
286static void test_reuseport_cbpf(struct test_params p)
287{
288 int i, fd[p.recv_socks];
289
290 fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
291 build_recv_group(p, fd, p.recv_socks, attach_cbpf);
292 test_recv_order(p, fd, p.recv_socks);
293
294 p.send_port_min += p.recv_socks * 2;
295 fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
296 attach_cbpf(fd[0], p.recv_socks / 2);
297 test_recv_order(p, fd, p.recv_socks / 2);
298
299 for (i = 0; i < p.recv_socks; ++i)
300 close(fd[i]);
301}
302
303static void test_extra_filter(const struct test_params p)
304{
305 struct sockaddr * const addr =
306 new_any_sockaddr(p.recv_family, p.recv_port);
307 int fd1, fd2, opt;
308
309 fprintf(stderr, "Testing too many filters...\n");
310 fd1 = socket(p.recv_family, p.protocol, 0);
311 if (fd1 < 0)
312 error(1, errno, "failed to create socket 1");
313 fd2 = socket(p.recv_family, p.protocol, 0);
314 if (fd2 < 0)
315 error(1, errno, "failed to create socket 2");
316
317 opt = 1;
318 if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
319 error(1, errno, "failed to set SO_REUSEPORT on socket 1");
320 if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
321 error(1, errno, "failed to set SO_REUSEPORT on socket 2");
322
323 attach_ebpf(fd1, 10);
324 attach_ebpf(fd2, 10);
325
326 if (bind(fd1, addr, sockaddr_size()))
327 error(1, errno, "failed to bind recv socket 1");
328
329 if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
330 error(1, errno, "bind socket 2 should fail with EADDRINUSE");
331
332 free(addr);
333}
334
335static void test_filter_no_reuseport(const struct test_params p)
336{
337 struct sockaddr * const addr =
338 new_any_sockaddr(p.recv_family, p.recv_port);
339 const char bpf_license[] = "GPL";
340 struct bpf_insn ecode[] = {
341 { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
342 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
343 };
344 struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
345 union bpf_attr eprog;
346 struct sock_fprog cprog;
347 int fd, bpf_fd;
348
349 fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
350
351 memset(&eprog, 0, sizeof(eprog));
352 eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
353 eprog.insn_cnt = ARRAY_SIZE(ecode);
354 eprog.insns = (uint64_t)ecode;
355 eprog.license = (uint64_t)bpf_license;
356 eprog.kern_version = 0;
357
358 memset(&cprog, 0, sizeof(cprog));
359 cprog.len = ARRAY_SIZE(ccode);
360 cprog.filter = ccode;
361
362
363 bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
364 if (bpf_fd < 0)
365 error(1, errno, "ebpf error");
366 fd = socket(p.recv_family, p.protocol, 0);
367 if (fd < 0)
368 error(1, errno, "failed to create socket 1");
369
370 if (bind(fd, addr, sockaddr_size()))
371 error(1, errno, "failed to bind recv socket 1");
372
373 errno = 0;
374 if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
375 sizeof(bpf_fd)) || errno != EINVAL)
376 error(1, errno, "setsockopt should have returned EINVAL");
377
378 errno = 0;
379 if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
380 sizeof(cprog)) || errno != EINVAL)
381 error(1, errno, "setsockopt should have returned EINVAL");
382
383 free(addr);
384}
385
386static void test_filter_without_bind(void)
387{
388 int fd1, fd2, opt = 1;
389
390 fprintf(stderr, "Testing filter add without bind...\n");
391 fd1 = socket(AF_INET, SOCK_DGRAM, 0);
392 if (fd1 < 0)
393 error(1, errno, "failed to create socket 1");
394 fd2 = socket(AF_INET, SOCK_DGRAM, 0);
395 if (fd2 < 0)
396 error(1, errno, "failed to create socket 2");
397 if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
398 error(1, errno, "failed to set SO_REUSEPORT on socket 1");
399 if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
400 error(1, errno, "failed to set SO_REUSEPORT on socket 2");
401
402 attach_ebpf(fd1, 10);
403 attach_cbpf(fd2, 10);
404
405 close(fd1);
406 close(fd2);
407}
408
/*
 * Make sure both low bits of the net.ipv4.tcp_fastopen sysctl are set.
 *
 * The current value is read from /proc; if either bit of rw_mask is missing
 * the value is written back with both bits set and all other bits preserved.
 * Any failure to open, read or write the sysctl terminates the program.
 */
void enable_fastopen(void)
{
	static const char sysctl_path[] = "/proc/sys/net/ipv4/tcp_fastopen";
	/* Bits 0 and 1: client and server support per the ip-sysctl docs. */
	const int rw_mask = 3;
	char buf[16];
	ssize_t nread;
	int fd, val, size;

	fd = open(sysctl_path, O_RDONLY);
	if (fd < 0)
		error(1, errno, "Unable to open tcp_fastopen sysctl");
	/* Leave room for the terminator atoi() relies on. */
	nread = read(fd, buf, sizeof(buf) - 1);
	if (nread <= 0)
		error(1, errno, "Unable to read tcp_fastopen sysctl");
	buf[nread] = '\0';
	val = atoi(buf);
	close(fd);

	if ((val & rw_mask) != rw_mask) {
		fd = open(sysctl_path, O_RDWR);
		if (fd < 0)
			error(1, errno,
			      "Unable to open tcp_fastopen sysctl for writing");
		val |= rw_mask;
		size = snprintf(buf, sizeof(buf), "%d", val);
		if (write(fd, buf, size) <= 0)
			error(1, errno, "Unable to write tcp_fastopen sysctl");
		close(fd);
	}
}
435
436int main(void)
437{
438 fprintf(stderr, "---- IPv4 UDP ----\n");
439
440
441
442 test_reuseport_ebpf((struct test_params) {
443 .recv_family = AF_INET,
444 .send_family = AF_INET,
445 .protocol = SOCK_DGRAM,
446 .recv_socks = 10,
447 .recv_port = 8000,
448 .send_port_min = 9000});
449 test_reuseport_ebpf((struct test_params) {
450 .recv_family = AF_INET,
451 .send_family = AF_INET,
452 .protocol = SOCK_DGRAM,
453 .recv_socks = 20,
454 .recv_port = 8000,
455 .send_port_min = 9000});
456 test_reuseport_cbpf((struct test_params) {
457 .recv_family = AF_INET,
458 .send_family = AF_INET,
459 .protocol = SOCK_DGRAM,
460 .recv_socks = 10,
461 .recv_port = 8001,
462 .send_port_min = 9020});
463 test_reuseport_cbpf((struct test_params) {
464 .recv_family = AF_INET,
465 .send_family = AF_INET,
466 .protocol = SOCK_DGRAM,
467 .recv_socks = 20,
468 .recv_port = 8001,
469 .send_port_min = 9020});
470 test_extra_filter((struct test_params) {
471 .recv_family = AF_INET,
472 .protocol = SOCK_DGRAM,
473 .recv_port = 8002});
474 test_filter_no_reuseport((struct test_params) {
475 .recv_family = AF_INET,
476 .protocol = SOCK_DGRAM,
477 .recv_port = 8008});
478
479 fprintf(stderr, "---- IPv6 UDP ----\n");
480 test_reuseport_ebpf((struct test_params) {
481 .recv_family = AF_INET6,
482 .send_family = AF_INET6,
483 .protocol = SOCK_DGRAM,
484 .recv_socks = 10,
485 .recv_port = 8003,
486 .send_port_min = 9040});
487 test_reuseport_ebpf((struct test_params) {
488 .recv_family = AF_INET6,
489 .send_family = AF_INET6,
490 .protocol = SOCK_DGRAM,
491 .recv_socks = 20,
492 .recv_port = 8003,
493 .send_port_min = 9040});
494 test_reuseport_cbpf((struct test_params) {
495 .recv_family = AF_INET6,
496 .send_family = AF_INET6,
497 .protocol = SOCK_DGRAM,
498 .recv_socks = 10,
499 .recv_port = 8004,
500 .send_port_min = 9060});
501 test_reuseport_cbpf((struct test_params) {
502 .recv_family = AF_INET6,
503 .send_family = AF_INET6,
504 .protocol = SOCK_DGRAM,
505 .recv_socks = 20,
506 .recv_port = 8004,
507 .send_port_min = 9060});
508 test_extra_filter((struct test_params) {
509 .recv_family = AF_INET6,
510 .protocol = SOCK_DGRAM,
511 .recv_port = 8005});
512 test_filter_no_reuseport((struct test_params) {
513 .recv_family = AF_INET6,
514 .protocol = SOCK_DGRAM,
515 .recv_port = 8009});
516
517 fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
518 test_reuseport_ebpf((struct test_params) {
519 .recv_family = AF_INET6,
520 .send_family = AF_INET,
521 .protocol = SOCK_DGRAM,
522 .recv_socks = 20,
523 .recv_port = 8006,
524 .send_port_min = 9080});
525 test_reuseport_ebpf((struct test_params) {
526 .recv_family = AF_INET6,
527 .send_family = AF_INET,
528 .protocol = SOCK_DGRAM,
529 .recv_socks = 10,
530 .recv_port = 8006,
531 .send_port_min = 9080});
532 test_reuseport_cbpf((struct test_params) {
533 .recv_family = AF_INET6,
534 .send_family = AF_INET,
535 .protocol = SOCK_DGRAM,
536 .recv_socks = 10,
537 .recv_port = 8007,
538 .send_port_min = 9100});
539 test_reuseport_cbpf((struct test_params) {
540 .recv_family = AF_INET6,
541 .send_family = AF_INET,
542 .protocol = SOCK_DGRAM,
543 .recv_socks = 20,
544 .recv_port = 8007,
545 .send_port_min = 9100});
546
547
548 enable_fastopen();
549 fprintf(stderr, "---- IPv4 TCP ----\n");
550 test_reuseport_ebpf((struct test_params) {
551 .recv_family = AF_INET,
552 .send_family = AF_INET,
553 .protocol = SOCK_STREAM,
554 .recv_socks = 10,
555 .recv_port = 8008,
556 .send_port_min = 9120});
557 test_reuseport_cbpf((struct test_params) {
558 .recv_family = AF_INET,
559 .send_family = AF_INET,
560 .protocol = SOCK_STREAM,
561 .recv_socks = 10,
562 .recv_port = 8009,
563 .send_port_min = 9160});
564 test_extra_filter((struct test_params) {
565 .recv_family = AF_INET,
566 .protocol = SOCK_STREAM,
567 .recv_port = 8010});
568 test_filter_no_reuseport((struct test_params) {
569 .recv_family = AF_INET,
570 .protocol = SOCK_STREAM,
571 .recv_port = 8011});
572
573 fprintf(stderr, "---- IPv6 TCP ----\n");
574 test_reuseport_ebpf((struct test_params) {
575 .recv_family = AF_INET6,
576 .send_family = AF_INET6,
577 .protocol = SOCK_STREAM,
578 .recv_socks = 10,
579 .recv_port = 8012,
580 .send_port_min = 9200});
581 test_reuseport_cbpf((struct test_params) {
582 .recv_family = AF_INET6,
583 .send_family = AF_INET6,
584 .protocol = SOCK_STREAM,
585 .recv_socks = 10,
586 .recv_port = 8013,
587 .send_port_min = 9240});
588 test_extra_filter((struct test_params) {
589 .recv_family = AF_INET6,
590 .protocol = SOCK_STREAM,
591 .recv_port = 8014});
592 test_filter_no_reuseport((struct test_params) {
593 .recv_family = AF_INET6,
594 .protocol = SOCK_STREAM,
595 .recv_port = 8015});
596
597 fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
598 test_reuseport_ebpf((struct test_params) {
599 .recv_family = AF_INET6,
600 .send_family = AF_INET,
601 .protocol = SOCK_STREAM,
602 .recv_socks = 10,
603 .recv_port = 8016,
604 .send_port_min = 9320});
605 test_reuseport_cbpf((struct test_params) {
606 .recv_family = AF_INET6,
607 .send_family = AF_INET,
608 .protocol = SOCK_STREAM,
609 .recv_socks = 10,
610 .recv_port = 8017,
611 .send_port_min = 9360});
612
613 test_filter_without_bind();
614
615 fprintf(stderr, "SUCCESS\n");
616 return 0;
617}
618