// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2016-present, PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@kernel.org>
 *	Daniel Borkmann <daniel@iogearbox.net>
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <linux/skmsg.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <net/net_namespace.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
#include <net/transp_v6.h>
#include <linux/btf_ids.h>

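/* Copy a struct sock_fprog from userspace, transparently handling the
 * 32-bit compat layout so callers only ever see the native
 * representation. @len must match the size the caller's ABI expects.
 */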
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
	if (in_compat_syscall()) {
		struct compat_sock_fprog f32;

		if (len != sizeof(f32))
			return -EINVAL;
		if (copy_from_sockptr(&f32, src, sizeof(f32)))
			return -EFAULT;
		memset(dst, 0, sizeof(*dst));
		dst->len = f32.len;
		dst->filter = compat_ptr(f32.filter);
	} else {
		if (len != sizeof(*dst))
			return -EINVAL;
		if (copy_from_sockptr(dst, src, sizeof(*dst)))
			return -EFAULT;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);

/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * storing memory pressure state.
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);

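/* Helpers backing the classic BPF ancillary loads (SKF_AD_*) and the
 * BPF_LD | BPF_ABS/BPF_IND instructions once a classic filter has been
 * migrated to eBPF: the converted program calls back into these.
 */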
BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u8 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return *(u8 *)(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return tmp;
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return *(u8 *)ptr;
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
					 offset);
}

BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u16 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return get_unaligned_be16(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be16_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be16(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u32 tmp, *ptr;
	const int len = sizeof(tmp);

	if (likely(offset >= 0)) {
		if (headlen - offset >= len)
			return get_unaligned_be32(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be32_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be32(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

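/* Emit eBPF instructions that load the requested sk_buff field into
 * dst_reg, used when converting classic BPF ancillary loads. Returns
 * the number of instructions written into insn_buf.
 */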
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		break;
	case SKF_AD_VLAN_TAG_PRESENT:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
		if (PKT_VLAN_PRESENT_BIT)
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
		if (PKT_VLAN_PRESENT_BIT < 7)
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		break;
	}

	return insn - insn_buf;
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}

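/* Convert a classic BPF_LD | BPF_ABS/BPF_IND load. For well-behaved
 * absolute loads within the linear header we emit a direct access via
 * the cached skb->data (BPF_REG_D) with a bounds check against the
 * cached headlen (BPF_REG_H); everything else falls back to calling
 * bpf_skb_load_helper_{8,16,32}(), where a negative return aborts the
 * program with return value 0, as classic BPF semantics demand.
 */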
static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
{
	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
	bool endian = BPF_SIZE(fp->code) == BPF_H ||
		      BPF_SIZE(fp->code) == BPF_W;
	bool indirect = BPF_MODE(fp->code) == BPF_IND;
	const int ip_align = NET_IP_ALIGN;
	struct bpf_insn *insn = *insnp;
	int offset = fp->k;

	if (!indirect &&
	    ((unaligned_ok && offset >= 0) ||
	     (!unaligned_ok && offset >= 0 &&
	      offset + ip_align >= 0 &&
	      offset + ip_align % size == 0))) {
		bool ldx_off_ok = offset <= S16_MAX;

		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
		if (offset)
			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
				      size, 2 + endian + (!ldx_off_ok * 2));
		if (ldx_off_ok) {
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_D, offset);
		} else {
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_TMP, 0);
		}
		if (endian)
			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
		*insn++ = BPF_JMP_A(8);
	}

	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
	if (!indirect) {
		*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
	} else {
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
		if (fp->k)
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
	}

	switch (BPF_SIZE(fp->code)) {
	case BPF_B:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
		break;
	case BPF_H:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
		break;
	case BPF_W:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
		break;
	default:
		return false;
	}

	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
	*insn = BPF_EXIT_INSN();

	*insnp = insn;
	return true;
}

/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *	@seen_ld_abs: bool whether we've seen ld_abs/ind
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *	bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *	bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len,
			      bool *seen_ld_abs)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
		if (*seen_ld_abs) {
			/* For packet access in classic BPF, cache skb->data
			 * in callee-saved BPF_REG_D and the headlen, that is
			 * skb->len - skb->data_len, in BPF_REG_H so the
			 * converted LD_ABS/LD_IND sequences can use them
			 * directly.
			 */
			*new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
						  BPF_REG_D, BPF_REG_CTX,
						  offsetof(struct sk_buff, data));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
						  offsetof(struct sk_buff, len));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
						  offsetof(struct sk_buff, data_len));
			*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
		}
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[32] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    convert_bpf_ld_abs(fp, &insn)) {
				*seen_ld_abs = true;
				break;
			}

			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
			    fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
				/* Error with exception code on div/mod by 0.
				 * For cBPF programs, this was always return 0.
				 */
				*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
				*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
				*insn++ = BPF_EXIT_INSN();
			}

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		const s32 off_min = S16_MIN, off_max = S16_MAX;		\
		s32 off;						\
									\
		if (target >= len || target < 0)			\
			goto err;					\
		off = addrs ? addrs[target] - addrs[i] - 1 : 0;		\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		off -= insn - tmp_insns;				\
		/* Reject anything not fitting into insn->off. */	\
		if (off < off_min || off > off_max)			\
			goto err;					\
		insn->off = off;					\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert some jumps when 'jump_true' is next insn. */
			if (fp->jt == 0) {
				switch (BPF_OP(fp->code)) {
				case BPF_JEQ:
					insn->code = BPF_JMP | BPF_JNE | bpf_src;
					break;
				case BPF_JGT:
					insn->code = BPF_JMP | BPF_JLE | bpf_src;
					break;
				case BPF_JGE:
					insn->code = BPF_JMP | BPF_JLT | bpf_src;
					break;
				default:
					goto jmp_rest;
				}

				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}
jmp_rest:
			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B: {
			struct sock_filter tmp = {
				.code	= BPF_LD | BPF_ABS | BPF_B,
				.k	= fp->k,
			};

			*seen_ld_abs = true;

			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			convert_bpf_ld_abs(&tmp, &insn);
			insn++;
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* tmp = X */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;
		}

		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		if (*seen_ld_abs)
			*new_len += 4; /* Prologue bytes. */
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}
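
/* Sketch of the conversion (illustrative only, not emitted verbatim):
 * the one-insn classic filter
 *
 *	BPF_STMT(BPF_RET | BPF_K, 0)
 *
 * becomes roughly
 *
 *	r0 = 0
 *	exit
 *
 * preceded by the prologue above that clears A/X and moves the context
 * into BPF_REG_CTX.
 */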

/* Security:
 *
 * As we don't want to clear the mem[] array for each packet going through
 * __bpf_prog_run(), we check that a filter loaded by the user never tries
 * to read a cell that was not previously written, and we check all branches
 * to be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on both targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}
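
/* Example of a filter this rejects: reading M[0] before anything has
 * been stored there, e.g.
 *
 *	BPF_STMT(BPF_LD | BPF_MEM, 0),
 *	BPF_STMT(BPF_RET | BPF_A, 0),
 *
 * since mem[0] maps to a stack slot and would otherwise leak
 * uninitialized stack contents.
 */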

static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}

static bool bpf_check_basics_ok(const struct sock_filter *filter,
				unsigned int flen)
{
	if (filter == NULL)
		return false;
	if (flen == 0 || flen > BPF_MAXINSNS)
		return false;

	return true;
}

/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
{
	bool anc_found;
	int pc;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
			if (ftest->k >= 32)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}

static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;

	fkprog->filter = kmemdup(fp->insns, fsize,
				 GFP_KERNEL | __GFP_NOWARN);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(prog);
	} else {
		bpf_release_orig_filter(prog);
		bpf_prog_free(prog);
	}
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (refcount_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	if (!refcount_inc_not_zero(&fp->refcnt))
		return false;

	if (!__sk_filter_charge(sk, fp)) {
		sk_filter_release(fp);
		return false;
	}
	return true;
}
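
/* Note the split above: __sk_filter_charge() only accounts the filter
 * size against the socket's optmem budget, while sk_filter_charge()
 * additionally takes a reference on the filter and drops it again
 * (reverting the charge) if accounting fails.
 */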

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;
	bool seen_ld_abs = false;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the eBPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL | __GFP_NOWARN);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
				 &seen_ld_abs);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
				 &seen_ld_abs);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by krealloc().
		 */
		goto out_err_free;

	fp = bpf_prog_select_runtime(fp, &err);
	if (err)
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
					   bpf_aux_classic_check_t trans)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* There might be additional checks and transformations
	 * needed on classic filters, f.e. in case of seccomp.
	 */
	if (trans) {
		err = trans(fp->insns, fp->len);
		if (err) {
			__bpf_prog_release(fp);
			return ERR_PTR(err);
		}
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * eBPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);
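
/* Example (hypothetical in-kernel caller): build and attach an
 * accept-all filter; the insns are copied, so stack storage is fine.
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *	};
 *	struct sock_fprog_kern fprog = {
 *		.len	= ARRAY_SIZE(insns),
 *		.filter	= insns,
 *	};
 *	struct bpf_prog *prog;
 *	int err = bpf_prog_create(&prog, &fprog);
 *
 * On success the caller owns @prog and releases it again with
 * bpf_prog_destroy().
 */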

/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
			      bpf_aux_classic_check_t trans, bool save_orig)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		__bpf_prog_free(fp);
		return -EFAULT;
	}

	fp->len = fprog->len;
	fp->orig_prog = NULL;

	if (save_orig) {
		err = bpf_prog_store_orig_filter(fp, fprog);
		if (err) {
			__bpf_prog_free(fp);
			return -ENOMEM;
		}
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, trans);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);

static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	fp->prog = prog;

	if (!__sk_filter_charge(sk, fp)) {
		kfree(fp);
		return -ENOMEM;
	}
	refcount_set(&fp->refcnt, 1);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}

static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return ERR_PTR(-EINVAL);

	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		__bpf_prog_free(prog);
		return ERR_PTR(-EFAULT);
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		__bpf_prog_free(prog);
		return ERR_PTR(-ENOMEM);
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	return bpf_prepare_filter(prog, NULL);
}

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
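
/* Userspace reaches this path via setsockopt(). A minimal sketch,
 * assuming a classic filter array @insns as in the example above:
 *
 *	struct sock_fprog fprog = {
 *		.len	= sizeof(insns) / sizeof(insns[0]),
 *		.filter	= insns,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
 *		   &fprog, sizeof(fprog));
 */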

int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
		err = -ENOMEM;
	else
		err = reuseport_attach_prog(sk, prog);

	if (err)
		__bpf_prog_release(prog);

	return err;
}

static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}

int sk_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
	if (PTR_ERR(prog) == -EINVAL)
		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
		/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
		 * bpf prog (e.g. sockmap). It depends on the
		 * limitation imposed by bpf_prog_load().
		 * Hence, sysctl_optmem_max is not checked.
		 */
		if ((sk->sk_type != SOCK_STREAM &&
		     sk->sk_type != SOCK_DGRAM) ||
		    (sk->sk_protocol != IPPROTO_UDP &&
		     sk->sk_protocol != IPPROTO_TCP) ||
		    (sk->sk_family != AF_INET &&
		     sk->sk_family != AF_INET6)) {
			err = -ENOTSUPP;
			goto err_prog_put;
		}
	} else {
		/* BPF_PROG_TYPE_SOCKET_FILTER */
		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
			err = -ENOMEM;
			goto err_prog_put;
		}
	}

	err = reuseport_attach_prog(sk, prog);
err_prog_put:
	if (err)
		bpf_prog_put(prog);

	return err;
}

void sk_reuseport_prog_free(struct bpf_prog *prog)
{
	if (!prog)
		return;

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
		bpf_prog_put(prog);
	else
		bpf_prog_destroy(prog);
}

struct bpf_scratchpad {
	union {
		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
		u8     buff[MAX_BPF_STACK];
	};
};

static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);

static inline int __bpf_try_make_writable(struct sk_buff *skb,
					  unsigned int write_len)
{
	return skb_ensure_writable(skb, write_len);
}
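
/* Callers should go through the wrappers below rather than use
 * __bpf_try_make_writable() directly: making the head writable may
 * reallocate it, so the program's cached data/data_end pointers have
 * to be recomputed afterwards.
 */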

static inline int bpf_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	int err = __bpf_try_make_writable(skb, write_len);

	bpf_compute_data_pointers(skb);
	return err;
}

static int bpf_try_make_head_writable(struct sk_buff *skb)
{
	return bpf_try_make_writable(skb, skb_headlen(skb));
}

static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
	   const void *, from, u32, len, u64, flags)
{
	void *ptr;

	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
		return -EINVAL;
	if (unlikely(offset > 0xffff))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + len)))
		return -EFAULT;

	ptr = skb->data + offset;
	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpull_rcsum(skb, ptr, len, offset);

	memcpy(ptr, from, len);

	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpush_rcsum(skb, ptr, len, offset);
	if (flags & BPF_F_INVALIDATE_HASH)
		skb_clear_hash(skb);

	return 0;
}

static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
	.func		= bpf_skb_store_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	ptr = skb_header_pointer(skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
	.func		= bpf_skb_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};
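
/* Unlike direct packet access, bpf_skb_load_bytes() also works on
 * non-linear skb data: skb_header_pointer() either returns a pointer
 * into the linear area or copies the requested bytes into the
 * program-provided buffer.
 */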

BPF_CALL_4(bpf_flow_dissector_load_bytes,
	   const struct bpf_flow_dissector *, ctx, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	if (unlikely(!ctx->skb))
		goto err_clear;

	ptr = skb_header_pointer(ctx->skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
	.func		= bpf_flow_dissector_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
	   u32, offset, void *, to, u32, len, u32, start_header)
{
	u8 *end = skb_tail_pointer(skb);
	u8 *start, *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	switch (start_header) {
	case BPF_HDR_START_MAC:
		if (unlikely(!skb_mac_header_was_set(skb)))
			goto err_clear;
		start = skb_mac_header(skb);
		break;
	case BPF_HDR_START_NET:
		start = skb_network_header(skb);
		break;
	default:
		goto err_clear;
	}

	ptr = start + offset;

	if (likely(ptr + len <= end)) {
		memcpy(to, ptr, len);
		return 0;
	}

err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
	.func		= bpf_skb_load_bytes_relative,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
{
	/* Idea is the following: should the needed direct read/write
	 * test fail during runtime, we can pull in more data and redo
	 * again, since implicitly, we invalidate previous checks here.
	 *
	 * Or, since we know how much we need to make read/writeable,
	 * this can be done once at the program beginning for direct
	 * access case. By this we overcome limitations of only current
	 * headlen being accessible.
	 */
	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
}

static const struct bpf_func_proto bpf_skb_pull_data_proto = {
	.func		= bpf_skb_pull_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{
	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
}

static const struct bpf_func_proto bpf_sk_fullsock_proto = {
	.func		= bpf_sk_fullsock,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
};

static inline int sk_skb_try_make_writable(struct sk_buff *skb,
					   unsigned int write_len)
{
	int err = __bpf_try_make_writable(skb, write_len);

	bpf_compute_data_end_sk_skb(skb);
	return err;
}

BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
{
	/* Idea is the following: should the needed direct read/write
	 * test fail during runtime, we can pull in more data and redo
	 * again, since implicitly, we invalidate previous checks here.
	 *
	 * Or, since we know how much we need to make read/writeable,
	 * this can be done once at the program beginning for direct
	 * access case. By this we overcome limitations of only current
	 * headlen being accessible.
	 */
	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
}

static const struct bpf_func_proto sk_skb_pull_data_proto = {
	.func		= sk_skb_pull_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		csum_replace_by_diff(ptr, to);
		break;
	case 2:
		csum_replace2(ptr, from, to);
		break;
	case 4:
		csum_replace4(ptr, from, to);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
	.func		= bpf_l3_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
			       BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	if (is_mmzero && !do_mforce && !*ptr)
		return 0;

	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
		break;
	case 2:
		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
		break;
	case 4:
		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
		break;
	default:
		return -EINVAL;
	}

	if (is_mmzero && !*ptr)
		*ptr = CSUM_MANGLED_0;
	return 0;
}

static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
	.func		= bpf_l4_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
	   __be32 *, to, u32, to_size, __wsum, seed)
{
	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
	u32 diff_size = from_size + to_size;
	int i, j = 0;

	/* This is quite flexible, some examples:
	 *
	 * from != NULL, to != NULL: Compute diff between from and to.
	 * from == NULL, to != NULL: Compute diff between zero and to.
	 * from != NULL, to == NULL: Compute diff between from and zero.
	 *
	 * Even for diffing, from_size and to_size don't need to be equal.
	 */
	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
		     diff_size > sizeof(sp->diff)))
		return -EINVAL;

	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
		sp->diff[j] = ~from[i];
	for (i = 0; i < to_size / sizeof(__be32); i++, j++)
		sp->diff[j] = to[i];

	return csum_partial(sp->diff, diff_size, seed);
}

static const struct bpf_func_proto bpf_csum_diff_proto = {
	.func		= bpf_csum_diff,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg5_type	= ARG_ANYTHING,
};
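
/* The arithmetic: a checksum difference is folded in by summing the
 * one's complement of the old words with the new words, which is why
 * from is stored inverted (~from[i]) above. An illustrative eBPF
 * snippet updating one 32-bit field and the L4 checksum:
 *
 *	__be32 old = ..., new = ...;
 *	__wsum diff = bpf_csum_diff(&old, 4, &new, 4, 0);
 *	bpf_l4_csum_replace(skb, csum_off, 0, diff, BPF_F_PSEUDO_HDR);
 */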

BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
{
	/* The interface is to be used in combination with bpf_csum_diff()
	 * for direct packet writes. csum rotation for alignment as well
	 * as emulating csum_sub() can be done from the eBPF program.
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		return (skb->csum = csum_add(skb->csum, csum));

	return -ENOTSUPP;
}

static const struct bpf_func_proto bpf_csum_update_proto = {
	.func		= bpf_csum_update,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
{
	/* The interface is to be used in combination with
	 * bpf_skb_adjust_room() when encapsulating or decapsulating
	 * headers, to keep skb->csum_level in sync.
	 */
	switch (level) {
	case BPF_CSUM_LEVEL_INC:
		__skb_incr_checksum_unnecessary(skb);
		break;
	case BPF_CSUM_LEVEL_DEC:
		__skb_decr_checksum_unnecessary(skb);
		break;
	case BPF_CSUM_LEVEL_RESET:
		__skb_reset_checksum_unnecessary(skb);
		break;
	case BPF_CSUM_LEVEL_QUERY:
		return skb->ip_summed == CHECKSUM_UNNECESSARY ?
		       skb->csum_level : -EACCES;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct bpf_func_proto bpf_csum_level_proto = {
	.func		= bpf_csum_level,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
	return dev_forward_skb(dev, skb);
}

static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
				      struct sk_buff *skb)
{
	int ret = ____dev_forward_skb(dev, skb);

	if (likely(!ret)) {
		skb->dev = dev;
		ret = netif_rx(skb);
	}

	return ret;
}

static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
		kfree_skb(skb);
		return -ENETDOWN;
	}

	skb->dev = dev;
	skb->tstamp = 0;

	dev_xmit_recursion_inc();
	ret = dev_queue_xmit(skb);
	dev_xmit_recursion_dec();

	return ret;
}

static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	unsigned int mlen = skb_network_offset(skb);

	if (mlen) {
		__skb_pull(skb, mlen);

		/* At ingress, the mac header has already been pulled once.
		 * At egress, skb_postpull_rcsum() has to be done in case
		 * the mac header is pulled out of line here.
		 */
		if (!skb_at_tc_ingress(skb))
			skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
	}
	skb_pop_mac_header(skb);
	skb_reset_mac_len(skb);
	return flags & BPF_F_INGRESS ?
	       __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
}

static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	/* Verify the mac header offset is sane before using it below. */
	if (unlikely(skb->mac_header >= skb->network_header)) {
		kfree_skb(skb);
		return -ERANGE;
	}

	bpf_push_mac_rcsum(skb);
	return flags & BPF_F_INGRESS ?
	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
}

static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
			  u32 flags)
{
	if (dev_is_mac_header_xmit(dev))
		return __bpf_redirect_common(skb, dev, flags);
	else
		return __bpf_redirect_no_mac(skb, dev, flags);
}

BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
	struct net_device *dev;
	struct sk_buff *clone;
	int ret;

	if (unlikely(flags & ~(BPF_F_INGRESS)))
		return -EINVAL;

	dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
	if (unlikely(!dev))
		return -EINVAL;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (unlikely(!clone))
		return -ENOMEM;

	/* For direct write, we need to keep the invariant that the skbs
	 * we're dealing with need to be uncloned. Should uncloning fail
	 * here, we need to free the just generated clone to unclone once
	 * again.
	 */
	ret = bpf_try_make_head_writable(skb);
	if (unlikely(ret)) {
		kfree_skb(clone);
		return -ENOMEM;
	}

	return __bpf_redirect(clone, dev, flags);
}

static const struct bpf_func_proto bpf_clone_redirect_proto = {
	.func		= bpf_clone_redirect,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};

DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);

BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

	if (unlikely(flags & ~(BPF_F_INGRESS)))
		return TC_ACT_SHOT;

	ri->flags = flags;
	ri->tgt_index = ifindex;

	return TC_ACT_REDIRECT;
}

int skb_do_redirect(struct sk_buff *skb)
{
	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
	struct net_device *dev;

	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
	ri->tgt_index = 0;
	if (unlikely(!dev)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return __bpf_redirect(skb, dev, ri->flags);
}

static const struct bpf_func_proto bpf_redirect_proto = {
	.func		= bpf_redirect,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
};
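
/* bpf_redirect() only records the target in the per-CPU
 * bpf_redirect_info and returns TC_ACT_REDIRECT; the actual skb
 * forwarding happens when the TC action code sees that verdict and
 * calls skb_do_redirect() above, still on the same CPU.
 */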

BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
{
	msg->apply_bytes = bytes;
	return 0;
}

static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
	.func		= bpf_msg_apply_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
{
	msg->cork_bytes = bytes;
	return 0;
}

static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
	.func		= bpf_msg_cork_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
	   u32, end, u64, flags)
{
	u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
	u32 first_sge, last_sge, i, shift, bytes_sg_total;
	struct scatterlist *sge;
	u8 *raw, *to, *from;
	struct page *page;

	if (unlikely(flags || end <= start))
		return -EINVAL;

	/* First find the starting scatterlist element */
	i = msg->sg.start;
	do {
		offset += len;
		len = sk_msg_elem(msg, i)->length;
		if (start < offset + len)
			break;
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);

	if (unlikely(start >= offset + len))
		return -EINVAL;

	first_sge = i;
	/* The start may point into the sg element so we need to also
	 * account for the headroom.
	 */
	bytes_sg_total = start - offset + bytes;
	if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
		goto out;

	/* At this point we need to linearize multiple scatterlist
	 * elements or a single shared page. Either way we need to
	 * copy into a linear buffer exclusively owned by BPF. Then
	 * place the buffer in the msg: the visited elements are
	 * folded into the new page, and the remaining ring entries
	 * are shifted down over the freed slots below so the ring
	 * stays consistent.
	 */
	do {
		copy += sk_msg_elem(msg, i)->length;
		sk_msg_iter_var_next(i);
		if (bytes_sg_total <= copy)
			break;
	} while (i != msg->sg.end);
	last_sge = i;

	if (unlikely(bytes_sg_total > copy))
		return -EINVAL;

	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
			   get_order(copy));
	if (unlikely(!page))
		return -ENOMEM;

	raw = page_address(page);
	i = first_sge;
	do {
		sge = sk_msg_elem(msg, i);
		from = sg_virt(sge);
		len = sge->length;
		to = raw + poffset;

		memcpy(to, from, len);
		poffset += len;
		sge->length = 0;
		put_page(sg_page(sge));

		sk_msg_iter_var_next(i);
	} while (i != last_sge);

	sg_set_page(&msg->sg.data[first_sge], page, copy, 0);

	/* To repair the sg ring we need to shift entries: close the
	 * gap left by the elements between first_sge and last_sge
	 * that were folded into the newly allocated page.
	 */
	WARN_ON_ONCE(last_sge == first_sge);
	shift = last_sge > first_sge ?
		last_sge - first_sge - 1 :
		NR_MSG_FRAG_IDS - first_sge + last_sge - 1;
	if (!shift)
		goto out;

	i = first_sge;
	sk_msg_iter_var_next(i);
	do {
		u32 move_from;

		if (i + shift >= NR_MSG_FRAG_IDS)
			move_from = i + shift - NR_MSG_FRAG_IDS;
		else
			move_from = i + shift;
		if (move_from == msg->sg.end)
			break;

		msg->sg.data[i] = msg->sg.data[move_from];
		msg->sg.data[move_from].length = 0;
		msg->sg.data[move_from].page_link = 0;
		msg->sg.data[move_from].offset = 0;
		sk_msg_iter_var_next(i);
	} while (1);

	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
		      msg->sg.end - shift + NR_MSG_FRAG_IDS :
		      msg->sg.end - shift;
out:
	msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
	msg->data_end = msg->data + bytes;
	return 0;
}

static const struct bpf_func_proto bpf_msg_pull_data_proto = {
	.func		= bpf_msg_pull_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
};
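
/* After a successful bpf_msg_pull_data(), [start, end) is guaranteed
 * to sit in one contiguous, BPF-private buffer, so the range can be
 * accessed directly through msg->data/msg->data_end.
 */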
2395
2396BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2397 u32, len, u64, flags)
2398{
2399 struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
2400 u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
2401 u8 *raw, *to, *from;
2402 struct page *page;
2403
2404 if (unlikely(flags))
2405 return -EINVAL;
2406
2407
2408 i = msg->sg.start;
2409 do {
2410 offset += l;
2411 l = sk_msg_elem(msg, i)->length;
2412
2413 if (start < offset + l)
2414 break;
2415 sk_msg_iter_var_next(i);
2416 } while (i != msg->sg.end);
2417
2418 if (start >= offset + l)
2419 return -EINVAL;
2420
2421 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2422
2423
2424
2425
2426
2427
2428
2429
2430 if (!space || (space == 1 && start != offset))
2431 copy = msg->sg.data[i].length;
2432
2433 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2434 get_order(copy + len));
2435 if (unlikely(!page))
2436 return -ENOMEM;
2437
2438 if (copy) {
2439 int front, back;
2440
2441 raw = page_address(page);
2442
2443 psge = sk_msg_elem(msg, i);
2444 front = start - offset;
2445 back = psge->length - front;
2446 from = sg_virt(psge);
2447
2448 if (front)
2449 memcpy(raw, from, front);
2450
2451 if (back) {
2452 from += front;
2453 to = raw + front + len;
2454
2455 memcpy(to, from, back);
2456 }
2457
2458 put_page(sg_page(psge));
2459 } else if (start - offset) {
2460 psge = sk_msg_elem(msg, i);
2461 rsge = sk_msg_elem_cpy(msg, i);
2462
2463 psge->length = start - offset;
2464 rsge.length -= psge->length;
2465 rsge.offset += start;
2466
2467 sk_msg_iter_var_next(i);
2468 sg_unmark_end(psge);
2469 sg_unmark_end(&rsge);
2470 sk_msg_iter_next(msg, end);
2471 }
2472
2473 /* Slot(s) to place newly allocated data */
2474 new = i;
2475
2476 /* Shift one or two slots as needed */
2477 if (!copy) {
2478 sge = sk_msg_elem_cpy(msg, i);
2479
2480 sk_msg_iter_var_next(i);
2481 sg_unmark_end(&sge);
2482 sk_msg_iter_next(msg, end);
2483
2484 nsge = sk_msg_elem_cpy(msg, i);
2485 if (rsge.length) {
2486 sk_msg_iter_var_next(i);
2487 nnsge = sk_msg_elem_cpy(msg, i);
2488 }
2489
2490 while (i != msg->sg.end) {
2491 msg->sg.data[i] = sge;
2492 sge = nsge;
2493 sk_msg_iter_var_next(i);
2494 if (rsge.length) {
2495 nsge = nnsge;
2496 nnsge = sk_msg_elem_cpy(msg, i);
2497 } else {
2498 nsge = sk_msg_elem_cpy(msg, i);
2499 }
2500 }
2501 }
2502
2503 /* Place new data and possibly compute new offset */
2504 sk_mem_charge(msg->sk, len);
2505 msg->sg.size += len;
2506 __clear_bit(new, &msg->sg.copy);
2507 sg_set_page(&msg->sg.data[new], page, len + copy, 0);
2508 if (rsge.length) {
2509 get_page(sg_page(&rsge));
2510 sk_msg_iter_var_next(new);
2511 msg->sg.data[new] = rsge;
2512 }
2513
2514 sk_msg_compute_data_pointers(msg);
2515 return 0;
2516}
2517
2518static const struct bpf_func_proto bpf_msg_push_data_proto = {
2519 .func = bpf_msg_push_data,
2520 .gpl_only = false,
2521 .ret_type = RET_INTEGER,
2522 .arg1_type = ARG_PTR_TO_CTX,
2523 .arg2_type = ARG_ANYTHING,
2524 .arg3_type = ARG_ANYTHING,
2525 .arg4_type = ARG_ANYTHING,
2526};
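
/* A minimal usage sketch (illustrative only, not part of this file;
 * the 4-byte tag is made up): reserve room at the front of the msg with
 * bpf_msg_push_data() and fill it through the recomputed data pointers.
 *
 *      SEC("sk_msg")
 *      int push_tag(struct sk_msg_md *msg)
 *      {
 *              __u32 tag = 0xcafe;
 *
 *              if (bpf_msg_push_data(msg, 0, sizeof(tag), 0))
 *                      return SK_DROP;
 *              if (msg->data + sizeof(tag) > msg->data_end)
 *                      return SK_DROP;
 *              __builtin_memcpy(msg->data, &tag, sizeof(tag));
 *              return SK_PASS;
 *      }
 */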
2527
2528static void sk_msg_shift_left(struct sk_msg *msg, int i)
2529{
2530 int prev;
2531
2532 do {
2533 prev = i;
2534 sk_msg_iter_var_next(i);
2535 msg->sg.data[prev] = msg->sg.data[i];
2536 } while (i != msg->sg.end);
2537
2538 sk_msg_iter_prev(msg, end);
2539}
2540
2541static void sk_msg_shift_right(struct sk_msg *msg, int i)
2542{
2543 struct scatterlist tmp, sge;
2544
2545 sk_msg_iter_next(msg, end);
2546 sge = sk_msg_elem_cpy(msg, i);
2547 sk_msg_iter_var_next(i);
2548 tmp = sk_msg_elem_cpy(msg, i);
2549
2550 while (i != msg->sg.end) {
2551 msg->sg.data[i] = sge;
2552 sk_msg_iter_var_next(i);
2553 sge = tmp;
2554 tmp = sk_msg_elem_cpy(msg, i);
2555 }
2556}
2557
2558BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2559 u32, len, u64, flags)
2560{
2561 u32 i = 0, l = 0, space, offset = 0;
2562 u64 last = start + len;
2563 int pop;
2564
2565 if (unlikely(flags))
2566 return -EINVAL;
2567
2568 /* First find the starting scatterlist element */
2569 i = msg->sg.start;
2570 do {
2571 offset += l;
2572 l = sk_msg_elem(msg, i)->length;
2573
2574 if (start < offset + l)
2575 break;
2576 sk_msg_iter_var_next(i);
2577 } while (i != msg->sg.end);
2578
2579 /* Bounds checks: start and pop must be inside message */
2580 if (start >= offset + l || last >= msg->sg.size)
2581 return -EINVAL;
2582
2583 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2584
2585 pop = len;
2586
2587 /* --------------| offset
2588 * -| start      |--------- len -------|
2589 *
2590 *  |----- a ----|-------- pop -------|----- b ----|
2591 *  |______________________________________________| length
2592 *
2593 * a:   region at front of scatter element to save
2594 * b:   region at back of scatter element to save when length > A + pop
2595 * pop: region to pop from element, same as input 'pop' here will be
2596 *      decremented below per iteration.
2597 *
2598 * Two top-level cases to handle when start != offset, first B is non
2599 * zero and second B is zero corresponding to when a pop includes more
2600 * than one element.
2601 *
2602 * Then if B is non-zero AND there is no space allocate space and
2603 * compact A, B regions into page. If there is space shift ring to
2604 * the right, freeing the next element in ring to place B, leaving
2605 * A untouched except to reduce length.
2606 */
2607 if (start != offset) {
2608 struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
2609 int a = start;
2610 int b = sge->length - pop - a;
2611
2612 sk_msg_iter_var_next(i);
2613
2614 if (pop < sge->length - a) {
2615 if (space) {
2616 sge->length = a;
2617 sk_msg_shift_right(msg, i);
2618 nsge = sk_msg_elem(msg, i);
2619 get_page(sg_page(sge));
2620 sg_set_page(nsge,
2621 sg_page(sge),
2622 b, sge->offset + pop + a);
2623 } else {
2624 struct page *page, *orig;
2625 u8 *to, *from;
2626
2627 page = alloc_pages(__GFP_NOWARN |
2628 __GFP_COMP | GFP_ATOMIC,
2629 get_order(a + b));
2630 if (unlikely(!page))
2631 return -ENOMEM;
2632
2633 sge->length = a;
2634 orig = sg_page(sge);
2635 from = sg_virt(sge);
2636 to = page_address(page);
2637 memcpy(to, from, a);
2638 memcpy(to + a, from + a + pop, b);
2639 sg_set_page(sge, page, a + b, 0);
2640 put_page(orig);
2641 }
2642 pop = 0;
2643 } else if (pop >= sge->length - a) {
2644 pop -= (sge->length - a);
2645 sge->length = a;
2646 }
2647 }
2648
2649 /* From above the current layout _must_ be as follows,
2650 *
2651 * -| offset
2652 * -| start
2653 *
2654 *  |---- pop ---|---------------- b ------------|
2655 *  |____________________________________________| length
2656 *
2657 * Offset and start of the current msg elem are equal because in the
2658 * previous calls we shifted sge's and offsets as necessary so start
2659 * and offset are now equal.
2660 *
2661 * The remaining |---- pop ---| is handled here: pop bytes off the
2662 * front of each element, shifting the ring left to drop elements
2663 * that are consumed entirely, until the requested number of bytes
2664 * has been removed.
2665 */
2666 while (pop) {
2667 struct scatterlist *sge = sk_msg_elem(msg, i);
2668
2669 if (pop < sge->length) {
2670 sge->length -= pop;
2671 sge->offset += pop;
2672 pop = 0;
2673 } else {
2674 pop -= sge->length;
2675 sk_msg_shift_left(msg, i);
2676 }
2677 sk_msg_iter_var_next(i);
2678 }
2679
2680 sk_mem_uncharge(msg->sk, len - pop);
2681 msg->sg.size -= (len - pop);
2682 sk_msg_compute_data_pointers(msg);
2683 return 0;
2684}
2685
2686static const struct bpf_func_proto bpf_msg_pop_data_proto = {
2687 .func = bpf_msg_pop_data,
2688 .gpl_only = false,
2689 .ret_type = RET_INTEGER,
2690 .arg1_type = ARG_PTR_TO_CTX,
2691 .arg2_type = ARG_ANYTHING,
2692 .arg3_type = ARG_ANYTHING,
2693 .arg4_type = ARG_ANYTHING,
2694};
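
/* A minimal usage sketch (illustrative only, not part of this file),
 * mirroring the push example above: strip a 4-byte tag from the front
 * of the msg again before it is delivered.
 *
 *      SEC("sk_msg")
 *      int pop_tag(struct sk_msg_md *msg)
 *      {
 *              if (bpf_msg_pop_data(msg, 0, 4, 0))
 *                      return SK_DROP;
 *              return SK_PASS;
 *      }
 */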
2695
2696#ifdef CONFIG_CGROUP_NET_CLASSID
2697BPF_CALL_0(bpf_get_cgroup_classid_curr)
2698{
2699 return __task_get_classid(current);
2700}
2701
2702static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
2703 .func = bpf_get_cgroup_classid_curr,
2704 .gpl_only = false,
2705 .ret_type = RET_INTEGER,
2706};
2707#endif
2708
2709BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
2710{
2711 return task_get_classid(skb);
2712}
2713
2714static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
2715 .func = bpf_get_cgroup_classid,
2716 .gpl_only = false,
2717 .ret_type = RET_INTEGER,
2718 .arg1_type = ARG_PTR_TO_CTX,
2719};
2720
2721BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
2722{
2723 return dst_tclassid(skb);
2724}
2725
2726static const struct bpf_func_proto bpf_get_route_realm_proto = {
2727 .func = bpf_get_route_realm,
2728 .gpl_only = false,
2729 .ret_type = RET_INTEGER,
2730 .arg1_type = ARG_PTR_TO_CTX,
2731};
2732
2733BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
2734{
2735 /* If skb_clear_hash() was called due to mangling, we can
2736 * trigger SW recalculation here. Later access to hash
2737 * can then use the inline skb->hash via context directly
2738 * instead of calling this helper again.
2739 */
2740 return skb_get_hash(skb);
2741}
2742
2743static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
2744 .func = bpf_get_hash_recalc,
2745 .gpl_only = false,
2746 .ret_type = RET_INTEGER,
2747 .arg1_type = ARG_PTR_TO_CTX,
2748};
2749
2750BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
2751{
2752 /* After all direct packet write, this can be used once for
2753 * triggering a lazy recalc on next skb_get_hash() invocation.
2754 */
2755 skb_clear_hash(skb);
2756 return 0;
2757}
2758
2759static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
2760 .func = bpf_set_hash_invalid,
2761 .gpl_only = false,
2762 .ret_type = RET_INTEGER,
2763 .arg1_type = ARG_PTR_TO_CTX,
2764};
2765
2766BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
2767{
2768 /* Set user specified hash as L4(+), so that it gets returned
2769 * on skb_get_hash() call unless BPF prog later on triggers a
2770 * skb_clear_hash().
2771 */
2772 __skb_set_sw_hash(skb, hash, true);
2773 return 0;
2774}
2775
2776static const struct bpf_func_proto bpf_set_hash_proto = {
2777 .func = bpf_set_hash,
2778 .gpl_only = false,
2779 .ret_type = RET_INTEGER,
2780 .arg1_type = ARG_PTR_TO_CTX,
2781 .arg2_type = ARG_ANYTHING,
2782};
2783
2784BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
2785 u16, vlan_tci)
2786{
2787 int ret;
2788
2789 if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
2790 vlan_proto != htons(ETH_P_8021AD)))
2791 vlan_proto = htons(ETH_P_8021Q);
2792
2793 bpf_push_mac_rcsum(skb);
2794 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
2795 bpf_pull_mac_rcsum(skb);
2796
2797 bpf_compute_data_pointers(skb);
2798 return ret;
2799}
2800
2801static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
2802 .func = bpf_skb_vlan_push,
2803 .gpl_only = false,
2804 .ret_type = RET_INTEGER,
2805 .arg1_type = ARG_PTR_TO_CTX,
2806 .arg2_type = ARG_ANYTHING,
2807 .arg3_type = ARG_ANYTHING,
2808};
2809
2810BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
2811{
2812 int ret;
2813
2814 bpf_push_mac_rcsum(skb);
2815 ret = skb_vlan_pop(skb);
2816 bpf_pull_mac_rcsum(skb);
2817
2818 bpf_compute_data_pointers(skb);
2819 return ret;
2820}
2821
2822static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
2823 .func = bpf_skb_vlan_pop,
2824 .gpl_only = false,
2825 .ret_type = RET_INTEGER,
2826 .arg1_type = ARG_PTR_TO_CTX,
2827};
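
/* A minimal usage sketch (illustrative only, not part of this file; the
 * section name and VLAN id 42 are assumptions): rewrite the outermost
 * VLAN tag of a packet from a TC program.
 *
 *      SEC("tc")
 *      int vlan_rewrite(struct __sk_buff *skb)
 *      {
 *              if (skb->vlan_present && bpf_skb_vlan_pop(skb))
 *                      return TC_ACT_SHOT;
 *              if (bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 42))
 *                      return TC_ACT_SHOT;
 *              return TC_ACT_OK;
 *      }
 */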
2828
2829static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
2830{
2831 /* Caller already did skb_cow() with len as headroom,
2832 * so no need to do it here.
2833 */
2834 skb_push(skb, len);
2835 memmove(skb->data, skb->data + len, off);
2836 memset(skb->data + off, 0, len);
2837
2838 /* No skb_postpush_rcsum(skb, skb->data + off, len)
2839 * needed here as it does not change the skb->csum
2840 * result for checksum complete when summing over
2841 * zeroed blocks.
2842 */
2843 return 0;
2844}
2845
2846static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
2847{
2848 /* skb_ensure_writable() is not needed here, as we're
2849 * already working on an uncloned skb.
2850 */
2851 if (unlikely(!pskb_may_pull(skb, off + len)))
2852 return -ENOMEM;
2853
2854 skb_postpull_rcsum(skb, skb->data + off, len);
2855 memmove(skb->data + len, skb->data, off);
2856 __skb_pull(skb, len);
2857
2858 return 0;
2859}
2860
2861static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
2862{
2863 bool trans_same = skb->transport_header == skb->network_header;
2864 int ret;
2865
2866 /* There's no need for __skb_push()/__skb_pull() pair to
2867 * get to the start of the mac header as we're guaranteed
2868 * to always start from here under eBPF.
2869 */
2870 ret = bpf_skb_generic_push(skb, off, len);
2871 if (likely(!ret)) {
2872 skb->mac_header -= len;
2873 skb->network_header -= len;
2874 if (trans_same)
2875 skb->transport_header = skb->network_header;
2876 }
2877
2878 return ret;
2879}
2880
2881static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
2882{
2883 bool trans_same = skb->transport_header == skb->network_header;
2884 int ret;
2885
2886 /* Same here, __skb_push()/__skb_pull() pair not needed. */
2887 ret = bpf_skb_generic_pop(skb, off, len);
2888 if (likely(!ret)) {
2889 skb->mac_header += len;
2890 skb->network_header += len;
2891 if (trans_same)
2892 skb->transport_header = skb->network_header;
2893 }
2894
2895 return ret;
2896}
2897
2898static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
2899{
2900 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
2901 u32 off = skb_mac_header_len(skb);
2902 int ret;
2903
2904 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
2905 return -ENOTSUPP;
2906
2907 ret = skb_cow(skb, len_diff);
2908 if (unlikely(ret < 0))
2909 return ret;
2910
2911 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2912 if (unlikely(ret < 0))
2913 return ret;
2914
2915 if (skb_is_gso(skb)) {
2916 struct skb_shared_info *shinfo = skb_shinfo(skb);
2917
2918 /* SKB_GSO_TCPV4 needs to be changed into
2919 * SKB_GSO_TCPV6.
2920 */
2921 if (shinfo->gso_type & SKB_GSO_TCPV4) {
2922 shinfo->gso_type &= ~SKB_GSO_TCPV4;
2923 shinfo->gso_type |= SKB_GSO_TCPV6;
2924 }
2925
2926 /* Due to IPv6 header, MSS needs to be downgraded. */
2927 skb_decrease_gso_size(shinfo, len_diff);
2928 /* Header must be checked, and gso_segs recomputed. */
2929 shinfo->gso_type |= SKB_GSO_DODGY;
2930 shinfo->gso_segs = 0;
2931 }
2932
2933 skb->protocol = htons(ETH_P_IPV6);
2934 skb_clear_hash(skb);
2935
2936 return 0;
2937}
2938
2939static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
2940{
2941 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
2942 u32 off = skb_mac_header_len(skb);
2943 int ret;
2944
2945 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
2946 return -ENOTSUPP;
2947
2948 ret = skb_unclone(skb, GFP_ATOMIC);
2949 if (unlikely(ret < 0))
2950 return ret;
2951
2952 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2953 if (unlikely(ret < 0))
2954 return ret;
2955
2956 if (skb_is_gso(skb)) {
2957 struct skb_shared_info *shinfo = skb_shinfo(skb);
2958
2959 /* SKB_GSO_TCPV6 needs to be changed into
2960 * SKB_GSO_TCPV4.
2961 */
2962 if (shinfo->gso_type & SKB_GSO_TCPV6) {
2963 shinfo->gso_type &= ~SKB_GSO_TCPV6;
2964 shinfo->gso_type |= SKB_GSO_TCPV4;
2965 }
2966
2967 /* Due to IPv4 header, MSS can be upgraded. */
2968 skb_increase_gso_size(shinfo, len_diff);
2969 /* Header must be checked, and gso_segs recomputed. */
2970 shinfo->gso_type |= SKB_GSO_DODGY;
2971 shinfo->gso_segs = 0;
2972 }
2973
2974 skb->protocol = htons(ETH_P_IP);
2975 skb_clear_hash(skb);
2976
2977 return 0;
2978}
2979
2980static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
2981{
2982 __be16 from_proto = skb->protocol;
2983
2984 if (from_proto == htons(ETH_P_IP) &&
2985 to_proto == htons(ETH_P_IPV6))
2986 return bpf_skb_proto_4_to_6(skb);
2987
2988 if (from_proto == htons(ETH_P_IPV6) &&
2989 to_proto == htons(ETH_P_IP))
2990 return bpf_skb_proto_6_to_4(skb);
2991
2992 return -ENOTSUPP;
2993}
2994
2995BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
2996 u64, flags)
2997{
2998 int ret;
2999
3000 if (unlikely(flags))
3001 return -EINVAL;
3002
3003 /* General idea is that this helper does the basic groundwork
3004 * needed for changing the protocol, and eBPF program fills the
3005 * rest through direct packet access or bpf_lX_csum_replace{,_diff}()
3006 * helpers.
3007 *
3008 * The helper is restricted to translating between ETH_P_IP and
3009 * ETH_P_IPV6: it grows or shrinks the packet by the size delta
3010 * of the two headers, moves the mac/network header offsets
3011 * accordingly and flips the GSO type between SKB_GSO_TCPV4 and
3012 * SKB_GSO_TCPV6, adapting gso_size where needed.
3013 *
3014 * It is then up to the eBPF program to write out valid network
3015 * headers, e.g. for implementing NAT64/NAT46 style translations
3016 * directly out of BPF. The skb hash is cleared so that it is
3017 * recomputed for the new protocol on the next skb_get_hash()
3018 * invocation.
3019 */
3020 ret = bpf_skb_proto_xlat(skb, proto);
3021 bpf_compute_data_pointers(skb);
3022 return ret;
3023}
3024
3025static const struct bpf_func_proto bpf_skb_change_proto_proto = {
3026 .func = bpf_skb_change_proto,
3027 .gpl_only = false,
3028 .ret_type = RET_INTEGER,
3029 .arg1_type = ARG_PTR_TO_CTX,
3030 .arg2_type = ARG_ANYTHING,
3031 .arg3_type = ARG_ANYTHING,
3032};
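
/* A minimal usage sketch (illustrative only, not part of this file): a
 * real NAT64-style program must also rewrite the network header and fix
 * checksums itself, e.g. via bpf_skb_store_bytes() and
 * bpf_l3_csum_replace(); only the protocol flip is shown here.
 *
 *      SEC("tc")
 *      int to_v6(struct __sk_buff *skb)
 *      {
 *              if (skb->protocol == bpf_htons(ETH_P_IP) &&
 *                  bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
 *                      return TC_ACT_SHOT;
 *              return TC_ACT_OK;
 *      }
 */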
3033
3034BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
3035{
3036 /* We only allow a restricted subset to be changed for now. */
3037 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
3038 !skb_pkt_type_ok(pkt_type)))
3039 return -EINVAL;
3040
3041 skb->pkt_type = pkt_type;
3042 return 0;
3043}
3044
3045static const struct bpf_func_proto bpf_skb_change_type_proto = {
3046 .func = bpf_skb_change_type,
3047 .gpl_only = false,
3048 .ret_type = RET_INTEGER,
3049 .arg1_type = ARG_PTR_TO_CTX,
3050 .arg2_type = ARG_ANYTHING,
3051};
3052
3053static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
3054{
3055 switch (skb->protocol) {
3056 case htons(ETH_P_IP):
3057 return sizeof(struct iphdr);
3058 case htons(ETH_P_IPV6):
3059 return sizeof(struct ipv6hdr);
3060 default:
3061 return ~0U;
3062 }
3063}
3064
3065#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
3066 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3067
3068#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
3069 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
3070 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
3071 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
3072 BPF_F_ADJ_ROOM_ENCAP_L2( \
3073 BPF_ADJ_ROOM_ENCAP_L2_MASK))
3074
3075static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
3076 u64 flags)
3077{
3078 u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
3079 bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
3080 u16 mac_len = 0, inner_net = 0, inner_trans = 0;
3081 unsigned int gso_type = SKB_GSO_DODGY;
3082 int ret;
3083
3084 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3085 /* udp gso_size delineates datagrams, only allow if fixed */
3086 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3087 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3088 return -ENOTSUPP;
3089 }
3090
3091 ret = skb_cow_head(skb, len_diff);
3092 if (unlikely(ret < 0))
3093 return ret;
3094
3095 if (encap) {
3096 if (skb->protocol != htons(ETH_P_IP) &&
3097 skb->protocol != htons(ETH_P_IPV6))
3098 return -ENOTSUPP;
3099
3100 if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
3101 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3102 return -EINVAL;
3103
3104 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
3105 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3106 return -EINVAL;
3107
3108 if (skb->encapsulation)
3109 return -EALREADY;
3110
3111 mac_len = skb->network_header - skb->mac_header;
3112 inner_net = skb->network_header;
3113 if (inner_mac_len > len_diff)
3114 return -EINVAL;
3115 inner_trans = skb->transport_header;
3116 }
3117
3118 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3119 if (unlikely(ret < 0))
3120 return ret;
3121
3122 if (encap) {
3123 skb->inner_mac_header = inner_net - inner_mac_len;
3124 skb->inner_network_header = inner_net;
3125 skb->inner_transport_header = inner_trans;
3126 skb_set_inner_protocol(skb, skb->protocol);
3127
3128 skb->encapsulation = 1;
3129 skb_set_network_header(skb, mac_len);
3130
3131 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3132 gso_type |= SKB_GSO_UDP_TUNNEL;
3133 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
3134 gso_type |= SKB_GSO_GRE;
3135 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3136 gso_type |= SKB_GSO_IPXIP6;
3137 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3138 gso_type |= SKB_GSO_IPXIP4;
3139
3140 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
3141 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
3142 int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
3143 sizeof(struct ipv6hdr) :
3144 sizeof(struct iphdr);
3145
3146 skb_set_transport_header(skb, mac_len + nh_len);
3147 }
3148
3149 /* Match skb->protocol to new outer l3 protocol */
3150 if (skb->protocol == htons(ETH_P_IP) &&
3151 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3152 skb->protocol = htons(ETH_P_IPV6);
3153 else if (skb->protocol == htons(ETH_P_IPV6) &&
3154 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3155 skb->protocol = htons(ETH_P_IP);
3156 }
3157
3158 if (skb_is_gso(skb)) {
3159 struct skb_shared_info *shinfo = skb_shinfo(skb);
3160
3161 /* Due to header grow, MSS needs to be downgraded. */
3162 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3163 skb_decrease_gso_size(shinfo, len_diff);
3164
3165 /* Header must be checked, and gso_segs recomputed. */
3166 shinfo->gso_type |= gso_type;
3167 shinfo->gso_segs = 0;
3168 }
3169
3170 return 0;
3171}
3172
3173static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3174 u64 flags)
3175{
3176 int ret;
3177
3178 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
3179 BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3180 return -EINVAL;
3181
3182 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3183 /* udp gso_size delineates datagrams, only allow if fixed */
3184 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3185 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3186 return -ENOTSUPP;
3187 }
3188
3189 ret = skb_unclone(skb, GFP_ATOMIC);
3190 if (unlikely(ret < 0))
3191 return ret;
3192
3193 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3194 if (unlikely(ret < 0))
3195 return ret;
3196
3197 if (skb_is_gso(skb)) {
3198 struct skb_shared_info *shinfo = skb_shinfo(skb);
3199
3200 /* Due to header shrink, MSS can be upgraded. */
3201 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3202 skb_increase_gso_size(shinfo, len_diff);
3203
3204 /* Header must be checked, and gso_segs recomputed. */
3205 shinfo->gso_type |= SKB_GSO_DODGY;
3206 shinfo->gso_segs = 0;
3207 }
3208
3209 return 0;
3210}
3211
3212static u32 __bpf_skb_max_len(const struct sk_buff *skb)
3213{
3214 return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
3215 SKB_MAX_ALLOC;
3216}
3217
3218BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3219 u32, mode, u64, flags)
3220{
3221 u32 len_cur, len_diff_abs = abs(len_diff);
3222 u32 len_min = bpf_skb_net_base_len(skb);
3223 u32 len_max = __bpf_skb_max_len(skb);
3224 __be16 proto = skb->protocol;
3225 bool shrink = len_diff < 0;
3226 u32 off;
3227 int ret;
3228
3229 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
3230 BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3231 return -EINVAL;
3232 if (unlikely(len_diff_abs > 0xfffU))
3233 return -EFAULT;
3234 if (unlikely(proto != htons(ETH_P_IP) &&
3235 proto != htons(ETH_P_IPV6)))
3236 return -ENOTSUPP;
3237
3238 off = skb_mac_header_len(skb);
3239 switch (mode) {
3240 case BPF_ADJ_ROOM_NET:
3241 off += bpf_skb_net_base_len(skb);
3242 break;
3243 case BPF_ADJ_ROOM_MAC:
3244 break;
3245 default:
3246 return -ENOTSUPP;
3247 }
3248
3249 len_cur = skb->len - skb_network_offset(skb);
3250 if ((shrink && (len_diff_abs >= len_cur ||
3251 len_cur - len_diff_abs < len_min)) ||
3252 (!shrink && (skb->len + len_diff_abs > len_max &&
3253 !skb_is_gso(skb))))
3254 return -ENOTSUPP;
3255
3256 ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
3257 bpf_skb_net_grow(skb, off, len_diff_abs, flags);
3258 if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
3259 __skb_reset_checksum_unnecessary(skb);
3260
3261 bpf_compute_data_pointers(skb);
3262 return ret;
3263}
3264
3265static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3266 .func = bpf_skb_adjust_room,
3267 .gpl_only = false,
3268 .ret_type = RET_INTEGER,
3269 .arg1_type = ARG_PTR_TO_CTX,
3270 .arg2_type = ARG_ANYTHING,
3271 .arg3_type = ARG_ANYTHING,
3272 .arg4_type = ARG_ANYTHING,
3273};
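
/* A minimal usage sketch (illustrative only, not part of this file):
 * inside a TC program, make room for an outer IPv4/GRE header in front
 * of the network header as a first step of a BPF-built encapsulation;
 * struct gre_base_hdr is assumed to be declared in the program, and the
 * program still has to write the actual outer headers afterwards.
 *
 *      int room = sizeof(struct iphdr) + sizeof(struct gre_base_hdr);
 *
 *      if (bpf_skb_adjust_room(skb, room, BPF_ADJ_ROOM_MAC,
 *                              BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
 *                              BPF_F_ADJ_ROOM_ENCAP_L4_GRE))
 *              return TC_ACT_SHOT;
 */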
3274
3275static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3276{
3277 u32 min_len = skb_network_offset(skb);
3278
3279 if (skb_transport_header_was_set(skb))
3280 min_len = skb_transport_offset(skb);
3281 if (skb->ip_summed == CHECKSUM_PARTIAL)
3282 min_len = skb_checksum_start_offset(skb) +
3283 skb->csum_offset + sizeof(__sum16);
3284 return min_len;
3285}
3286
3287static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
3288{
3289 unsigned int old_len = skb->len;
3290 int ret;
3291
3292 ret = __skb_grow_rcsum(skb, new_len);
3293 if (!ret)
3294 memset(skb->data + old_len, 0, new_len - old_len);
3295 return ret;
3296}
3297
3298static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
3299{
3300 return __skb_trim_rcsum(skb, new_len);
3301}
3302
3303static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3304 u64 flags)
3305{
3306 u32 max_len = __bpf_skb_max_len(skb);
3307 u32 min_len = __bpf_skb_min_len(skb);
3308 int ret;
3309
3310 if (unlikely(flags || new_len > max_len || new_len < min_len))
3311 return -EINVAL;
3312 if (skb->encapsulation)
3313 return -ENOTSUPP;
3314
3315 /* The basic idea of this helper is that it's performing the
3316 * needed work to either grow or trim an skb, and eBPF program
3317 * rewrites the rest via helpers like bpf_skb_store_bytes(),
3318 * bpf_lX_csum_replace() and others rather than passing a raw
3319 * buffer here. This one is a slow path helper and intended
3320 * for replies with control messages.
3321 *
3322 * Like in bpf_skb_change_proto(), we want to keep this rather
3323 * minimal and without protocol specifics so that we are able
3324 * to separate concerns as in bpf_skb_store_bytes() should only
3325 * be the one responsible for writing buffers.
3326 *
3327 * It's really expected to be a slow path operation here for
3328 * control message replies, so we're implicitly linearizing,
3329 * uncloning and drop offloads from the skb by this.
3330 */
3331 ret = __bpf_try_make_writable(skb, skb->len);
3332 if (!ret) {
3333 if (new_len > skb->len)
3334 ret = bpf_skb_grow_rcsum(skb, new_len);
3335 else if (new_len < skb->len)
3336 ret = bpf_skb_trim_rcsum(skb, new_len);
3337 if (!ret && skb_is_gso(skb))
3338 skb_gso_reset(skb);
3339 }
3340 return ret;
3341}
3342
3343BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3344 u64, flags)
3345{
3346 int ret = __bpf_skb_change_tail(skb, new_len, flags);
3347
3348 bpf_compute_data_pointers(skb);
3349 return ret;
3350}
3351
3352static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3353 .func = bpf_skb_change_tail,
3354 .gpl_only = false,
3355 .ret_type = RET_INTEGER,
3356 .arg1_type = ARG_PTR_TO_CTX,
3357 .arg2_type = ARG_ANYTHING,
3358 .arg3_type = ARG_ANYTHING,
3359};
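
/* A minimal usage sketch (illustrative only, not part of this file):
 * trim an skb before turning a request into a small control answer;
 * the 64-byte target length is an arbitrary assumption.
 *
 *      if (bpf_skb_change_tail(skb, 64, 0))
 *              return TC_ACT_SHOT;
 */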
3360
3361BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3362 u64, flags)
3363{
3364 int ret = __bpf_skb_change_tail(skb, new_len, flags);
3365
3366 bpf_compute_data_end_sk_skb(skb);
3367 return ret;
3368}
3369
3370static const struct bpf_func_proto sk_skb_change_tail_proto = {
3371 .func = sk_skb_change_tail,
3372 .gpl_only = false,
3373 .ret_type = RET_INTEGER,
3374 .arg1_type = ARG_PTR_TO_CTX,
3375 .arg2_type = ARG_ANYTHING,
3376 .arg3_type = ARG_ANYTHING,
3377};
3378
3379static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3380 u64 flags)
3381{
3382 u32 max_len = __bpf_skb_max_len(skb);
3383 u32 new_len = skb->len + head_room;
3384 int ret;
3385
3386 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
3387 new_len < skb->len))
3388 return -EINVAL;
3389
3390 ret = skb_cow(skb, head_room);
3391 if (likely(!ret)) {
3392 /* Idea for this helper is that we currently only
3393 * allow to expand on mac header. This means that
3394 * skb->protocol network header, etc, stay as is.
3395 * Compared to bpf_skb_change_tail(), we're more
3396 * flexible due to not needing to linearize or
3397 * reset GSO. Intention for this helper is to be
3398 * used by an L3 skb that needs to push mac header
3399 * for redirection into L2 device.
3400 */
3401 __skb_push(skb, head_room);
3402 memset(skb->data, 0, head_room);
3403 skb_reset_mac_header(skb);
3404 }
3405
3406 return ret;
3407}
3408
3409BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3410 u64, flags)
3411{
3412 int ret = __bpf_skb_change_head(skb, head_room, flags);
3413
3414 bpf_compute_data_pointers(skb);
3415 return ret;
3416}
3417
3418static const struct bpf_func_proto bpf_skb_change_head_proto = {
3419 .func = bpf_skb_change_head,
3420 .gpl_only = false,
3421 .ret_type = RET_INTEGER,
3422 .arg1_type = ARG_PTR_TO_CTX,
3423 .arg2_type = ARG_ANYTHING,
3424 .arg3_type = ARG_ANYTHING,
3425};
3426
3427BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3428 u64, flags)
3429{
3430 int ret = __bpf_skb_change_head(skb, head_room, flags);
3431
3432 bpf_compute_data_end_sk_skb(skb);
3433 return ret;
3434}
3435
3436static const struct bpf_func_proto sk_skb_change_head_proto = {
3437 .func = sk_skb_change_head,
3438 .gpl_only = false,
3439 .ret_type = RET_INTEGER,
3440 .arg1_type = ARG_PTR_TO_CTX,
3441 .arg2_type = ARG_ANYTHING,
3442 .arg3_type = ARG_ANYTHING,
3443};

3444static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3445{
3446 return xdp_data_meta_unsupported(xdp) ? 0 :
3447 xdp->data - xdp->data_meta;
3448}
3449
3450BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
3451{
3452 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3453 unsigned long metalen = xdp_get_metalen(xdp);
3454 void *data_start = xdp_frame_end + metalen;
3455 void *data = xdp->data + offset;
3456
3457 if (unlikely(data < data_start ||
3458 data > xdp->data_end - ETH_HLEN))
3459 return -EINVAL;
3460
3461 if (metalen)
3462 memmove(xdp->data_meta + offset,
3463 xdp->data_meta, metalen);
3464 xdp->data_meta += offset;
3465 xdp->data = data;
3466
3467 return 0;
3468}
3469
3470static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3471 .func = bpf_xdp_adjust_head,
3472 .gpl_only = false,
3473 .ret_type = RET_INTEGER,
3474 .arg1_type = ARG_PTR_TO_CTX,
3475 .arg2_type = ARG_ANYTHING,
3476};
3477
3478BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
3479{
3480 void *data_hard_end = xdp_data_hard_end(xdp);
3481 void *data_end = xdp->data_end + offset;
3482
3483 /* Notice that xdp_data_hard_end have reserved some tailroom */
3484 if (unlikely(data_end > data_hard_end))
3485 return -EINVAL;
3486
3487 /* ALL drivers MUST init xdp->frame_sz, chicken check below */
3488 if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
3489 WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
3490 return -EINVAL;
3491 }
3492
3493 if (unlikely(data_end < xdp->data + ETH_HLEN))
3494 return -EINVAL;
3495
3496 /* Clear memory area on grow, can contain uninit kernel memory */
3497 if (offset > 0)
3498 memset(xdp->data_end, 0, offset);
3499
3500 xdp->data_end = data_end;
3501
3502 return 0;
3503}
3504
3505static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
3506 .func = bpf_xdp_adjust_tail,
3507 .gpl_only = false,
3508 .ret_type = RET_INTEGER,
3509 .arg1_type = ARG_PTR_TO_CTX,
3510 .arg2_type = ARG_ANYTHING,
3511};
3512
3513BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
3514{
3515 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3516 void *meta = xdp->data_meta + offset;
3517 unsigned long metalen = xdp->data - meta;
3518
3519 if (xdp_data_meta_unsupported(xdp))
3520 return -ENOTSUPP;
3521 if (unlikely(meta < xdp_frame_end ||
3522 meta > xdp->data))
3523 return -EINVAL;
3524 if (unlikely((metalen & (sizeof(__u32) - 1)) ||
3525 (metalen > 32)))
3526 return -EACCES;
3527
3528 xdp->data_meta = meta;
3529
3530 return 0;
3531}
3532
3533static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
3534 .func = bpf_xdp_adjust_meta,
3535 .gpl_only = false,
3536 .ret_type = RET_INTEGER,
3537 .arg1_type = ARG_PTR_TO_CTX,
3538 .arg2_type = ARG_ANYTHING,
3539};
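
/* A minimal usage sketch (illustrative only, not part of this file) of
 * the usual adjust_meta pattern: an XDP program reserves 4 bytes of
 * metadata in front of the packet and stores a mark there that a
 * cooperating TC program or driver could consume; the value 42 is made
 * up.
 *
 *      SEC("xdp")
 *      int xdp_mark(struct xdp_md *ctx)
 *      {
 *              __u32 *mark;
 *
 *              if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*mark)))
 *                      return XDP_PASS;
 *              mark = (void *)(long)ctx->data_meta;
 *              if ((void *)(mark + 1) > (void *)(long)ctx->data)
 *                      return XDP_PASS;
 *              *mark = 42;
 *              return XDP_PASS;
 *      }
 */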
3540
3541static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
3542 struct bpf_map *map, struct xdp_buff *xdp)
3543{
3544 switch (map->map_type) {
3545 case BPF_MAP_TYPE_DEVMAP:
3546 case BPF_MAP_TYPE_DEVMAP_HASH:
3547 return dev_map_enqueue(fwd, xdp, dev_rx);
3548 case BPF_MAP_TYPE_CPUMAP:
3549 return cpu_map_enqueue(fwd, xdp, dev_rx);
3550 case BPF_MAP_TYPE_XSKMAP:
3551 return __xsk_map_redirect(fwd, xdp);
3552 default:
3553 return -EBADRQC;
3554 }
3555 return 0;
3556}
3557
3558void xdp_do_flush(void)
3559{
3560 __dev_flush();
3561 __cpu_map_flush();
3562 __xsk_map_flush();
3563}
3564EXPORT_SYMBOL_GPL(xdp_do_flush);
3565
3566static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
3567{
3568 switch (map->map_type) {
3569 case BPF_MAP_TYPE_DEVMAP:
3570 return __dev_map_lookup_elem(map, index);
3571 case BPF_MAP_TYPE_DEVMAP_HASH:
3572 return __dev_map_hash_lookup_elem(map, index);
3573 case BPF_MAP_TYPE_CPUMAP:
3574 return __cpu_map_lookup_elem(map, index);
3575 case BPF_MAP_TYPE_XSKMAP:
3576 return __xsk_map_lookup_elem(map, index);
3577 default:
3578 return NULL;
3579 }
3580}
3581
3582void bpf_clear_redirect_map(struct bpf_map *map)
3583{
3584 struct bpf_redirect_info *ri;
3585 int cpu;
3586
3587 for_each_possible_cpu(cpu) {
3588 ri = per_cpu_ptr(&bpf_redirect_info, cpu);
3589
3590 /* Avoid polluting remote cacheline due to writes if not needed.
3591 * Once we pass this test, we need the cmpxchg() to make sure
3592 * it hasn't been changed in the meantime by remote CPU.
3593 */
3594 if (unlikely(READ_ONCE(ri->map) == map))
3595 cmpxchg(&ri->map, map, NULL);
3596 }
3597}
3598
3599int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
3600 struct bpf_prog *xdp_prog)
3601{
3602 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3603 struct bpf_map *map = READ_ONCE(ri->map);
3604 u32 index = ri->tgt_index;
3605 void *fwd = ri->tgt_value;
3606 int err;
3607
3608 ri->tgt_index = 0;
3609 ri->tgt_value = NULL;
3610 WRITE_ONCE(ri->map, NULL);
3611
3612 if (unlikely(!map)) {
3613 fwd = dev_get_by_index_rcu(dev_net(dev), index);
3614 if (unlikely(!fwd)) {
3615 err = -EINVAL;
3616 goto err;
3617 }
3618
3619 err = dev_xdp_enqueue(fwd, xdp, dev);
3620 } else {
3621 err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
3622 }
3623
3624 if (unlikely(err))
3625 goto err;
3626
3627 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
3628 return 0;
3629err:
3630 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
3631 return err;
3632}
3633EXPORT_SYMBOL_GPL(xdp_do_redirect);
3634
3635static int xdp_do_generic_redirect_map(struct net_device *dev,
3636 struct sk_buff *skb,
3637 struct xdp_buff *xdp,
3638 struct bpf_prog *xdp_prog,
3639 struct bpf_map *map)
3640{
3641 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3642 u32 index = ri->tgt_index;
3643 void *fwd = ri->tgt_value;
3644 int err = 0;
3645
3646 ri->tgt_index = 0;
3647 ri->tgt_value = NULL;
3648 WRITE_ONCE(ri->map, NULL);
3649
3650 if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
3651 map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
3652 struct bpf_dtab_netdev *dst = fwd;
3653
3654 err = dev_map_generic_redirect(dst, skb, xdp_prog);
3655 if (unlikely(err))
3656 goto err;
3657 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
3658 struct xdp_sock *xs = fwd;
3659
3660 err = xsk_generic_rcv(xs, xdp);
3661 if (err)
3662 goto err;
3663 consume_skb(skb);
3664 } else {
3665 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
3666 err = -EBADRQC;
3667 goto err;
3668 }
3669
3670 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
3671 return 0;
3672err:
3673 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
3674 return err;
3675}
3676
3677int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
3678 struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
3679{
3680 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3681 struct bpf_map *map = READ_ONCE(ri->map);
3682 u32 index = ri->tgt_index;
3683 struct net_device *fwd;
3684 int err = 0;
3685
3686 if (map)
3687 return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
3688 map);
3689 ri->tgt_index = 0;
3690 fwd = dev_get_by_index_rcu(dev_net(dev), index);
3691 if (unlikely(!fwd)) {
3692 err = -EINVAL;
3693 goto err;
3694 }
3695
3696 err = xdp_ok_fwd_dev(fwd, skb->len);
3697 if (unlikely(err))
3698 goto err;
3699
3700 skb->dev = fwd;
3701 _trace_xdp_redirect(dev, xdp_prog, index);
3702 generic_xdp_tx(skb, xdp_prog);
3703 return 0;
3704err:
3705 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
3706 return err;
3707}
3708
3709BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
3710{
3711 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3712
3713 if (unlikely(flags))
3714 return XDP_ABORTED;
3715
3716 ri->flags = flags;
3717 ri->tgt_index = ifindex;
3718 ri->tgt_value = NULL;
3719 WRITE_ONCE(ri->map, NULL);
3720
3721 return XDP_REDIRECT;
3722}
3723
3724static const struct bpf_func_proto bpf_xdp_redirect_proto = {
3725 .func = bpf_xdp_redirect,
3726 .gpl_only = false,
3727 .ret_type = RET_INTEGER,
3728 .arg1_type = ARG_ANYTHING,
3729 .arg2_type = ARG_ANYTHING,
3730};
3731
3732BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
3733 u64, flags)
3734{
3735 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3736
3737 /* Lower bits of flags are used as return code on lookup failure */
3738 if (unlikely(flags > XDP_TX))
3739 return XDP_ABORTED;
3740
3741 ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
3742 if (unlikely(!ri->tgt_value)) {
3743 /* If the lookup fails we want to clear out the state in the
3744 * redirect_info struct completely, so that if an eBPF program
3745 * performs multiple lookups, the last one always takes
3746 * precedence.
3747 */
3748 WRITE_ONCE(ri->map, NULL);
3749 return flags;
3750 }
3751
3752 ri->flags = flags;
3753 ri->tgt_index = ifindex;
3754 WRITE_ONCE(ri->map, map);
3755
3756 return XDP_REDIRECT;
3757}
3758
3759static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
3760 .func = bpf_xdp_redirect_map,
3761 .gpl_only = false,
3762 .ret_type = RET_INTEGER,
3763 .arg1_type = ARG_CONST_MAP_PTR,
3764 .arg2_type = ARG_ANYTHING,
3765 .arg3_type = ARG_ANYTHING,
3766};
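
/* A minimal usage sketch (illustrative only, not part of this file; map
 * name and sizing are made up): forward every packet to the device
 * stored at key 0 of a devmap, falling back to XDP_PASS when the map
 * entry is empty, which matches the lower-bits flags semantics above.
 *
 *      struct {
 *              __uint(type, BPF_MAP_TYPE_DEVMAP);
 *              __uint(max_entries, 1);
 *              __type(key, __u32);
 *              __type(value, __u32);
 *      } tx_port SEC(".maps");
 *
 *      SEC("xdp")
 *      int xdp_fwd(struct xdp_md *ctx)
 *      {
 *              return bpf_redirect_map(&tx_port, 0, XDP_PASS);
 *      }
 */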
3767
3768static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
3769 unsigned long off, unsigned long len)
3770{
3771 void *ptr = skb_header_pointer(skb, off, len, dst_buff);
3772
3773 if (unlikely(!ptr))
3774 return len;
3775 if (ptr != dst_buff)
3776 memcpy(dst_buff, ptr, len);
3777
3778 return 0;
3779}
3780
3781BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
3782 u64, flags, void *, meta, u64, meta_size)
3783{
3784 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
3785
3786 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
3787 return -EINVAL;
3788 if (unlikely(!skb || skb_size > skb->len))
3789 return -EFAULT;
3790
3791 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
3792 bpf_skb_copy);
3793}
3794
3795static const struct bpf_func_proto bpf_skb_event_output_proto = {
3796 .func = bpf_skb_event_output,
3797 .gpl_only = true,
3798 .ret_type = RET_INTEGER,
3799 .arg1_type = ARG_PTR_TO_CTX,
3800 .arg2_type = ARG_CONST_MAP_PTR,
3801 .arg3_type = ARG_ANYTHING,
3802 .arg4_type = ARG_PTR_TO_MEM,
3803 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
3804};
3805
3806BTF_ID_LIST(bpf_skb_output_btf_ids)
3807BTF_ID(struct, sk_buff)
3808
3809const struct bpf_func_proto bpf_skb_output_proto = {
3810 .func = bpf_skb_event_output,
3811 .gpl_only = true,
3812 .ret_type = RET_INTEGER,
3813 .arg1_type = ARG_PTR_TO_BTF_ID,
3814 .arg2_type = ARG_CONST_MAP_PTR,
3815 .arg3_type = ARG_ANYTHING,
3816 .arg4_type = ARG_PTR_TO_MEM,
3817 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
3818 .btf_id = bpf_skb_output_btf_ids,
3819};
3820
3821static unsigned short bpf_tunnel_key_af(u64 flags)
3822{
3823 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
3824}
3825
3826BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
3827 u32, size, u64, flags)
3828{
3829 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
3830 u8 compat[sizeof(struct bpf_tunnel_key)];
3831 void *to_orig = to;
3832 int err;
3833
3834 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
3835 err = -EINVAL;
3836 goto err_clear;
3837 }
3838 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
3839 err = -EPROTO;
3840 goto err_clear;
3841 }
3842 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
3843 err = -EINVAL;
3844 switch (size) {
3845 case offsetof(struct bpf_tunnel_key, tunnel_label):
3846 case offsetof(struct bpf_tunnel_key, tunnel_ext):
3847 goto set_compat;
3848 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
3849 /* Fixup deprecated structure layouts here, so we have
3850 * a common path later on.
3851 */
3852 if (ip_tunnel_info_af(info) != AF_INET)
3853 goto err_clear;
3854set_compat:
3855 to = (struct bpf_tunnel_key *)compat;
3856 break;
3857 default:
3858 goto err_clear;
3859 }
3860 }
3861
3862 to->tunnel_id = be64_to_cpu(info->key.tun_id);
3863 to->tunnel_tos = info->key.tos;
3864 to->tunnel_ttl = info->key.ttl;
3865 to->tunnel_ext = 0;
3866
3867 if (flags & BPF_F_TUNINFO_IPV6) {
3868 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
3869 sizeof(to->remote_ipv6));
3870 to->tunnel_label = be32_to_cpu(info->key.label);
3871 } else {
3872 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
3873 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
3874 to->tunnel_label = 0;
3875 }
3876
3877 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
3878 memcpy(to_orig, to, size);
3879
3880 return 0;
3881err_clear:
3882 memset(to_orig, 0, size);
3883 return err;
3884}
3885
3886static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
3887 .func = bpf_skb_get_tunnel_key,
3888 .gpl_only = false,
3889 .ret_type = RET_INTEGER,
3890 .arg1_type = ARG_PTR_TO_CTX,
3891 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
3892 .arg3_type = ARG_CONST_SIZE,
3893 .arg4_type = ARG_ANYTHING,
3894};
3895
3896BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
3897{
3898 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
3899 int err;
3900
3901 if (unlikely(!info ||
3902 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
3903 err = -ENOENT;
3904 goto err_clear;
3905 }
3906 if (unlikely(size < info->options_len)) {
3907 err = -ENOMEM;
3908 goto err_clear;
3909 }
3910
3911 ip_tunnel_info_opts_get(to, info);
3912 if (size > info->options_len)
3913 memset(to + info->options_len, 0, size - info->options_len);
3914
3915 return info->options_len;
3916err_clear:
3917 memset(to, 0, size);
3918 return err;
3919}
3920
3921static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
3922 .func = bpf_skb_get_tunnel_opt,
3923 .gpl_only = false,
3924 .ret_type = RET_INTEGER,
3925 .arg1_type = ARG_PTR_TO_CTX,
3926 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
3927 .arg3_type = ARG_CONST_SIZE,
3928};
3929
3930static struct metadata_dst __percpu *md_dst;
3931
3932BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
3933 const struct bpf_tunnel_key *, from, u32, size, u64, flags)
3934{
3935 struct metadata_dst *md = this_cpu_ptr(md_dst);
3936 u8 compat[sizeof(struct bpf_tunnel_key)];
3937 struct ip_tunnel_info *info;
3938
3939 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
3940 BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
3941 return -EINVAL;
3942 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
3943 switch (size) {
3944 case offsetof(struct bpf_tunnel_key, tunnel_label):
3945 case offsetof(struct bpf_tunnel_key, tunnel_ext):
3946 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
3947 /* Fixup deprecated structure layouts here, so we have
3948 * a common path later on.
3949 */
3950 memcpy(compat, from, size);
3951 memset(compat + size, 0, sizeof(compat) - size);
3952 from = (const struct bpf_tunnel_key *) compat;
3953 break;
3954 default:
3955 return -EINVAL;
3956 }
3957 }
3958 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
3959 from->tunnel_ext))
3960 return -EINVAL;
3961
3962 skb_dst_drop(skb);
3963 dst_hold((struct dst_entry *) md);
3964 skb_dst_set(skb, (struct dst_entry *) md);
3965
3966 info = &md->u.tun_info;
3967 memset(info, 0, sizeof(*info));
3968 info->mode = IP_TUNNEL_INFO_TX;
3969
3970 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
3971 if (flags & BPF_F_DONT_FRAGMENT)
3972 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
3973 if (flags & BPF_F_ZERO_CSUM_TX)
3974 info->key.tun_flags &= ~TUNNEL_CSUM;
3975 if (flags & BPF_F_SEQ_NUMBER)
3976 info->key.tun_flags |= TUNNEL_SEQ;
3977
3978 info->key.tun_id = cpu_to_be64(from->tunnel_id);
3979 info->key.tos = from->tunnel_tos;
3980 info->key.ttl = from->tunnel_ttl;
3981
3982 if (flags & BPF_F_TUNINFO_IPV6) {
3983 info->mode |= IP_TUNNEL_INFO_IPV6;
3984 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
3985 sizeof(from->remote_ipv6));
3986 info->key.label = cpu_to_be32(from->tunnel_label) &
3987 IPV6_FLOWLABEL_MASK;
3988 } else {
3989 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
3990 }
3991
3992 return 0;
3993}
3994
3995static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
3996 .func = bpf_skb_set_tunnel_key,
3997 .gpl_only = false,
3998 .ret_type = RET_INTEGER,
3999 .arg1_type = ARG_PTR_TO_CTX,
4000 .arg2_type = ARG_PTR_TO_MEM,
4001 .arg3_type = ARG_CONST_SIZE,
4002 .arg4_type = ARG_ANYTHING,
4003};
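
/* A minimal usage sketch (illustrative only, not part of this file; the
 * VNI, remote address 172.16.1.100 and vxlan_ifindex are made-up
 * assumptions): set collect_md tunnel metadata on egress and hand the
 * skb to a VXLAN device operating in external mode.
 *
 *      struct bpf_tunnel_key key = {};
 *
 *      key.tunnel_id = 42;
 *      key.remote_ipv4 = 0xac100164;
 *      key.tunnel_ttl = 64;
 *      if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 *                                 BPF_F_ZERO_CSUM_TX))
 *              return TC_ACT_SHOT;
 *      return bpf_redirect(vxlan_ifindex, 0);
 */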
4004
4005BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
4006 const u8 *, from, u32, size)
4007{
4008 struct ip_tunnel_info *info = skb_tunnel_info(skb);
4009 const struct metadata_dst *md = this_cpu_ptr(md_dst);
4010
4011 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
4012 return -EINVAL;
4013 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
4014 return -ENOMEM;
4015
4016 ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
4017
4018 return 0;
4019}
4020
4021static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
4022 .func = bpf_skb_set_tunnel_opt,
4023 .gpl_only = false,
4024 .ret_type = RET_INTEGER,
4025 .arg1_type = ARG_PTR_TO_CTX,
4026 .arg2_type = ARG_PTR_TO_MEM,
4027 .arg3_type = ARG_CONST_SIZE,
4028};
4029
4030static const struct bpf_func_proto *
4031bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
4032{
4033 if (!md_dst) {
4034 struct metadata_dst __percpu *tmp;
4035
4036 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
4037 METADATA_IP_TUNNEL,
4038 GFP_KERNEL);
4039 if (!tmp)
4040 return NULL;
4041 if (cmpxchg(&md_dst, NULL, tmp))
4042 metadata_dst_free_percpu(tmp);
4043 }
4044
4045 switch (which) {
4046 case BPF_FUNC_skb_set_tunnel_key:
4047 return &bpf_skb_set_tunnel_key_proto;
4048 case BPF_FUNC_skb_set_tunnel_opt:
4049 return &bpf_skb_set_tunnel_opt_proto;
4050 default:
4051 return NULL;
4052 }
4053}
4054
4055BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
4056 u32, idx)
4057{
4058 struct bpf_array *array = container_of(map, struct bpf_array, map);
4059 struct cgroup *cgrp;
4060 struct sock *sk;
4061
4062 sk = skb_to_full_sk(skb);
4063 if (!sk || !sk_fullsock(sk))
4064 return -ENOENT;
4065 if (unlikely(idx >= array->map.max_entries))
4066 return -E2BIG;
4067
4068 cgrp = READ_ONCE(array->ptrs[idx]);
4069 if (unlikely(!cgrp))
4070 return -EAGAIN;
4071
4072 return sk_under_cgroup_hierarchy(sk, cgrp);
4073}
4074
4075static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
4076 .func = bpf_skb_under_cgroup,
4077 .gpl_only = false,
4078 .ret_type = RET_INTEGER,
4079 .arg1_type = ARG_PTR_TO_CTX,
4080 .arg2_type = ARG_CONST_MAP_PTR,
4081 .arg3_type = ARG_ANYTHING,
4082};
4083
4084#ifdef CONFIG_SOCK_CGROUP_DATA
4085static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
4086{
4087 struct cgroup *cgrp;
4088
4089 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4090 return cgroup_id(cgrp);
4091}
4092
4093BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
4094{
4095 struct sock *sk = skb_to_full_sk(skb);
4096
4097 if (!sk || !sk_fullsock(sk))
4098 return 0;
4099
4100 return __bpf_sk_cgroup_id(sk);
4101}
4102
4103static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
4104 .func = bpf_skb_cgroup_id,
4105 .gpl_only = false,
4106 .ret_type = RET_INTEGER,
4107 .arg1_type = ARG_PTR_TO_CTX,
4108};
4109
4110static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
4111 int ancestor_level)
4112{
4113 struct cgroup *ancestor;
4114 struct cgroup *cgrp;
4115
4116 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4117 ancestor = cgroup_ancestor(cgrp, ancestor_level);
4118 if (!ancestor)
4119 return 0;
4120
4121 return cgroup_id(ancestor);
4122}
4123
4124BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
4125 ancestor_level)
4126{
4127 struct sock *sk = skb_to_full_sk(skb);
4128
4129 if (!sk || !sk_fullsock(sk))
4130 return 0;
4131
4132 return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4133}
4134
4135static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
4136 .func = bpf_skb_ancestor_cgroup_id,
4137 .gpl_only = false,
4138 .ret_type = RET_INTEGER,
4139 .arg1_type = ARG_PTR_TO_CTX,
4140 .arg2_type = ARG_ANYTHING,
4141};
4142
4143BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
4144{
4145 return __bpf_sk_cgroup_id(sk);
4146}
4147
4148static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
4149 .func = bpf_sk_cgroup_id,
4150 .gpl_only = false,
4151 .ret_type = RET_INTEGER,
4152 .arg1_type = ARG_PTR_TO_SOCKET,
4153};
4154
4155BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
4156{
4157 return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4158}
4159
4160static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
4161 .func = bpf_sk_ancestor_cgroup_id,
4162 .gpl_only = false,
4163 .ret_type = RET_INTEGER,
4164 .arg1_type = ARG_PTR_TO_SOCKET,
4165 .arg2_type = ARG_ANYTHING,
4166};
4167#endif
4168
4169static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
4170 unsigned long off, unsigned long len)
4171{
4172 memcpy(dst_buff, src_buff + off, len);
4173 return 0;
4174}
4175
4176BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
4177 u64, flags, void *, meta, u64, meta_size)
4178{
4179 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4180
4181 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4182 return -EINVAL;
4183 if (unlikely(!xdp ||
4184 xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
4185 return -EFAULT;
4186
4187 return bpf_event_output(map, flags, meta, meta_size, xdp->data,
4188 xdp_size, bpf_xdp_copy);
4189}
4190
4191static const struct bpf_func_proto bpf_xdp_event_output_proto = {
4192 .func = bpf_xdp_event_output,
4193 .gpl_only = true,
4194 .ret_type = RET_INTEGER,
4195 .arg1_type = ARG_PTR_TO_CTX,
4196 .arg2_type = ARG_CONST_MAP_PTR,
4197 .arg3_type = ARG_ANYTHING,
4198 .arg4_type = ARG_PTR_TO_MEM,
4199 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
4200};
4201
4202BTF_ID_LIST(bpf_xdp_output_btf_ids)
4203BTF_ID(struct, xdp_buff)
4204
4205const struct bpf_func_proto bpf_xdp_output_proto = {
4206 .func = bpf_xdp_event_output,
4207 .gpl_only = true,
4208 .ret_type = RET_INTEGER,
4209 .arg1_type = ARG_PTR_TO_BTF_ID,
4210 .arg2_type = ARG_CONST_MAP_PTR,
4211 .arg3_type = ARG_ANYTHING,
4212 .arg4_type = ARG_PTR_TO_MEM,
4213 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
4214 .btf_id = bpf_xdp_output_btf_ids,
4215};
4216
4217BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
4218{
4219 return skb->sk ? sock_gen_cookie(skb->sk) : 0;
4220}
4221
4222static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
4223 .func = bpf_get_socket_cookie,
4224 .gpl_only = false,
4225 .ret_type = RET_INTEGER,
4226 .arg1_type = ARG_PTR_TO_CTX,
4227};
4228
4229BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4230{
4231 return sock_gen_cookie(ctx->sk);
4232}
4233
4234static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
4235 .func = bpf_get_socket_cookie_sock_addr,
4236 .gpl_only = false,
4237 .ret_type = RET_INTEGER,
4238 .arg1_type = ARG_PTR_TO_CTX,
4239};
4240
4241BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
4242{
4243 return sock_gen_cookie(ctx);
4244}
4245
4246static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
4247 .func = bpf_get_socket_cookie_sock,
4248 .gpl_only = false,
4249 .ret_type = RET_INTEGER,
4250 .arg1_type = ARG_PTR_TO_CTX,
4251};
4252
4253BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4254{
4255 return sock_gen_cookie(ctx->sk);
4256}
4257
4258static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
4259 .func = bpf_get_socket_cookie_sock_ops,
4260 .gpl_only = false,
4261 .ret_type = RET_INTEGER,
4262 .arg1_type = ARG_PTR_TO_CTX,
4263};
4264
4265static u64 __bpf_get_netns_cookie(struct sock *sk)
4266{
4267#ifdef CONFIG_NET_NS
4268 return net_gen_cookie(sk ? sk->sk_net.net : &init_net);
4269#else
4270 return 0;
4271#endif
4272}
4273
4274BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
4275{
4276 return __bpf_get_netns_cookie(ctx);
4277}
4278
4279static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
4280 .func = bpf_get_netns_cookie_sock,
4281 .gpl_only = false,
4282 .ret_type = RET_INTEGER,
4283 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
4284};
4285
4286BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4287{
4288 return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4289}
4290
4291static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
4292 .func = bpf_get_netns_cookie_sock_addr,
4293 .gpl_only = false,
4294 .ret_type = RET_INTEGER,
4295 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
4296};
4297
4298BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
4299{
4300 struct sock *sk = sk_to_full_sk(skb->sk);
4301 kuid_t kuid;
4302
4303 if (!sk || !sk_fullsock(sk))
4304 return overflowuid;
4305 kuid = sock_net_uid(sock_net(sk), sk);
4306 return from_kuid_munged(sock_net(sk)->user_ns, kuid);
4307}
4308
4309static const struct bpf_func_proto bpf_get_socket_uid_proto = {
4310 .func = bpf_get_socket_uid,
4311 .gpl_only = false,
4312 .ret_type = RET_INTEGER,
4313 .arg1_type = ARG_PTR_TO_CTX,
4314};
4315
4316#define SOCKOPT_CC_REINIT (1 << 0)
4317
4318static int _bpf_setsockopt(struct sock *sk, int level, int optname,
4319 char *optval, int optlen, u32 flags)
4320{
4321 char devname[IFNAMSIZ];
4322 int val, valbool;
4323 struct net *net;
4324 int ifindex;
4325 int ret = 0;
4326
4327 if (!sk_fullsock(sk))
4328 return -EINVAL;
4329
4330 sock_owned_by_me(sk);
4331
4332 if (level == SOL_SOCKET) {
4333 if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
4334 return -EINVAL;
4335 val = *((int *)optval);
4336 valbool = val ? 1 : 0;
4337
4338 /* Only some socketops are supported */
4339 switch (optname) {
4340 case SO_RCVBUF:
4341 val = min_t(u32, val, sysctl_rmem_max);
4342 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
4343 WRITE_ONCE(sk->sk_rcvbuf,
4344 max_t(int, val * 2, SOCK_MIN_RCVBUF));
4345 break;
4346 case SO_SNDBUF:
4347 val = min_t(u32, val, sysctl_wmem_max);
4348 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
4349 WRITE_ONCE(sk->sk_sndbuf,
4350 max_t(int, val * 2, SOCK_MIN_SNDBUF));
4351 break;
4352 case SO_MAX_PACING_RATE:
4353 if (val != ~0U)
4354 cmpxchg(&sk->sk_pacing_status,
4355 SK_PACING_NONE,
4356 SK_PACING_NEEDED);
4357 sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
4358 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
4359 sk->sk_max_pacing_rate);
4360 break;
4361 case SO_PRIORITY:
4362 sk->sk_priority = val;
4363 break;
4364 case SO_RCVLOWAT:
4365 if (val < 0)
4366 val = INT_MAX;
4367 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
4368 break;
4369 case SO_MARK:
4370 if (sk->sk_mark != val) {
4371 sk->sk_mark = val;
4372 sk_dst_reset(sk);
4373 }
4374 break;
4375 case SO_BINDTODEVICE:
4376 optlen = min_t(long, optlen, IFNAMSIZ - 1);
4377 strncpy(devname, optval, optlen);
4378 devname[optlen] = 0;
4379
4380 ifindex = 0;
4381 if (devname[0] != '\0') {
4382 struct net_device *dev;
4383
4384 ret = -ENODEV;
4385
4386 net = sock_net(sk);
4387 dev = dev_get_by_name(net, devname);
4388 if (!dev)
4389 break;
4390 ifindex = dev->ifindex;
4391 dev_put(dev);
4392 }
4393 ret = sock_bindtoindex(sk, ifindex, false);
4394 break;
4395 case SO_KEEPALIVE:
4396 if (sk->sk_prot->keepalive)
4397 sk->sk_prot->keepalive(sk, valbool);
4398 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
4399 break;
4400 default:
4401 ret = -EINVAL;
4402 }
4403#ifdef CONFIG_INET
4404 } else if (level == SOL_IP) {
4405 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4406 return -EINVAL;
4407
4408 val = *((int *)optval);
4409
4410 switch (optname) {
4411 case IP_TOS:
4412 if (val < -1 || val > 0xff) {
4413 ret = -EINVAL;
4414 } else {
4415 struct inet_sock *inet = inet_sk(sk);
4416
4417 if (val == -1)
4418 val = 0;
4419 inet->tos = val;
4420 }
4421 break;
4422 default:
4423 ret = -EINVAL;
4424 }
4425#if IS_ENABLED(CONFIG_IPV6)
4426 } else if (level == SOL_IPV6) {
4427 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4428 return -EINVAL;
4429
4430 val = *((int *)optval);
4431
4432 switch (optname) {
4433 case IPV6_TCLASS:
4434 if (val < -1 || val > 0xff) {
4435 ret = -EINVAL;
4436 } else {
4437 struct ipv6_pinfo *np = inet6_sk(sk);
4438
4439 if (val == -1)
4440 val = 0;
4441 np->tclass = val;
4442 }
4443 break;
4444 default:
4445 ret = -EINVAL;
4446 }
4447#endif
4448 } else if (level == SOL_TCP &&
4449 sk->sk_prot->setsockopt == tcp_setsockopt) {
4450 if (optname == TCP_CONGESTION) {
4451 char name[TCP_CA_NAME_MAX];
4452 bool reinit = flags & SOCKOPT_CC_REINIT;
4453
4454 strncpy(name, optval, min_t(long, optlen,
4455 TCP_CA_NAME_MAX-1));
4456 name[TCP_CA_NAME_MAX-1] = 0;
4457 ret = tcp_set_congestion_control(sk, name, false,
4458 reinit, true);
4459 } else {
4460 struct inet_connection_sock *icsk = inet_csk(sk);
4461 struct tcp_sock *tp = tcp_sk(sk);
4462
4463 if (optlen != sizeof(int))
4464 return -EINVAL;
4465
4466 val = *((int *)optval);
4467
4468 switch (optname) {
4469 case TCP_BPF_IW:
4470 if (val <= 0 || tp->data_segs_out > tp->syn_data)
4471 ret = -EINVAL;
4472 else
4473 tp->snd_cwnd = val;
4474 break;
4475 case TCP_BPF_SNDCWND_CLAMP:
4476 if (val <= 0) {
4477 ret = -EINVAL;
4478 } else {
4479 tp->snd_cwnd_clamp = val;
4480 tp->snd_ssthresh = val;
4481 }
4482 break;
4483 case TCP_SAVE_SYN:
4484 if (val < 0 || val > 1)
4485 ret = -EINVAL;
4486 else
4487 tp->save_syn = val;
4488 break;
4489 case TCP_KEEPIDLE:
4490 ret = tcp_sock_set_keepidle_locked(sk, val);
4491 break;
4492 case TCP_KEEPINTVL:
4493 if (val < 1 || val > MAX_TCP_KEEPINTVL)
4494 ret = -EINVAL;
4495 else
4496 tp->keepalive_intvl = val * HZ;
4497 break;
4498 case TCP_KEEPCNT:
4499 if (val < 1 || val > MAX_TCP_KEEPCNT)
4500 ret = -EINVAL;
4501 else
4502 tp->keepalive_probes = val;
4503 break;
4504 case TCP_SYNCNT:
4505 if (val < 1 || val > MAX_TCP_SYNCNT)
4506 ret = -EINVAL;
4507 else
4508 icsk->icsk_syn_retries = val;
4509 break;
4510 case TCP_USER_TIMEOUT:
4511 if (val < 0)
4512 ret = -EINVAL;
4513 else
4514 icsk->icsk_user_timeout = val;
4515 break;
4516 default:
4517 ret = -EINVAL;
4518 }
4519 }
4520#endif
4521 } else {
4522 ret = -EINVAL;
4523 }
4524 return ret;
4525}
4526
4527static int _bpf_getsockopt(struct sock *sk, int level, int optname,
4528 char *optval, int optlen)
4529{
4530 if (!sk_fullsock(sk))
4531 goto err_clear;
4532
4533 sock_owned_by_me(sk);
4534
4535#ifdef CONFIG_INET
4536 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
4537 struct inet_connection_sock *icsk;
4538 struct tcp_sock *tp;
4539
4540 switch (optname) {
4541 case TCP_CONGESTION:
4542 icsk = inet_csk(sk);
4543
4544 if (!icsk->icsk_ca_ops || optlen <= 1)
4545 goto err_clear;
4546 strncpy(optval, icsk->icsk_ca_ops->name, optlen);
4547 optval[optlen - 1] = 0;
4548 break;
4549 case TCP_SAVED_SYN:
4550 tp = tcp_sk(sk);
4551
4552 if (optlen <= 0 || !tp->saved_syn ||
4553 optlen > tp->saved_syn[0])
4554 goto err_clear;
4555 memcpy(optval, tp->saved_syn + 1, optlen);
4556 break;
4557 default:
4558 goto err_clear;
4559 }
4560 } else if (level == SOL_IP) {
4561 struct inet_sock *inet = inet_sk(sk);
4562
4563 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4564 goto err_clear;
4565
4566 /* Only some options are supported */
4567 switch (optname) {
4568 case IP_TOS:
4569 *((int *)optval) = (int)inet->tos;
4570 break;
4571 default:
4572 goto err_clear;
4573 }
4574#if IS_ENABLED(CONFIG_IPV6)
4575 } else if (level == SOL_IPV6) {
4576 struct ipv6_pinfo *np = inet6_sk(sk);
4577
4578 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4579 goto err_clear;
4580
4581 /* Only some options are supported */
4582 switch (optname) {
4583 case IPV6_TCLASS:
4584 *((int *)optval) = (int)np->tclass;
4585 break;
4586 default:
4587 goto err_clear;
4588 }
4589#endif
4590 } else {
4591 goto err_clear;
4592 }
4593 return 0;
4594#endif
4595err_clear:
4596 memset(optval, 0, optlen);
4597 return -EINVAL;
4598}
4599
4600BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
4601 int, level, int, optname, char *, optval, int, optlen)
4602{
4603 u32 flags = 0;
4604 return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen,
4605 flags);
4606}
4607
4608static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
4609 .func = bpf_sock_addr_setsockopt,
4610 .gpl_only = false,
4611 .ret_type = RET_INTEGER,
4612 .arg1_type = ARG_PTR_TO_CTX,
4613 .arg2_type = ARG_ANYTHING,
4614 .arg3_type = ARG_ANYTHING,
4615 .arg4_type = ARG_PTR_TO_MEM,
4616 .arg5_type = ARG_CONST_SIZE,
4617};
4618
4619BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
4620 int, level, int, optname, char *, optval, int, optlen)
4621{
4622 return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen);
4623}
4624
4625static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
4626 .func = bpf_sock_addr_getsockopt,
4627 .gpl_only = false,
4628 .ret_type = RET_INTEGER,
4629 .arg1_type = ARG_PTR_TO_CTX,
4630 .arg2_type = ARG_ANYTHING,
4631 .arg3_type = ARG_ANYTHING,
4632 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
4633 .arg5_type = ARG_CONST_SIZE,
4634};
4635
4636BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4637 int, level, int, optname, char *, optval, int, optlen)
4638{
4639 u32 flags = 0;
4640 if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
4641 flags |= SOCKOPT_CC_REINIT;
4642 return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen,
4643 flags);
4644}
4645
4646static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
4647 .func = bpf_sock_ops_setsockopt,
4648 .gpl_only = false,
4649 .ret_type = RET_INTEGER,
4650 .arg1_type = ARG_PTR_TO_CTX,
4651 .arg2_type = ARG_ANYTHING,
4652 .arg3_type = ARG_ANYTHING,
4653 .arg4_type = ARG_PTR_TO_MEM,
4654 .arg5_type = ARG_CONST_SIZE,
4655};
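
/* A minimal usage sketch (illustrative only, not part of this file):
 * switch a TCP connection to a different congestion control from a
 * sockops program once it becomes established; the choice of "reno" is
 * arbitrary.
 *
 *      SEC("sockops")
 *      int set_cc(struct bpf_sock_ops *ops)
 *      {
 *              char cc[] = "reno";
 *
 *              if (ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
 *                      bpf_setsockopt(ops, SOL_TCP, TCP_CONGESTION,
 *                                     cc, sizeof(cc));
 *              return 1;
 *      }
 */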
4656
4657BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4658 int, level, int, optname, char *, optval, int, optlen)
4659{
4660 return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
4661}
4662
4663static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
4664 .func = bpf_sock_ops_getsockopt,
4665 .gpl_only = false,
4666 .ret_type = RET_INTEGER,
4667 .arg1_type = ARG_PTR_TO_CTX,
4668 .arg2_type = ARG_ANYTHING,
4669 .arg3_type = ARG_ANYTHING,
4670 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
4671 .arg5_type = ARG_CONST_SIZE,
4672};
4673
4674BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
4675 int, argval)
4676{
4677 struct sock *sk = bpf_sock->sk;
4678 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
4679
4680 if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
4681 return -EINVAL;
4682
4683 tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
4684
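	/* return the flag bits that could not be set; 0 means full success */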
4685 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
4686}
4687
4688static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
4689 .func = bpf_sock_ops_cb_flags_set,
4690 .gpl_only = false,
4691 .ret_type = RET_INTEGER,
4692 .arg1_type = ARG_PTR_TO_CTX,
4693 .arg2_type = ARG_ANYTHING,
4694};
4695
4696const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
4697EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
4698
4699BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
4700 int, addr_len)
4701{
4702#ifdef CONFIG_INET
4703 struct sock *sk = ctx->sk;
4704 u32 flags = BIND_FROM_BPF;
4705 int err;
4706
4707 err = -EINVAL;
4708 if (addr_len < offsetofend(struct sockaddr, sa_family))
4709 return err;
4710 if (addr->sa_family == AF_INET) {
4711 if (addr_len < sizeof(struct sockaddr_in))
4712 return err;
4713 if (((struct sockaddr_in *)addr)->sin_port == htons(0))
4714 flags |= BIND_FORCE_ADDRESS_NO_PORT;
4715 return __inet_bind(sk, addr, addr_len, flags);
4716#if IS_ENABLED(CONFIG_IPV6)
4717 } else if (addr->sa_family == AF_INET6) {
4718 if (addr_len < SIN6_LEN_RFC2133)
4719 return err;
4720 if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0))
4721 flags |= BIND_FORCE_ADDRESS_NO_PORT;
		/* ipv6_bpf_stub cannot be NULL, since it's called
		 * from bpf prog of type BPF_PROG_TYPE_CGROUP_SOCK_ADDR
		 */
4725 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
4726#endif
4727 }
4728#endif
4729
4730 return -EAFNOSUPPORT;
4731}
4732
4733static const struct bpf_func_proto bpf_bind_proto = {
4734 .func = bpf_bind,
4735 .gpl_only = false,
4736 .ret_type = RET_INTEGER,
4737 .arg1_type = ARG_PTR_TO_CTX,
4738 .arg2_type = ARG_PTR_TO_MEM,
4739 .arg3_type = ARG_CONST_SIZE,
4740};
4741
4742#ifdef CONFIG_XFRM
4743BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
4744 struct bpf_xfrm_state *, to, u32, size, u64, flags)
4745{
4746 const struct sec_path *sp = skb_sec_path(skb);
4747 const struct xfrm_state *x;
4748
4749 if (!sp || unlikely(index >= sp->len || flags))
4750 goto err_clear;
4751
4752 x = sp->xvec[index];
4753
4754 if (unlikely(size != sizeof(struct bpf_xfrm_state)))
4755 goto err_clear;
4756
4757 to->reqid = x->props.reqid;
4758 to->spi = x->id.spi;
4759 to->family = x->props.family;
4760 to->ext = 0;
4761
4762 if (to->family == AF_INET6) {
4763 memcpy(to->remote_ipv6, x->props.saddr.a6,
4764 sizeof(to->remote_ipv6));
4765 } else {
4766 to->remote_ipv4 = x->props.saddr.a4;
4767 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
4768 }
4769
4770 return 0;
4771err_clear:
4772 memset(to, 0, size);
4773 return -EINVAL;
4774}
4775
4776static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
4777 .func = bpf_skb_get_xfrm_state,
4778 .gpl_only = false,
4779 .ret_type = RET_INTEGER,
4780 .arg1_type = ARG_PTR_TO_CTX,
4781 .arg2_type = ARG_ANYTHING,
4782 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
4783 .arg4_type = ARG_CONST_SIZE,
4784 .arg5_type = ARG_ANYTHING,
4785};
4786#endif
4787
4788#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
4789static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
4790 const struct neighbour *neigh,
4791 const struct net_device *dev)
4792{
4793 memcpy(params->dmac, neigh->ha, ETH_ALEN);
4794 memcpy(params->smac, dev->dev_addr, ETH_ALEN);
4795 params->h_vlan_TCI = 0;
4796 params->h_vlan_proto = 0;
4797 params->ifindex = dev->ifindex;
4798
4799 return 0;
4800}
4801#endif
4802
4803#if IS_ENABLED(CONFIG_INET)
4804static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4805 u32 flags, bool check_mtu)
4806{
4807 struct fib_nh_common *nhc;
4808 struct in_device *in_dev;
4809 struct neighbour *neigh;
4810 struct net_device *dev;
4811 struct fib_result res;
4812 struct flowi4 fl4;
4813 int err;
4814 u32 mtu;
4815
4816 dev = dev_get_by_index_rcu(net, params->ifindex);
4817 if (unlikely(!dev))
4818 return -ENODEV;
4819
	/* verify forwarding is enabled on this interface */
4821 in_dev = __in_dev_get_rcu(dev);
4822 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
4823 return BPF_FIB_LKUP_RET_FWD_DISABLED;
4824
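	/* an iif of 1 (LOOPBACK_IFINDEX) makes the lookup behave like one for
	 * locally generated, outbound traffic
	 */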
4825 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4826 fl4.flowi4_iif = 1;
4827 fl4.flowi4_oif = params->ifindex;
4828 } else {
4829 fl4.flowi4_iif = params->ifindex;
4830 fl4.flowi4_oif = 0;
4831 }
4832 fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
4833 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
4834 fl4.flowi4_flags = 0;
4835
4836 fl4.flowi4_proto = params->l4_protocol;
4837 fl4.daddr = params->ipv4_dst;
4838 fl4.saddr = params->ipv4_src;
4839 fl4.fl4_sport = params->sport;
4840 fl4.fl4_dport = params->dport;
4841 fl4.flowi4_multipath_hash = 0;
4842
4843 if (flags & BPF_FIB_LOOKUP_DIRECT) {
4844 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
4845 struct fib_table *tb;
4846
4847 tb = fib_get_table(net, tbid);
4848 if (unlikely(!tb))
4849 return BPF_FIB_LKUP_RET_NOT_FWDED;
4850
4851 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
4852 } else {
4853 fl4.flowi4_mark = 0;
4854 fl4.flowi4_secid = 0;
4855 fl4.flowi4_tun_key.tun_id = 0;
4856 fl4.flowi4_uid = sock_net_uid(net, NULL);
4857
4858 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
4859 }
4860
4861 if (err) {
		/* map fib lookup errors to RTN_ type */
4863 if (err == -EINVAL)
4864 return BPF_FIB_LKUP_RET_BLACKHOLE;
4865 if (err == -EHOSTUNREACH)
4866 return BPF_FIB_LKUP_RET_UNREACHABLE;
4867 if (err == -EACCES)
4868 return BPF_FIB_LKUP_RET_PROHIBIT;
4869
4870 return BPF_FIB_LKUP_RET_NOT_FWDED;
4871 }
4872
4873 if (res.type != RTN_UNICAST)
4874 return BPF_FIB_LKUP_RET_NOT_FWDED;
4875
4876 if (fib_info_num_path(res.fi) > 1)
4877 fib_select_path(net, &res, &fl4, NULL);
4878
4879 if (check_mtu) {
4880 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
4881 if (params->tot_len > mtu)
4882 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4883 }
4884
4885 nhc = res.nhc;
4886
	/* do not handle lwt encaps right now */
4888 if (nhc->nhc_lwtstate)
4889 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
4890
4891 dev = nhc->nhc_dev;
4892
4893 params->rt_metric = res.fi->fib_priority;
4894
	/* xdp and cls_bpf programs are run in RCU-bh so
	 * rcu_read_lock_bh is not needed here
	 */
4898 if (likely(nhc->nhc_gw_family != AF_INET6)) {
4899 if (nhc->nhc_gw_family)
4900 params->ipv4_dst = nhc->nhc_gw.ipv4;
4901
4902 neigh = __ipv4_neigh_lookup_noref(dev,
4903 (__force u32)params->ipv4_dst);
4904 } else {
4905 struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
4906
4907 params->family = AF_INET6;
4908 *dst = nhc->nhc_gw.ipv6;
4909 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
4910 }
4911
4912 if (!neigh)
4913 return BPF_FIB_LKUP_RET_NO_NEIGH;
4914
4915 return bpf_fib_set_fwd_params(params, neigh, dev);
4916}
4917#endif
4918
4919#if IS_ENABLED(CONFIG_IPV6)
4920static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4921 u32 flags, bool check_mtu)
4922{
4923 struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
4924 struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
4925 struct fib6_result res = {};
4926 struct neighbour *neigh;
4927 struct net_device *dev;
4928 struct inet6_dev *idev;
4929 struct flowi6 fl6;
4930 int strict = 0;
4931 int oif, err;
4932 u32 mtu;
4933
	/* link local addresses are never forwarded */
4935 if (rt6_need_strict(dst) || rt6_need_strict(src))
4936 return BPF_FIB_LKUP_RET_NOT_FWDED;
4937
4938 dev = dev_get_by_index_rcu(net, params->ifindex);
4939 if (unlikely(!dev))
4940 return -ENODEV;
4941
4942 idev = __in6_dev_get_safely(dev);
4943 if (unlikely(!idev || !idev->cnf.forwarding))
4944 return BPF_FIB_LKUP_RET_FWD_DISABLED;
4945
4946 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4947 fl6.flowi6_iif = 1;
4948 oif = fl6.flowi6_oif = params->ifindex;
4949 } else {
4950 oif = fl6.flowi6_iif = params->ifindex;
4951 fl6.flowi6_oif = 0;
4952 strict = RT6_LOOKUP_F_HAS_SADDR;
4953 }
4954 fl6.flowlabel = params->flowinfo;
4955 fl6.flowi6_scope = 0;
4956 fl6.flowi6_flags = 0;
4957 fl6.mp_hash = 0;
4958
4959 fl6.flowi6_proto = params->l4_protocol;
4960 fl6.daddr = *dst;
4961 fl6.saddr = *src;
4962 fl6.fl6_sport = params->sport;
4963 fl6.fl6_dport = params->dport;
4964
4965 if (flags & BPF_FIB_LOOKUP_DIRECT) {
4966 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
4967 struct fib6_table *tb;
4968
4969 tb = ipv6_stub->fib6_get_table(net, tbid);
4970 if (unlikely(!tb))
4971 return BPF_FIB_LKUP_RET_NOT_FWDED;
4972
4973 err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
4974 strict);
4975 } else {
4976 fl6.flowi6_mark = 0;
4977 fl6.flowi6_secid = 0;
4978 fl6.flowi6_tun_key.tun_id = 0;
4979 fl6.flowi6_uid = sock_net_uid(net, NULL);
4980
4981 err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
4982 }
4983
4984 if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
4985 res.f6i == net->ipv6.fib6_null_entry))
4986 return BPF_FIB_LKUP_RET_NOT_FWDED;
4987
4988 switch (res.fib6_type) {
	/* only unicast is forwarded without stricter checks */
4990 case RTN_UNICAST:
4991 break;
4992 case RTN_BLACKHOLE:
4993 return BPF_FIB_LKUP_RET_BLACKHOLE;
4994 case RTN_UNREACHABLE:
4995 return BPF_FIB_LKUP_RET_UNREACHABLE;
4996 case RTN_PROHIBIT:
4997 return BPF_FIB_LKUP_RET_PROHIBIT;
4998 default:
4999 return BPF_FIB_LKUP_RET_NOT_FWDED;
5000 }
5001
5002 ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
5003 fl6.flowi6_oif != 0, NULL, strict);
5004
5005 if (check_mtu) {
5006 mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
5007 if (params->tot_len > mtu)
5008 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
5009 }
5010
5011 if (res.nh->fib_nh_lws)
5012 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
5013
5014 if (res.nh->fib_nh_gw_family)
5015 *dst = res.nh->fib_nh_gw6;
5016
5017 dev = res.nh->fib_nh_dev;
5018 params->rt_metric = res.f6i->fib6_metric;
5019
	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
	 * not needed here.
	 */
5023 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
5024 if (!neigh)
5025 return BPF_FIB_LKUP_RET_NO_NEIGH;
5026
5027 return bpf_fib_set_fwd_params(params, neigh, dev);
5028}
5029#endif
5030
5031BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
5032 struct bpf_fib_lookup *, params, int, plen, u32, flags)
5033{
5034 if (plen < sizeof(*params))
5035 return -EINVAL;
5036
5037 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5038 return -EINVAL;
5039
5040 switch (params->family) {
5041#if IS_ENABLED(CONFIG_INET)
5042 case AF_INET:
5043 return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
5044 flags, true);
5045#endif
5046#if IS_ENABLED(CONFIG_IPV6)
5047 case AF_INET6:
5048 return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
5049 flags, true);
5050#endif
5051 }
5052 return -EAFNOSUPPORT;
5053}
5054
5055static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
5056 .func = bpf_xdp_fib_lookup,
5057 .gpl_only = true,
5058 .ret_type = RET_INTEGER,
5059 .arg1_type = ARG_PTR_TO_CTX,
5060 .arg2_type = ARG_PTR_TO_MEM,
5061 .arg3_type = ARG_CONST_SIZE,
5062 .arg4_type = ARG_ANYTHING,
5063};
5064
5065BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
5066 struct bpf_fib_lookup *, params, int, plen, u32, flags)
5067{
5068 struct net *net = dev_net(skb->dev);
5069 int rc = -EAFNOSUPPORT;
5070
5071 if (plen < sizeof(*params))
5072 return -EINVAL;
5073
5074 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5075 return -EINVAL;
5076
5077 switch (params->family) {
5078#if IS_ENABLED(CONFIG_INET)
5079 case AF_INET:
5080 rc = bpf_ipv4_fib_lookup(net, params, flags, false);
5081 break;
5082#endif
5083#if IS_ENABLED(CONFIG_IPV6)
5084 case AF_INET6:
5085 rc = bpf_ipv6_fib_lookup(net, params, flags, false);
5086 break;
5087#endif
5088 }
5089
5090 if (!rc) {
5091 struct net_device *dev;
5092
5093 dev = dev_get_by_index_rcu(net, params->ifindex);
5094 if (!is_skb_forwardable(dev, skb))
5095 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
5096 }
5097
5098 return rc;
5099}
5100
5101static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
5102 .func = bpf_skb_fib_lookup,
5103 .gpl_only = true,
5104 .ret_type = RET_INTEGER,
5105 .arg1_type = ARG_PTR_TO_CTX,
5106 .arg2_type = ARG_PTR_TO_MEM,
5107 .arg3_type = ARG_CONST_SIZE,
5108 .arg4_type = ARG_ANYTHING,
5109};
5110
5111#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5112static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
5113{
5114 int err;
5115 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
5116
5117 if (!seg6_validate_srh(srh, len, false))
5118 return -EINVAL;
5119
5120 switch (type) {
5121 case BPF_LWT_ENCAP_SEG6_INLINE:
5122 if (skb->protocol != htons(ETH_P_IPV6))
5123 return -EBADMSG;
5124
5125 err = seg6_do_srh_inline(skb, srh);
5126 break;
5127 case BPF_LWT_ENCAP_SEG6:
5128 skb_reset_inner_headers(skb);
5129 skb->encapsulation = 1;
5130 err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
5131 break;
5132 default:
5133 return -EINVAL;
5134 }
5135
5136 bpf_compute_data_pointers(skb);
5137 if (err)
5138 return err;
5139
5140 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
5141 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
5142
5143 return seg6_lookup_nexthop(skb, NULL, 0);
5144}
5145#endif
5146
5147#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5148static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
5149 bool ingress)
5150{
5151 return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
5152}
5153#endif
5154
5155BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
5156 u32, len)
5157{
5158 switch (type) {
5159#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5160 case BPF_LWT_ENCAP_SEG6:
5161 case BPF_LWT_ENCAP_SEG6_INLINE:
5162 return bpf_push_seg6_encap(skb, type, hdr, len);
5163#endif
5164#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5165 case BPF_LWT_ENCAP_IP:
		return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
5167#endif
5168 default:
5169 return -EINVAL;
5170 }
5171}
5172
5173BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
5174 void *, hdr, u32, len)
5175{
5176 switch (type) {
5177#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5178 case BPF_LWT_ENCAP_IP:
		return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
5180#endif
5181 default:
5182 return -EINVAL;
5183 }
5184}
5185
5186static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
5187 .func = bpf_lwt_in_push_encap,
5188 .gpl_only = false,
5189 .ret_type = RET_INTEGER,
5190 .arg1_type = ARG_PTR_TO_CTX,
5191 .arg2_type = ARG_ANYTHING,
5192 .arg3_type = ARG_PTR_TO_MEM,
5193 .arg4_type = ARG_CONST_SIZE
5194};
5195
5196static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
5197 .func = bpf_lwt_xmit_push_encap,
5198 .gpl_only = false,
5199 .ret_type = RET_INTEGER,
5200 .arg1_type = ARG_PTR_TO_CTX,
5201 .arg2_type = ARG_ANYTHING,
5202 .arg3_type = ARG_PTR_TO_MEM,
5203 .arg4_type = ARG_CONST_SIZE
5204};
5205
5206#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5207BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
5208 const void *, from, u32, len)
5209{
5210 struct seg6_bpf_srh_state *srh_state =
5211 this_cpu_ptr(&seg6_bpf_srh_states);
5212 struct ipv6_sr_hdr *srh = srh_state->srh;
5213 void *srh_tlvs, *srh_end, *ptr;
5214 int srhoff = 0;
5215
5216 if (srh == NULL)
5217 return -EINVAL;
5218
5219 srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
5220 srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
5221
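	/* writes within the TLV area invalidate the cached SRH; any other
	 * write must stay between ->flags and ->segments
	 */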
5222 ptr = skb->data + offset;
5223 if (ptr >= srh_tlvs && ptr + len <= srh_end)
5224 srh_state->valid = false;
5225 else if (ptr < (void *)&srh->flags ||
5226 ptr + len > (void *)&srh->segments)
5227 return -EFAULT;
5228
5229 if (unlikely(bpf_try_make_writable(skb, offset + len)))
5230 return -EFAULT;
5231 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
5232 return -EINVAL;
5233 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
5234
5235 memcpy(skb->data + offset, from, len);
5236 return 0;
5237}
5238
5239static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
5240 .func = bpf_lwt_seg6_store_bytes,
5241 .gpl_only = false,
5242 .ret_type = RET_INTEGER,
5243 .arg1_type = ARG_PTR_TO_CTX,
5244 .arg2_type = ARG_ANYTHING,
5245 .arg3_type = ARG_PTR_TO_MEM,
5246 .arg4_type = ARG_CONST_SIZE
5247};
5248
5249static void bpf_update_srh_state(struct sk_buff *skb)
5250{
5251 struct seg6_bpf_srh_state *srh_state =
5252 this_cpu_ptr(&seg6_bpf_srh_states);
5253 int srhoff = 0;
5254
5255 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
5256 srh_state->srh = NULL;
5257 } else {
5258 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
5259 srh_state->hdrlen = srh_state->srh->hdrlen << 3;
5260 srh_state->valid = true;
5261 }
5262}
5263
5264BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
5265 u32, action, void *, param, u32, param_len)
5266{
5267 struct seg6_bpf_srh_state *srh_state =
5268 this_cpu_ptr(&seg6_bpf_srh_states);
5269 int hdroff = 0;
5270 int err;
5271
5272 switch (action) {
5273 case SEG6_LOCAL_ACTION_END_X:
5274 if (!seg6_bpf_has_valid_srh(skb))
5275 return -EBADMSG;
5276 if (param_len != sizeof(struct in6_addr))
5277 return -EINVAL;
5278 return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
5279 case SEG6_LOCAL_ACTION_END_T:
5280 if (!seg6_bpf_has_valid_srh(skb))
5281 return -EBADMSG;
5282 if (param_len != sizeof(int))
5283 return -EINVAL;
5284 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
5285 case SEG6_LOCAL_ACTION_END_DT6:
5286 if (!seg6_bpf_has_valid_srh(skb))
5287 return -EBADMSG;
5288 if (param_len != sizeof(int))
5289 return -EINVAL;
5290
5291 if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
5292 return -EBADMSG;
5293 if (!pskb_pull(skb, hdroff))
5294 return -EBADMSG;
5295
5296 skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
5297 skb_reset_network_header(skb);
5298 skb_reset_transport_header(skb);
5299 skb->encapsulation = 0;
5300
5301 bpf_compute_data_pointers(skb);
5302 bpf_update_srh_state(skb);
5303 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
5304 case SEG6_LOCAL_ACTION_END_B6:
5305 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
5306 return -EBADMSG;
5307 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
5308 param, param_len);
5309 if (!err)
5310 bpf_update_srh_state(skb);
5311
5312 return err;
5313 case SEG6_LOCAL_ACTION_END_B6_ENCAP:
5314 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
5315 return -EBADMSG;
5316 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
5317 param, param_len);
5318 if (!err)
5319 bpf_update_srh_state(skb);
5320
5321 return err;
5322 default:
5323 return -EINVAL;
5324 }
5325}
5326
5327static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
5328 .func = bpf_lwt_seg6_action,
5329 .gpl_only = false,
5330 .ret_type = RET_INTEGER,
5331 .arg1_type = ARG_PTR_TO_CTX,
5332 .arg2_type = ARG_ANYTHING,
5333 .arg3_type = ARG_PTR_TO_MEM,
5334 .arg4_type = ARG_CONST_SIZE
5335};
5336
5337BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
5338 s32, len)
5339{
5340 struct seg6_bpf_srh_state *srh_state =
5341 this_cpu_ptr(&seg6_bpf_srh_states);
5342 struct ipv6_sr_hdr *srh = srh_state->srh;
5343 void *srh_end, *srh_tlvs, *ptr;
5344 struct ipv6hdr *hdr;
5345 int srhoff = 0;
5346 int ret;
5347
5348 if (unlikely(srh == NULL))
5349 return -EINVAL;
5350
5351 srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
5352 ((srh->first_segment + 1) << 4));
5353 srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
5354 srh_state->hdrlen);
5355 ptr = skb->data + offset;
5356
5357 if (unlikely(ptr < srh_tlvs || ptr > srh_end))
5358 return -EFAULT;
5359 if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
5360 return -EFAULT;
5361
5362 if (len > 0) {
5363 ret = skb_cow_head(skb, len);
5364 if (unlikely(ret < 0))
5365 return ret;
5366
5367 ret = bpf_skb_net_hdr_push(skb, offset, len);
5368 } else {
5369 ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
5370 }
5371
5372 bpf_compute_data_pointers(skb);
5373 if (unlikely(ret < 0))
5374 return ret;
5375
5376 hdr = (struct ipv6hdr *)skb->data;
5377 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
5378
5379 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
5380 return -EINVAL;
5381 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
5382 srh_state->hdrlen += len;
5383 srh_state->valid = false;
5384 return 0;
5385}
5386
5387static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
5388 .func = bpf_lwt_seg6_adjust_srh,
5389 .gpl_only = false,
5390 .ret_type = RET_INTEGER,
5391 .arg1_type = ARG_PTR_TO_CTX,
5392 .arg2_type = ARG_ANYTHING,
5393 .arg3_type = ARG_ANYTHING,
5394};
5395#endif
5396
5397#ifdef CONFIG_INET
5398static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
5399 int dif, int sdif, u8 family, u8 proto)
5400{
5401 bool refcounted = false;
5402 struct sock *sk = NULL;
5403
5404 if (family == AF_INET) {
5405 __be32 src4 = tuple->ipv4.saddr;
5406 __be32 dst4 = tuple->ipv4.daddr;
5407
5408 if (proto == IPPROTO_TCP)
5409 sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
5410 src4, tuple->ipv4.sport,
5411 dst4, tuple->ipv4.dport,
5412 dif, sdif, &refcounted);
5413 else
5414 sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
5415 dst4, tuple->ipv4.dport,
5416 dif, sdif, &udp_table, NULL);
5417#if IS_ENABLED(CONFIG_IPV6)
5418 } else {
5419 struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
5420 struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
5421
5422 if (proto == IPPROTO_TCP)
5423 sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
5424 src6, tuple->ipv6.sport,
5425 dst6, ntohs(tuple->ipv6.dport),
5426 dif, sdif, &refcounted);
5427 else if (likely(ipv6_bpf_stub))
5428 sk = ipv6_bpf_stub->udp6_lib_lookup(net,
5429 src6, tuple->ipv6.sport,
5430 dst6, tuple->ipv6.dport,
5431 dif, sdif,
5432 &udp_table, NULL);
5433#endif
5434 }
5435
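	/* a socket found here must either hold a reference or be RCU-freed;
	 * anything else cannot be returned safely
	 */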
5436 if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
5437 WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
5438 sk = NULL;
5439 }
5440 return sk;
5441}
5442
/* bpf_skc_lookup performs the core lookup for different types of sockets,
 * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
 * Returns the socket as an 'unsigned long' to simplify the casting in the
 * callers to satisfy BPF_CALL declarations.
 */
5448static struct sock *
5449__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5450 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
5451 u64 flags)
5452{
5453 struct sock *sk = NULL;
5454 u8 family = AF_UNSPEC;
5455 struct net *net;
5456 int sdif;
5457
5458 if (len == sizeof(tuple->ipv4))
5459 family = AF_INET;
5460 else if (len == sizeof(tuple->ipv6))
5461 family = AF_INET6;
5462 else
5463 return NULL;
5464
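	/* a negative netns_id selects the caller's netns; non-negative ids
	 * must fit in s32
	 */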
5465 if (unlikely(family == AF_UNSPEC || flags ||
5466 !((s32)netns_id < 0 || netns_id <= S32_MAX)))
5467 goto out;
5468
5469 if (family == AF_INET)
5470 sdif = inet_sdif(skb);
5471 else
5472 sdif = inet6_sdif(skb);
5473
5474 if ((s32)netns_id < 0) {
5475 net = caller_net;
5476 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
5477 } else {
5478 net = get_net_ns_by_id(caller_net, netns_id);
5479 if (unlikely(!net))
5480 goto out;
5481 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
5482 put_net(net);
5483 }
5484
5485out:
5486 return sk;
5487}
5488
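/* like __bpf_skc_lookup, but only full sockets escape: request and
 * timewait sockets are resolved to their full socket or released.
 */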
5489static struct sock *
5490__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5491 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
5492 u64 flags)
5493{
5494 struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
5495 ifindex, proto, netns_id, flags);
5496
5497 if (sk) {
5498 sk = sk_to_full_sk(sk);
5499 if (!sk_fullsock(sk)) {
5500 sock_gen_put(sk);
5501 return NULL;
5502 }
5503 }
5504
5505 return sk;
5506}
5507
5508static struct sock *
5509bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5510 u8 proto, u64 netns_id, u64 flags)
5511{
5512 struct net *caller_net;
5513 int ifindex;
5514
5515 if (skb->dev) {
5516 caller_net = dev_net(skb->dev);
5517 ifindex = skb->dev->ifindex;
5518 } else {
5519 caller_net = sock_net(skb->sk);
5520 ifindex = 0;
5521 }
5522
5523 return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
5524 netns_id, flags);
5525}
5526
5527static struct sock *
5528bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5529 u8 proto, u64 netns_id, u64 flags)
5530{
5531 struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
5532 flags);
5533
5534 if (sk) {
5535 sk = sk_to_full_sk(sk);
5536 if (!sk_fullsock(sk)) {
5537 sock_gen_put(sk);
5538 return NULL;
5539 }
5540 }
5541
5542 return sk;
5543}
5544
5545BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
5546 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5547{
5548 return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
5549 netns_id, flags);
5550}
5551
5552static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
5553 .func = bpf_skc_lookup_tcp,
5554 .gpl_only = false,
5555 .pkt_access = true,
5556 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5557 .arg1_type = ARG_PTR_TO_CTX,
5558 .arg2_type = ARG_PTR_TO_MEM,
5559 .arg3_type = ARG_CONST_SIZE,
5560 .arg4_type = ARG_ANYTHING,
5561 .arg5_type = ARG_ANYTHING,
5562};
5563
5564BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
5565 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5566{
5567 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
5568 netns_id, flags);
5569}
5570
5571static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
5572 .func = bpf_sk_lookup_tcp,
5573 .gpl_only = false,
5574 .pkt_access = true,
5575 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5576 .arg1_type = ARG_PTR_TO_CTX,
5577 .arg2_type = ARG_PTR_TO_MEM,
5578 .arg3_type = ARG_CONST_SIZE,
5579 .arg4_type = ARG_ANYTHING,
5580 .arg5_type = ARG_ANYTHING,
5581};
5582
5583BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
5584 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5585{
5586 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
5587 netns_id, flags);
5588}
5589
5590static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
5591 .func = bpf_sk_lookup_udp,
5592 .gpl_only = false,
5593 .pkt_access = true,
5594 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5595 .arg1_type = ARG_PTR_TO_CTX,
5596 .arg2_type = ARG_PTR_TO_MEM,
5597 .arg3_type = ARG_CONST_SIZE,
5598 .arg4_type = ARG_ANYTHING,
5599 .arg5_type = ARG_ANYTHING,
5600};
5601
5602BPF_CALL_1(bpf_sk_release, struct sock *, sk)
5603{
5604 if (sk_is_refcounted(sk))
5605 sock_gen_put(sk);
5606 return 0;
5607}
5608
5609static const struct bpf_func_proto bpf_sk_release_proto = {
5610 .func = bpf_sk_release,
5611 .gpl_only = false,
5612 .ret_type = RET_INTEGER,
5613 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5614};
5615
5616BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
5617 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5618{
5619 struct net *caller_net = dev_net(ctx->rxq->dev);
5620 int ifindex = ctx->rxq->dev->ifindex;
5621
5622 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
5623 ifindex, IPPROTO_UDP, netns_id,
5624 flags);
5625}
5626
5627static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
5628 .func = bpf_xdp_sk_lookup_udp,
5629 .gpl_only = false,
5630 .pkt_access = true,
5631 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5632 .arg1_type = ARG_PTR_TO_CTX,
5633 .arg2_type = ARG_PTR_TO_MEM,
5634 .arg3_type = ARG_CONST_SIZE,
5635 .arg4_type = ARG_ANYTHING,
5636 .arg5_type = ARG_ANYTHING,
5637};
5638
5639BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
5640 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5641{
5642 struct net *caller_net = dev_net(ctx->rxq->dev);
5643 int ifindex = ctx->rxq->dev->ifindex;
5644
5645 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
5646 ifindex, IPPROTO_TCP, netns_id,
5647 flags);
5648}
5649
5650static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
5651 .func = bpf_xdp_skc_lookup_tcp,
5652 .gpl_only = false,
5653 .pkt_access = true,
5654 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5655 .arg1_type = ARG_PTR_TO_CTX,
5656 .arg2_type = ARG_PTR_TO_MEM,
5657 .arg3_type = ARG_CONST_SIZE,
5658 .arg4_type = ARG_ANYTHING,
5659 .arg5_type = ARG_ANYTHING,
5660};
5661
5662BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
5663 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5664{
5665 struct net *caller_net = dev_net(ctx->rxq->dev);
5666 int ifindex = ctx->rxq->dev->ifindex;
5667
5668 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
5669 ifindex, IPPROTO_TCP, netns_id,
5670 flags);
5671}
5672
5673static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
5674 .func = bpf_xdp_sk_lookup_tcp,
5675 .gpl_only = false,
5676 .pkt_access = true,
5677 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5678 .arg1_type = ARG_PTR_TO_CTX,
5679 .arg2_type = ARG_PTR_TO_MEM,
5680 .arg3_type = ARG_CONST_SIZE,
5681 .arg4_type = ARG_ANYTHING,
5682 .arg5_type = ARG_ANYTHING,
5683};
5684
5685BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
5686 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5687{
5688 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
5689 sock_net(ctx->sk), 0,
5690 IPPROTO_TCP, netns_id, flags);
5691}
5692
5693static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
5694 .func = bpf_sock_addr_skc_lookup_tcp,
5695 .gpl_only = false,
5696 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5697 .arg1_type = ARG_PTR_TO_CTX,
5698 .arg2_type = ARG_PTR_TO_MEM,
5699 .arg3_type = ARG_CONST_SIZE,
5700 .arg4_type = ARG_ANYTHING,
5701 .arg5_type = ARG_ANYTHING,
5702};
5703
5704BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
5705 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5706{
5707 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
5708 sock_net(ctx->sk), 0, IPPROTO_TCP,
5709 netns_id, flags);
5710}
5711
5712static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
5713 .func = bpf_sock_addr_sk_lookup_tcp,
5714 .gpl_only = false,
5715 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5716 .arg1_type = ARG_PTR_TO_CTX,
5717 .arg2_type = ARG_PTR_TO_MEM,
5718 .arg3_type = ARG_CONST_SIZE,
5719 .arg4_type = ARG_ANYTHING,
5720 .arg5_type = ARG_ANYTHING,
5721};
5722
5723BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
5724 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5725{
5726 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
5727 sock_net(ctx->sk), 0, IPPROTO_UDP,
5728 netns_id, flags);
5729}
5730
5731static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
5732 .func = bpf_sock_addr_sk_lookup_udp,
5733 .gpl_only = false,
5734 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5735 .arg1_type = ARG_PTR_TO_CTX,
5736 .arg2_type = ARG_PTR_TO_MEM,
5737 .arg3_type = ARG_CONST_SIZE,
5738 .arg4_type = ARG_ANYTHING,
5739 .arg5_type = ARG_ANYTHING,
5740};
5741
5742bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
5743 struct bpf_insn_access_aux *info)
5744{
5745 if (off < 0 || off >= offsetofend(struct bpf_tcp_sock,
5746 icsk_retransmits))
5747 return false;
5748
5749 if (off % size != 0)
5750 return false;
5751
5752 switch (off) {
5753 case offsetof(struct bpf_tcp_sock, bytes_received):
5754 case offsetof(struct bpf_tcp_sock, bytes_acked):
5755 return size == sizeof(__u64);
5756 default:
5757 return size == sizeof(__u32);
5758 }
5759}
5760
5761u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
5762 const struct bpf_insn *si,
5763 struct bpf_insn *insn_buf,
5764 struct bpf_prog *prog, u32 *target_size)
5765{
5766 struct bpf_insn *insn = insn_buf;
5767
5768#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
5769 do { \
5770 BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \
5771 sizeof_field(struct bpf_tcp_sock, FIELD)); \
5772 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
5773 si->dst_reg, si->src_reg, \
5774 offsetof(struct tcp_sock, FIELD)); \
5775 } while (0)
5776
5777#define BPF_INET_SOCK_GET_COMMON(FIELD) \
5778 do { \
5779 BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \
5780 FIELD) > \
5781 sizeof_field(struct bpf_tcp_sock, FIELD)); \
5782 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
5783 struct inet_connection_sock, \
5784 FIELD), \
5785 si->dst_reg, si->src_reg, \
5786 offsetof( \
5787 struct inet_connection_sock, \
5788 FIELD)); \
5789 } while (0)
5790
5791 if (insn > insn_buf)
5792 return insn - insn_buf;
5793
5794 switch (si->off) {
5795 case offsetof(struct bpf_tcp_sock, rtt_min):
5796 BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
5797 sizeof(struct minmax));
5798 BUILD_BUG_ON(sizeof(struct minmax) <
5799 sizeof(struct minmax_sample));
5800
5801 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5802 offsetof(struct tcp_sock, rtt_min) +
5803 offsetof(struct minmax_sample, v));
5804 break;
5805 case offsetof(struct bpf_tcp_sock, snd_cwnd):
5806 BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
5807 break;
5808 case offsetof(struct bpf_tcp_sock, srtt_us):
5809 BPF_TCP_SOCK_GET_COMMON(srtt_us);
5810 break;
5811 case offsetof(struct bpf_tcp_sock, snd_ssthresh):
5812 BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
5813 break;
5814 case offsetof(struct bpf_tcp_sock, rcv_nxt):
5815 BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
5816 break;
5817 case offsetof(struct bpf_tcp_sock, snd_nxt):
5818 BPF_TCP_SOCK_GET_COMMON(snd_nxt);
5819 break;
5820 case offsetof(struct bpf_tcp_sock, snd_una):
5821 BPF_TCP_SOCK_GET_COMMON(snd_una);
5822 break;
5823 case offsetof(struct bpf_tcp_sock, mss_cache):
5824 BPF_TCP_SOCK_GET_COMMON(mss_cache);
5825 break;
5826 case offsetof(struct bpf_tcp_sock, ecn_flags):
5827 BPF_TCP_SOCK_GET_COMMON(ecn_flags);
5828 break;
5829 case offsetof(struct bpf_tcp_sock, rate_delivered):
5830 BPF_TCP_SOCK_GET_COMMON(rate_delivered);
5831 break;
5832 case offsetof(struct bpf_tcp_sock, rate_interval_us):
5833 BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
5834 break;
5835 case offsetof(struct bpf_tcp_sock, packets_out):
5836 BPF_TCP_SOCK_GET_COMMON(packets_out);
5837 break;
5838 case offsetof(struct bpf_tcp_sock, retrans_out):
5839 BPF_TCP_SOCK_GET_COMMON(retrans_out);
5840 break;
5841 case offsetof(struct bpf_tcp_sock, total_retrans):
5842 BPF_TCP_SOCK_GET_COMMON(total_retrans);
5843 break;
5844 case offsetof(struct bpf_tcp_sock, segs_in):
5845 BPF_TCP_SOCK_GET_COMMON(segs_in);
5846 break;
5847 case offsetof(struct bpf_tcp_sock, data_segs_in):
5848 BPF_TCP_SOCK_GET_COMMON(data_segs_in);
5849 break;
5850 case offsetof(struct bpf_tcp_sock, segs_out):
5851 BPF_TCP_SOCK_GET_COMMON(segs_out);
5852 break;
5853 case offsetof(struct bpf_tcp_sock, data_segs_out):
5854 BPF_TCP_SOCK_GET_COMMON(data_segs_out);
5855 break;
5856 case offsetof(struct bpf_tcp_sock, lost_out):
5857 BPF_TCP_SOCK_GET_COMMON(lost_out);
5858 break;
5859 case offsetof(struct bpf_tcp_sock, sacked_out):
5860 BPF_TCP_SOCK_GET_COMMON(sacked_out);
5861 break;
5862 case offsetof(struct bpf_tcp_sock, bytes_received):
5863 BPF_TCP_SOCK_GET_COMMON(bytes_received);
5864 break;
5865 case offsetof(struct bpf_tcp_sock, bytes_acked):
5866 BPF_TCP_SOCK_GET_COMMON(bytes_acked);
5867 break;
5868 case offsetof(struct bpf_tcp_sock, dsack_dups):
5869 BPF_TCP_SOCK_GET_COMMON(dsack_dups);
5870 break;
5871 case offsetof(struct bpf_tcp_sock, delivered):
5872 BPF_TCP_SOCK_GET_COMMON(delivered);
5873 break;
5874 case offsetof(struct bpf_tcp_sock, delivered_ce):
5875 BPF_TCP_SOCK_GET_COMMON(delivered_ce);
5876 break;
5877 case offsetof(struct bpf_tcp_sock, icsk_retransmits):
5878 BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
5879 break;
5880 }
5881
5882 return insn - insn_buf;
5883}
5884
5885BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
5886{
5887 if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
5888 return (unsigned long)sk;
5889
5890 return (unsigned long)NULL;
5891}
5892
5893const struct bpf_func_proto bpf_tcp_sock_proto = {
5894 .func = bpf_tcp_sock,
5895 .gpl_only = false,
5896 .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
5897 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5898};
5899
5900BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
5901{
5902 sk = sk_to_full_sk(sk);
5903
5904 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
5905 return (unsigned long)sk;
5906
5907 return (unsigned long)NULL;
5908}
5909
5910static const struct bpf_func_proto bpf_get_listener_sock_proto = {
5911 .func = bpf_get_listener_sock,
5912 .gpl_only = false,
5913 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5914 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5915};
5916
5917BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
5918{
5919 unsigned int iphdr_len;
5920
5921 switch (skb_protocol(skb, true)) {
5922 case cpu_to_be16(ETH_P_IP):
5923 iphdr_len = sizeof(struct iphdr);
5924 break;
5925 case cpu_to_be16(ETH_P_IPV6):
5926 iphdr_len = sizeof(struct ipv6hdr);
5927 break;
5928 default:
5929 return 0;
5930 }
5931
5932 if (skb_headlen(skb) < iphdr_len)
5933 return 0;
5934
5935 if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
5936 return 0;
5937
5938 return INET_ECN_set_ce(skb);
5939}
5940
5941bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
5942 struct bpf_insn_access_aux *info)
5943{
5944 if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
5945 return false;
5946
5947 if (off % size != 0)
5948 return false;
5949
5950 switch (off) {
5951 default:
5952 return size == sizeof(__u32);
5953 }
5954}
5955
5956u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
5957 const struct bpf_insn *si,
5958 struct bpf_insn *insn_buf,
5959 struct bpf_prog *prog, u32 *target_size)
5960{
5961 struct bpf_insn *insn = insn_buf;
5962
5963#define BPF_XDP_SOCK_GET(FIELD) \
5964 do { \
5965 BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \
5966 sizeof_field(struct bpf_xdp_sock, FIELD)); \
5967 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
5968 si->dst_reg, si->src_reg, \
5969 offsetof(struct xdp_sock, FIELD)); \
5970 } while (0)
5971
5972 switch (si->off) {
5973 case offsetof(struct bpf_xdp_sock, queue_id):
5974 BPF_XDP_SOCK_GET(queue_id);
5975 break;
5976 }
5977
5978 return insn - insn_buf;
5979}
5980
5981static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
5982 .func = bpf_skb_ecn_set_ce,
5983 .gpl_only = false,
5984 .ret_type = RET_INTEGER,
5985 .arg1_type = ARG_PTR_TO_CTX,
5986};
5987
5988BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
5989 struct tcphdr *, th, u32, th_len)
5990{
5991#ifdef CONFIG_SYN_COOKIES
5992 u32 cookie;
5993 int ret;
5994
5995 if (unlikely(th_len < sizeof(*th)))
5996 return -EINVAL;
5997
	/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here */
5999 if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
6000 return -EINVAL;
6001
6002 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
6003 return -EINVAL;
6004
6005 if (!th->ack || th->rst || th->syn)
6006 return -ENOENT;
6007
6008 if (tcp_synq_no_recent_overflow(sk))
6009 return -ENOENT;
6010
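	/* the SYN cookie was sent as the seq of our SYNACK, so it is the
	 * acknowledged sequence number minus one
	 */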
6011 cookie = ntohl(th->ack_seq) - 1;
6012
6013 switch (sk->sk_family) {
6014 case AF_INET:
6015 if (unlikely(iph_len < sizeof(struct iphdr)))
6016 return -EINVAL;
6017
6018 ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
6019 break;
6020
6021#if IS_BUILTIN(CONFIG_IPV6)
6022 case AF_INET6:
6023 if (unlikely(iph_len < sizeof(struct ipv6hdr)))
6024 return -EINVAL;
6025
6026 ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
6027 break;
6028#endif
6029
6030 default:
6031 return -EPROTONOSUPPORT;
6032 }
6033
6034 if (ret > 0)
6035 return 0;
6036
6037 return -ENOENT;
6038#else
6039 return -ENOTSUPP;
6040#endif
6041}
6042
6043static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
6044 .func = bpf_tcp_check_syncookie,
6045 .gpl_only = true,
6046 .pkt_access = true,
6047 .ret_type = RET_INTEGER,
6048 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
6049 .arg2_type = ARG_PTR_TO_MEM,
6050 .arg3_type = ARG_CONST_SIZE,
6051 .arg4_type = ARG_PTR_TO_MEM,
6052 .arg5_type = ARG_CONST_SIZE,
6053};
6054
6055BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
6056 struct tcphdr *, th, u32, th_len)
6057{
6058#ifdef CONFIG_SYN_COOKIES
6059 u32 cookie;
6060 u16 mss;
6061
6062 if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
6063 return -EINVAL;
6064
6065 if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
6066 return -EINVAL;
6067
6068 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
6069 return -ENOENT;
6070
6071 if (!th->syn || th->ack || th->fin || th->rst)
6072 return -EINVAL;
6073
6074 if (unlikely(iph_len < sizeof(struct iphdr)))
6075 return -EINVAL;
6076
	/* both struct iphdr and struct ipv6hdr have the version field at the
	 * same offset so we can cast to the shorter header (struct iphdr).
	 */
6080 switch (((struct iphdr *)iph)->version) {
6081 case 4:
6082 if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
6083 return -EINVAL;
6084
6085 mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
6086 break;
6087
6088#if IS_BUILTIN(CONFIG_IPV6)
6089 case 6:
6090 if (unlikely(iph_len < sizeof(struct ipv6hdr)))
6091 return -EINVAL;
6092
6093 if (sk->sk_family != AF_INET6)
6094 return -EINVAL;
6095
6096 mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
6097 break;
6098#endif
6099
6100 default:
6101 return -EPROTONOSUPPORT;
6102 }
6103 if (mss == 0)
6104 return -ENOENT;
6105
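	/* success: SYN cookie in the lower 32 bits, MSS in the upper bits */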
6106 return cookie | ((u64)mss << 32);
6107#else
6108 return -EOPNOTSUPP;
6109#endif
6110}
6111
6112static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
6113 .func = bpf_tcp_gen_syncookie,
6114 .gpl_only = true,
6115 .pkt_access = true,
6116 .ret_type = RET_INTEGER,
6117 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
6118 .arg2_type = ARG_PTR_TO_MEM,
6119 .arg3_type = ARG_CONST_SIZE,
6120 .arg4_type = ARG_PTR_TO_MEM,
6121 .arg5_type = ARG_CONST_SIZE,
6122};
6123
6124BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
6125{
6126 if (flags != 0)
6127 return -EINVAL;
6128 if (!skb_at_tc_ingress(skb))
6129 return -EOPNOTSUPP;
6130 if (unlikely(dev_net(skb->dev) != sock_net(sk)))
6131 return -ENETUNREACH;
6132 if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
6133 return -ESOCKTNOSUPPORT;
6134 if (sk_is_refcounted(sk) &&
6135 unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
6136 return -ENOENT;
6137
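	/* all checks passed: replace any existing socket with the given one */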
6138 skb_orphan(skb);
6139 skb->sk = sk;
6140 skb->destructor = sock_pfree;
6141
6142 return 0;
6143}
6144
6145static const struct bpf_func_proto bpf_sk_assign_proto = {
6146 .func = bpf_sk_assign,
6147 .gpl_only = false,
6148 .ret_type = RET_INTEGER,
6149 .arg1_type = ARG_PTR_TO_CTX,
6150 .arg2_type = ARG_PTR_TO_SOCK_COMMON,
6151 .arg3_type = ARG_ANYTHING,
6152};
6153
6154#endif
6155
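/* Helpers listed here may write to or reallocate packet data; the verifier
 * invalidates cached packet pointers across calls to them.
 */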
6156bool bpf_helper_changes_pkt_data(void *func)
6157{
6158 if (func == bpf_skb_vlan_push ||
6159 func == bpf_skb_vlan_pop ||
6160 func == bpf_skb_store_bytes ||
6161 func == bpf_skb_change_proto ||
6162 func == bpf_skb_change_head ||
6163 func == sk_skb_change_head ||
6164 func == bpf_skb_change_tail ||
6165 func == sk_skb_change_tail ||
6166 func == bpf_skb_adjust_room ||
6167 func == bpf_skb_pull_data ||
6168 func == sk_skb_pull_data ||
6169 func == bpf_clone_redirect ||
6170 func == bpf_l3_csum_replace ||
6171 func == bpf_l4_csum_replace ||
6172 func == bpf_xdp_adjust_head ||
6173 func == bpf_xdp_adjust_meta ||
6174 func == bpf_msg_pull_data ||
6175 func == bpf_msg_push_data ||
6176 func == bpf_msg_pop_data ||
6177 func == bpf_xdp_adjust_tail ||
6178#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6179 func == bpf_lwt_seg6_store_bytes ||
6180 func == bpf_lwt_seg6_adjust_srh ||
6181 func == bpf_lwt_seg6_action ||
6182#endif
6183 func == bpf_lwt_in_push_encap ||
6184 func == bpf_lwt_xmit_push_encap)
6185 return true;
6186
6187 return false;
6188}
6189
6190const struct bpf_func_proto bpf_event_output_data_proto __weak;
6191const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
6192
6193static const struct bpf_func_proto *
6194sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6195{
6196 switch (func_id) {
	/* inet and inet6 sockets are created in a process
	 * context so there is always a valid uid/gid
	 */
6200 case BPF_FUNC_get_current_uid_gid:
6201 return &bpf_get_current_uid_gid_proto;
6202 case BPF_FUNC_get_local_storage:
6203 return &bpf_get_local_storage_proto;
6204 case BPF_FUNC_get_socket_cookie:
6205 return &bpf_get_socket_cookie_sock_proto;
6206 case BPF_FUNC_get_netns_cookie:
6207 return &bpf_get_netns_cookie_sock_proto;
6208 case BPF_FUNC_perf_event_output:
6209 return &bpf_event_output_data_proto;
6210 case BPF_FUNC_get_current_pid_tgid:
6211 return &bpf_get_current_pid_tgid_proto;
6212 case BPF_FUNC_get_current_comm:
6213 return &bpf_get_current_comm_proto;
6214#ifdef CONFIG_CGROUPS
6215 case BPF_FUNC_get_current_cgroup_id:
6216 return &bpf_get_current_cgroup_id_proto;
6217 case BPF_FUNC_get_current_ancestor_cgroup_id:
6218 return &bpf_get_current_ancestor_cgroup_id_proto;
6219#endif
6220#ifdef CONFIG_CGROUP_NET_CLASSID
6221 case BPF_FUNC_get_cgroup_classid:
6222 return &bpf_get_cgroup_classid_curr_proto;
6223#endif
6224 case BPF_FUNC_sk_storage_get:
6225 return &bpf_sk_storage_get_cg_sock_proto;
6226 default:
6227 return bpf_base_func_proto(func_id);
6228 }
6229}
6230
6231static const struct bpf_func_proto *
6232sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6233{
6234 switch (func_id) {
	/* inet and inet6 sockets are created in a process
	 * context so there is always a valid uid/gid
	 */
6238 case BPF_FUNC_get_current_uid_gid:
6239 return &bpf_get_current_uid_gid_proto;
6240 case BPF_FUNC_bind:
6241 switch (prog->expected_attach_type) {
6242 case BPF_CGROUP_INET4_CONNECT:
6243 case BPF_CGROUP_INET6_CONNECT:
6244 return &bpf_bind_proto;
6245 default:
6246 return NULL;
6247 }
6248 case BPF_FUNC_get_socket_cookie:
6249 return &bpf_get_socket_cookie_sock_addr_proto;
6250 case BPF_FUNC_get_netns_cookie:
6251 return &bpf_get_netns_cookie_sock_addr_proto;
6252 case BPF_FUNC_get_local_storage:
6253 return &bpf_get_local_storage_proto;
6254 case BPF_FUNC_perf_event_output:
6255 return &bpf_event_output_data_proto;
6256 case BPF_FUNC_get_current_pid_tgid:
6257 return &bpf_get_current_pid_tgid_proto;
6258 case BPF_FUNC_get_current_comm:
6259 return &bpf_get_current_comm_proto;
6260#ifdef CONFIG_CGROUPS
6261 case BPF_FUNC_get_current_cgroup_id:
6262 return &bpf_get_current_cgroup_id_proto;
6263 case BPF_FUNC_get_current_ancestor_cgroup_id:
6264 return &bpf_get_current_ancestor_cgroup_id_proto;
6265#endif
6266#ifdef CONFIG_CGROUP_NET_CLASSID
6267 case BPF_FUNC_get_cgroup_classid:
6268 return &bpf_get_cgroup_classid_curr_proto;
6269#endif
6270#ifdef CONFIG_INET
6271 case BPF_FUNC_sk_lookup_tcp:
6272 return &bpf_sock_addr_sk_lookup_tcp_proto;
6273 case BPF_FUNC_sk_lookup_udp:
6274 return &bpf_sock_addr_sk_lookup_udp_proto;
6275 case BPF_FUNC_sk_release:
6276 return &bpf_sk_release_proto;
6277 case BPF_FUNC_skc_lookup_tcp:
6278 return &bpf_sock_addr_skc_lookup_tcp_proto;
6279#endif
6280 case BPF_FUNC_sk_storage_get:
6281 return &bpf_sk_storage_get_proto;
6282 case BPF_FUNC_sk_storage_delete:
6283 return &bpf_sk_storage_delete_proto;
6284 case BPF_FUNC_setsockopt:
6285 switch (prog->expected_attach_type) {
6286 case BPF_CGROUP_INET4_CONNECT:
6287 case BPF_CGROUP_INET6_CONNECT:
6288 return &bpf_sock_addr_setsockopt_proto;
6289 default:
6290 return NULL;
6291 }
6292 case BPF_FUNC_getsockopt:
6293 switch (prog->expected_attach_type) {
6294 case BPF_CGROUP_INET4_CONNECT:
6295 case BPF_CGROUP_INET6_CONNECT:
6296 return &bpf_sock_addr_getsockopt_proto;
6297 default:
6298 return NULL;
6299 }
6300 default:
6301 return bpf_base_func_proto(func_id);
6302 }
6303}
6304
6305static const struct bpf_func_proto *
6306sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6307{
6308 switch (func_id) {
6309 case BPF_FUNC_skb_load_bytes:
6310 return &bpf_skb_load_bytes_proto;
6311 case BPF_FUNC_skb_load_bytes_relative:
6312 return &bpf_skb_load_bytes_relative_proto;
6313 case BPF_FUNC_get_socket_cookie:
6314 return &bpf_get_socket_cookie_proto;
6315 case BPF_FUNC_get_socket_uid:
6316 return &bpf_get_socket_uid_proto;
6317 case BPF_FUNC_perf_event_output:
6318 return &bpf_skb_event_output_proto;
6319 default:
6320 return bpf_base_func_proto(func_id);
6321 }
6322}
6323
6324const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
6325const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
6326
6327static const struct bpf_func_proto *
6328cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6329{
6330 switch (func_id) {
6331 case BPF_FUNC_get_local_storage:
6332 return &bpf_get_local_storage_proto;
6333 case BPF_FUNC_sk_fullsock:
6334 return &bpf_sk_fullsock_proto;
6335 case BPF_FUNC_sk_storage_get:
6336 return &bpf_sk_storage_get_proto;
6337 case BPF_FUNC_sk_storage_delete:
6338 return &bpf_sk_storage_delete_proto;
6339 case BPF_FUNC_perf_event_output:
6340 return &bpf_skb_event_output_proto;
6341#ifdef CONFIG_SOCK_CGROUP_DATA
6342 case BPF_FUNC_skb_cgroup_id:
6343 return &bpf_skb_cgroup_id_proto;
6344 case BPF_FUNC_skb_ancestor_cgroup_id:
6345 return &bpf_skb_ancestor_cgroup_id_proto;
6346 case BPF_FUNC_sk_cgroup_id:
6347 return &bpf_sk_cgroup_id_proto;
6348 case BPF_FUNC_sk_ancestor_cgroup_id:
6349 return &bpf_sk_ancestor_cgroup_id_proto;
6350#endif
6351#ifdef CONFIG_INET
6352 case BPF_FUNC_sk_lookup_tcp:
6353 return &bpf_sk_lookup_tcp_proto;
6354 case BPF_FUNC_sk_lookup_udp:
6355 return &bpf_sk_lookup_udp_proto;
6356 case BPF_FUNC_sk_release:
6357 return &bpf_sk_release_proto;
6358 case BPF_FUNC_skc_lookup_tcp:
6359 return &bpf_skc_lookup_tcp_proto;
6360 case BPF_FUNC_tcp_sock:
6361 return &bpf_tcp_sock_proto;
6362 case BPF_FUNC_get_listener_sock:
6363 return &bpf_get_listener_sock_proto;
6364 case BPF_FUNC_skb_ecn_set_ce:
6365 return &bpf_skb_ecn_set_ce_proto;
6366#endif
6367 default:
6368 return sk_filter_func_proto(func_id, prog);
6369 }
6370}
6371
6372static const struct bpf_func_proto *
6373tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6374{
6375 switch (func_id) {
6376 case BPF_FUNC_skb_store_bytes:
6377 return &bpf_skb_store_bytes_proto;
6378 case BPF_FUNC_skb_load_bytes:
6379 return &bpf_skb_load_bytes_proto;
6380 case BPF_FUNC_skb_load_bytes_relative:
6381 return &bpf_skb_load_bytes_relative_proto;
6382 case BPF_FUNC_skb_pull_data:
6383 return &bpf_skb_pull_data_proto;
6384 case BPF_FUNC_csum_diff:
6385 return &bpf_csum_diff_proto;
6386 case BPF_FUNC_csum_update:
6387 return &bpf_csum_update_proto;
6388 case BPF_FUNC_csum_level:
6389 return &bpf_csum_level_proto;
6390 case BPF_FUNC_l3_csum_replace:
6391 return &bpf_l3_csum_replace_proto;
6392 case BPF_FUNC_l4_csum_replace:
6393 return &bpf_l4_csum_replace_proto;
6394 case BPF_FUNC_clone_redirect:
6395 return &bpf_clone_redirect_proto;
6396 case BPF_FUNC_get_cgroup_classid:
6397 return &bpf_get_cgroup_classid_proto;
6398 case BPF_FUNC_skb_vlan_push:
6399 return &bpf_skb_vlan_push_proto;
6400 case BPF_FUNC_skb_vlan_pop:
6401 return &bpf_skb_vlan_pop_proto;
6402 case BPF_FUNC_skb_change_proto:
6403 return &bpf_skb_change_proto_proto;
6404 case BPF_FUNC_skb_change_type:
6405 return &bpf_skb_change_type_proto;
6406 case BPF_FUNC_skb_adjust_room:
6407 return &bpf_skb_adjust_room_proto;
6408 case BPF_FUNC_skb_change_tail:
6409 return &bpf_skb_change_tail_proto;
6410 case BPF_FUNC_skb_change_head:
6411 return &bpf_skb_change_head_proto;
6412 case BPF_FUNC_skb_get_tunnel_key:
6413 return &bpf_skb_get_tunnel_key_proto;
6414 case BPF_FUNC_skb_set_tunnel_key:
6415 return bpf_get_skb_set_tunnel_proto(func_id);
6416 case BPF_FUNC_skb_get_tunnel_opt:
6417 return &bpf_skb_get_tunnel_opt_proto;
6418 case BPF_FUNC_skb_set_tunnel_opt:
6419 return bpf_get_skb_set_tunnel_proto(func_id);
6420 case BPF_FUNC_redirect:
6421 return &bpf_redirect_proto;
6422 case BPF_FUNC_get_route_realm:
6423 return &bpf_get_route_realm_proto;
6424 case BPF_FUNC_get_hash_recalc:
6425 return &bpf_get_hash_recalc_proto;
6426 case BPF_FUNC_set_hash_invalid:
6427 return &bpf_set_hash_invalid_proto;
6428 case BPF_FUNC_set_hash:
6429 return &bpf_set_hash_proto;
6430 case BPF_FUNC_perf_event_output:
6431 return &bpf_skb_event_output_proto;
6432 case BPF_FUNC_get_smp_processor_id:
6433 return &bpf_get_smp_processor_id_proto;
6434 case BPF_FUNC_skb_under_cgroup:
6435 return &bpf_skb_under_cgroup_proto;
6436 case BPF_FUNC_get_socket_cookie:
6437 return &bpf_get_socket_cookie_proto;
6438 case BPF_FUNC_get_socket_uid:
6439 return &bpf_get_socket_uid_proto;
6440 case BPF_FUNC_fib_lookup:
6441 return &bpf_skb_fib_lookup_proto;
6442 case BPF_FUNC_sk_fullsock:
6443 return &bpf_sk_fullsock_proto;
6444 case BPF_FUNC_sk_storage_get:
6445 return &bpf_sk_storage_get_proto;
6446 case BPF_FUNC_sk_storage_delete:
6447 return &bpf_sk_storage_delete_proto;
6448#ifdef CONFIG_XFRM
6449 case BPF_FUNC_skb_get_xfrm_state:
6450 return &bpf_skb_get_xfrm_state_proto;
6451#endif
6452#ifdef CONFIG_SOCK_CGROUP_DATA
6453 case BPF_FUNC_skb_cgroup_id:
6454 return &bpf_skb_cgroup_id_proto;
6455 case BPF_FUNC_skb_ancestor_cgroup_id:
6456 return &bpf_skb_ancestor_cgroup_id_proto;
6457#endif
6458#ifdef CONFIG_INET
6459 case BPF_FUNC_sk_lookup_tcp:
6460 return &bpf_sk_lookup_tcp_proto;
6461 case BPF_FUNC_sk_lookup_udp:
6462 return &bpf_sk_lookup_udp_proto;
6463 case BPF_FUNC_sk_release:
6464 return &bpf_sk_release_proto;
6465 case BPF_FUNC_tcp_sock:
6466 return &bpf_tcp_sock_proto;
6467 case BPF_FUNC_get_listener_sock:
6468 return &bpf_get_listener_sock_proto;
6469 case BPF_FUNC_skc_lookup_tcp:
6470 return &bpf_skc_lookup_tcp_proto;
6471 case BPF_FUNC_tcp_check_syncookie:
6472 return &bpf_tcp_check_syncookie_proto;
6473 case BPF_FUNC_skb_ecn_set_ce:
6474 return &bpf_skb_ecn_set_ce_proto;
6475 case BPF_FUNC_tcp_gen_syncookie:
6476 return &bpf_tcp_gen_syncookie_proto;
6477 case BPF_FUNC_sk_assign:
6478 return &bpf_sk_assign_proto;
6479#endif
6480 default:
6481 return bpf_base_func_proto(func_id);
6482 }
6483}
6484
6485static const struct bpf_func_proto *
6486xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6487{
6488 switch (func_id) {
6489 case BPF_FUNC_perf_event_output:
6490 return &bpf_xdp_event_output_proto;
6491 case BPF_FUNC_get_smp_processor_id:
6492 return &bpf_get_smp_processor_id_proto;
6493 case BPF_FUNC_csum_diff:
6494 return &bpf_csum_diff_proto;
6495 case BPF_FUNC_xdp_adjust_head:
6496 return &bpf_xdp_adjust_head_proto;
6497 case BPF_FUNC_xdp_adjust_meta:
6498 return &bpf_xdp_adjust_meta_proto;
6499 case BPF_FUNC_redirect:
6500 return &bpf_xdp_redirect_proto;
6501 case BPF_FUNC_redirect_map:
6502 return &bpf_xdp_redirect_map_proto;
6503 case BPF_FUNC_xdp_adjust_tail:
6504 return &bpf_xdp_adjust_tail_proto;
6505 case BPF_FUNC_fib_lookup:
6506 return &bpf_xdp_fib_lookup_proto;
6507#ifdef CONFIG_INET
6508 case BPF_FUNC_sk_lookup_udp:
6509 return &bpf_xdp_sk_lookup_udp_proto;
6510 case BPF_FUNC_sk_lookup_tcp:
6511 return &bpf_xdp_sk_lookup_tcp_proto;
6512 case BPF_FUNC_sk_release:
6513 return &bpf_sk_release_proto;
6514 case BPF_FUNC_skc_lookup_tcp:
6515 return &bpf_xdp_skc_lookup_tcp_proto;
6516 case BPF_FUNC_tcp_check_syncookie:
6517 return &bpf_tcp_check_syncookie_proto;
6518 case BPF_FUNC_tcp_gen_syncookie:
6519 return &bpf_tcp_gen_syncookie_proto;
6520#endif
6521 default:
6522 return bpf_base_func_proto(func_id);
6523 }
6524}
6525
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;

static const struct bpf_func_proto *
sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_setsockopt:
		return &bpf_sock_ops_setsockopt_proto;
	case BPF_FUNC_getsockopt:
		return &bpf_sock_ops_getsockopt_proto;
	case BPF_FUNC_sock_ops_cb_flags_set:
		return &bpf_sock_ops_cb_flags_set_proto;
	case BPF_FUNC_sock_map_update:
		return &bpf_sock_map_update_proto;
	case BPF_FUNC_sock_hash_update:
		return &bpf_sock_hash_update_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_cookie_sock_ops_proto;
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_proto;
#ifdef CONFIG_INET
	case BPF_FUNC_tcp_sock:
		return &bpf_tcp_sock_proto;
#endif
	default:
		return bpf_base_func_proto(func_id);
	}
}

const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;

static const struct bpf_func_proto *
sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_msg_redirect_map:
		return &bpf_msg_redirect_map_proto;
	case BPF_FUNC_msg_redirect_hash:
		return &bpf_msg_redirect_hash_proto;
	case BPF_FUNC_msg_apply_bytes:
		return &bpf_msg_apply_bytes_proto;
	case BPF_FUNC_msg_cork_bytes:
		return &bpf_msg_cork_bytes_proto;
	case BPF_FUNC_msg_pull_data:
		return &bpf_msg_pull_data_proto;
	case BPF_FUNC_msg_push_data:
		return &bpf_msg_push_data_proto;
	case BPF_FUNC_msg_pop_data:
		return &bpf_msg_pop_data_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_proto;
#ifdef CONFIG_CGROUPS
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
	case BPF_FUNC_get_current_ancestor_cgroup_id:
		return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
#ifdef CONFIG_CGROUP_NET_CLASSID
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_curr_proto;
#endif
	default:
		return bpf_base_func_proto(func_id);
	}
}

const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;

static const struct bpf_func_proto *
sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_skb_store_bytes:
		return &bpf_skb_store_bytes_proto;
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
	case BPF_FUNC_skb_pull_data:
		return &sk_skb_pull_data_proto;
	case BPF_FUNC_skb_change_tail:
		return &sk_skb_change_tail_proto;
	case BPF_FUNC_skb_change_head:
		return &sk_skb_change_head_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_cookie_proto;
	case BPF_FUNC_get_socket_uid:
		return &bpf_get_socket_uid_proto;
	case BPF_FUNC_sk_redirect_map:
		return &bpf_sk_redirect_map_proto;
	case BPF_FUNC_sk_redirect_hash:
		return &bpf_sk_redirect_hash_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_skb_event_output_proto;
#ifdef CONFIG_INET
	case BPF_FUNC_sk_lookup_tcp:
		return &bpf_sk_lookup_tcp_proto;
	case BPF_FUNC_sk_lookup_udp:
		return &bpf_sk_lookup_udp_proto;
	case BPF_FUNC_sk_release:
		return &bpf_sk_release_proto;
	case BPF_FUNC_skc_lookup_tcp:
		return &bpf_skc_lookup_tcp_proto;
#endif
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_skb_load_bytes:
		return &bpf_flow_dissector_load_bytes_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

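/* The lwt_in/lwt_xmit/lwt_seg6local tables below only add their own
 * helpers and fall back to lwt_out_func_proto() for the common set.
 */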
static const struct bpf_func_proto *
lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
	case BPF_FUNC_skb_pull_data:
		return &bpf_skb_pull_data_proto;
	case BPF_FUNC_csum_diff:
		return &bpf_csum_diff_proto;
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_proto;
	case BPF_FUNC_get_route_realm:
		return &bpf_get_route_realm_proto;
	case BPF_FUNC_get_hash_recalc:
		return &bpf_get_hash_recalc_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_skb_event_output_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_skb_under_cgroup:
		return &bpf_skb_under_cgroup_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_lwt_push_encap:
		return &bpf_lwt_in_push_encap_proto;
	default:
		return lwt_out_func_proto(func_id, prog);
	}
}

static const struct bpf_func_proto *
lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_skb_get_tunnel_key:
		return &bpf_skb_get_tunnel_key_proto;
	case BPF_FUNC_skb_set_tunnel_key:
		return bpf_get_skb_set_tunnel_proto(func_id);
	case BPF_FUNC_skb_get_tunnel_opt:
		return &bpf_skb_get_tunnel_opt_proto;
	case BPF_FUNC_skb_set_tunnel_opt:
		return bpf_get_skb_set_tunnel_proto(func_id);
	case BPF_FUNC_redirect:
		return &bpf_redirect_proto;
	case BPF_FUNC_clone_redirect:
		return &bpf_clone_redirect_proto;
	case BPF_FUNC_skb_change_tail:
		return &bpf_skb_change_tail_proto;
	case BPF_FUNC_skb_change_head:
		return &bpf_skb_change_head_proto;
	case BPF_FUNC_skb_store_bytes:
		return &bpf_skb_store_bytes_proto;
	case BPF_FUNC_csum_update:
		return &bpf_csum_update_proto;
	case BPF_FUNC_csum_level:
		return &bpf_csum_level_proto;
	case BPF_FUNC_l3_csum_replace:
		return &bpf_l3_csum_replace_proto;
	case BPF_FUNC_l4_csum_replace:
		return &bpf_l4_csum_replace_proto;
	case BPF_FUNC_set_hash_invalid:
		return &bpf_set_hash_invalid_proto;
	case BPF_FUNC_lwt_push_encap:
		return &bpf_lwt_xmit_push_encap_proto;
	default:
		return lwt_out_func_proto(func_id, prog);
	}
}

static const struct bpf_func_proto *
lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
	case BPF_FUNC_lwt_seg6_store_bytes:
		return &bpf_lwt_seg6_store_bytes_proto;
	case BPF_FUNC_lwt_seg6_action:
		return &bpf_lwt_seg6_action_proto;
	case BPF_FUNC_lwt_seg6_adjust_srh:
		return &bpf_lwt_seg6_adjust_srh_proto;
#endif
	default:
		return lwt_out_func_proto(func_id, prog);
	}
}

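/* Access checks common to all skb-based program types. The per-type
 * callbacks below first reject or annotate their own fields, then
 * defer here for the generic __sk_buff rules.
 */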
static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct __sk_buff))
		return false;

	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
		if (off + size > offsetofend(struct __sk_buff, cb[4]))
			return false;
		break;
	case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
	case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
	case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
	case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
	case bpf_ctx_range(struct __sk_buff, data):
	case bpf_ctx_range(struct __sk_buff, data_meta):
	case bpf_ctx_range(struct __sk_buff, data_end):
		if (size != size_default)
			return false;
		break;
	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
		return false;
	case bpf_ctx_range(struct __sk_buff, tstamp):
		if (size != sizeof(__u64))
			return false;
		break;
	case offsetof(struct __sk_buff, sk):
		if (type == BPF_WRITE || size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
		break;
	default:
		/* Only narrow read access allowed for now. */
		if (type == BPF_WRITE) {
			if (size != size_default)
				return false;
		} else {
			bpf_ctx_record_field_size(info, size_default);
			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
				return false;
		}
	}

	return true;
}

static bool sk_filter_is_valid_access(int off, int size,
				      enum bpf_access_type type,
				      const struct bpf_prog *prog,
				      struct bpf_insn_access_aux *info)
{
	switch (off) {
	case bpf_ctx_range(struct __sk_buff, tc_classid):
	case bpf_ctx_range(struct __sk_buff, data):
	case bpf_ctx_range(struct __sk_buff, data_meta):
	case bpf_ctx_range(struct __sk_buff, data_end):
	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
	case bpf_ctx_range(struct __sk_buff, tstamp):
	case bpf_ctx_range(struct __sk_buff, wire_len):
		return false;
	}

	if (type == BPF_WRITE) {
		switch (off) {
		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
			break;
		default:
			return false;
		}
	}

	return bpf_skb_is_valid_access(off, size, type, prog, info);
}

static bool cg_skb_is_valid_access(int off, int size,
				   enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	switch (off) {
	case bpf_ctx_range(struct __sk_buff, tc_classid):
	case bpf_ctx_range(struct __sk_buff, data_meta):
	case bpf_ctx_range(struct __sk_buff, wire_len):
		return false;
	case bpf_ctx_range(struct __sk_buff, data):
	case bpf_ctx_range(struct __sk_buff, data_end):
		if (!bpf_capable())
			return false;
		break;
	}

	if (type == BPF_WRITE) {
		switch (off) {
		case bpf_ctx_range(struct __sk_buff, mark):
		case bpf_ctx_range(struct __sk_buff, priority):
		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
			break;
		case bpf_ctx_range(struct __sk_buff, tstamp):
			if (!bpf_capable())
				return false;
			break;
		default:
			return false;
		}
	}

	switch (off) {
	case bpf_ctx_range(struct __sk_buff, data):
		info->reg_type = PTR_TO_PACKET;
		break;
	case bpf_ctx_range(struct __sk_buff, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		break;
	}

	return bpf_skb_is_valid_access(off, size, type, prog, info);
}

static bool lwt_is_valid_access(int off, int size,
				enum bpf_access_type type,
				const struct bpf_prog *prog,
				struct bpf_insn_access_aux *info)
{
	switch (off) {
	case bpf_ctx_range(struct __sk_buff, tc_classid):
	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
	case bpf_ctx_range(struct __sk_buff, data_meta):
	case bpf_ctx_range(struct __sk_buff, tstamp):
	case bpf_ctx_range(struct __sk_buff, wire_len):
		return false;
	}

	if (type == BPF_WRITE) {
		switch (off) {
		case bpf_ctx_range(struct __sk_buff, mark):
		case bpf_ctx_range(struct __sk_buff, priority):
		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
			break;
		default:
			return false;
		}
	}

	switch (off) {
	case bpf_ctx_range(struct __sk_buff, data):
		info->reg_type = PTR_TO_PACKET;
		break;
	case bpf_ctx_range(struct __sk_buff, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		break;
	}

	return bpf_skb_is_valid_access(off, size, type, prog, info);
}

/* Attach type specific accesses */
static bool __sock_filter_check_attach_type(int off,
					    enum bpf_access_type access_type,
					    enum bpf_attach_type attach_type)
{
	switch (off) {
	case offsetof(struct bpf_sock, bound_dev_if):
	case offsetof(struct bpf_sock, mark):
	case offsetof(struct bpf_sock, priority):
		switch (attach_type) {
		case BPF_CGROUP_INET_SOCK_CREATE:
		case BPF_CGROUP_INET_SOCK_RELEASE:
			goto full_access;
		default:
			return false;
		}
	case bpf_ctx_range(struct bpf_sock, src_ip4):
		switch (attach_type) {
		case BPF_CGROUP_INET4_POST_BIND:
			goto read_only;
		default:
			return false;
		}
	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
		switch (attach_type) {
		case BPF_CGROUP_INET6_POST_BIND:
			goto read_only;
		default:
			return false;
		}
	case bpf_ctx_range(struct bpf_sock, src_port):
		switch (attach_type) {
		case BPF_CGROUP_INET4_POST_BIND:
		case BPF_CGROUP_INET6_POST_BIND:
			goto read_only;
		default:
			return false;
		}
	}
read_only:
	return access_type == BPF_READ;
full_access:
	return true;
}

bool bpf_sock_common_is_valid_access(int off, int size,
				     enum bpf_access_type type,
				     struct bpf_insn_access_aux *info)
{
	switch (off) {
	case bpf_ctx_range_till(struct bpf_sock, type, priority):
		return false;
	default:
		return bpf_sock_is_valid_access(off, size, type, info);
	}
}

bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
			      struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct bpf_sock))
		return false;
	if (off % size != 0)
		return false;

	switch (off) {
	case offsetof(struct bpf_sock, state):
	case offsetof(struct bpf_sock, family):
	case offsetof(struct bpf_sock, type):
	case offsetof(struct bpf_sock, protocol):
	case offsetof(struct bpf_sock, dst_port):
	case offsetof(struct bpf_sock, src_port):
	case offsetof(struct bpf_sock, rx_queue_mapping):
	case bpf_ctx_range(struct bpf_sock, src_ip4):
	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
	case bpf_ctx_range(struct bpf_sock, dst_ip4):
	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);
	}

	return size == size_default;
}

static bool sock_filter_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	if (!bpf_sock_is_valid_access(off, size, type, info))
		return false;
	return __sock_filter_check_attach_type(off, type,
					       prog->expected_attach_type);
}

static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
			     const struct bpf_prog *prog)
{
	/* Neither direct read nor direct write requires any preliminary
	 * action.
	 */
	return 0;
}

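/* gen_prologue callbacks return the number of instructions written to
 * insn_buf. A non-zero prologue is patched in at offset 0 and replaces
 * the program's first instruction, which therefore has to be re-emitted
 * at the end of the prologue (see the tail of bpf_unclone_prologue()).
 */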
static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
				const struct bpf_prog *prog, int drop_verdict)
{
	struct bpf_insn *insn = insn_buf;

	if (!direct_write)
		return 0;

	/* if (!skb->cloned)
	 *       goto start;
	 *
	 * (Fast-path, otherwise approximation that we might be
	 *  a clone, do the rest in helper.)
	 */
	*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);

	/* ret = bpf_skb_pull_data(skb, 0); */
	*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
	*insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
	*insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			       BPF_FUNC_skb_pull_data);
	/* if (!ret)
	 *      goto restore;
	 * return TC_ACT_SHOT;
	 */
	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
	*insn++ = BPF_EXIT_INSN();

	/* restore: */
	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
	/* Re-emit the original first insn that this prologue replaces. */
	*insn++ = prog->insnsi[0];

	return insn - insn_buf;
}

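/* Expands a classic BPF_LD | BPF_ABS/BPF_IND instruction into native
 * eBPF: R2 = offset, R1 = ctx, then a call to the size-matching
 * skb_load_helper.
 */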
static int bpf_gen_ld_abs(const struct bpf_insn *orig,
			  struct bpf_insn *insn_buf)
{
	bool indirect = BPF_MODE(orig->code) == BPF_IND;
	struct bpf_insn *insn = insn_buf;

	if (!indirect) {
		*insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
	} else {
		*insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
		if (orig->imm)
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
	}

	/* We're guaranteed here that CTX is in R6. */
	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);

	switch (BPF_SIZE(orig->code)) {
	case BPF_B:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
		break;
	case BPF_H:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
		break;
	case BPF_W:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
		break;
	}

	/* On a negative (error) return, exit the program with 0, matching
	 * classic LD_ABS/LD_IND abort semantics.
	 */
	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
	*insn++ = BPF_EXIT_INSN();

	return insn - insn_buf;
}

static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
			       const struct bpf_prog *prog)
{
	return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
}

static bool tc_cls_act_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	if (type == BPF_WRITE) {
		switch (off) {
		case bpf_ctx_range(struct __sk_buff, mark):
		case bpf_ctx_range(struct __sk_buff, tc_index):
		case bpf_ctx_range(struct __sk_buff, priority):
		case bpf_ctx_range(struct __sk_buff, tc_classid):
		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
		case bpf_ctx_range(struct __sk_buff, tstamp):
		case bpf_ctx_range(struct __sk_buff, queue_mapping):
			break;
		default:
			return false;
		}
	}

	switch (off) {
	case bpf_ctx_range(struct __sk_buff, data):
		info->reg_type = PTR_TO_PACKET;
		break;
	case bpf_ctx_range(struct __sk_buff, data_meta):
		info->reg_type = PTR_TO_PACKET_META;
		break;
	case bpf_ctx_range(struct __sk_buff, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		break;
	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
		return false;
	}

	return bpf_skb_is_valid_access(off, size, type, prog, info);
}

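/* All xdp_md fields are 32 bit wide, so only aligned 4-byte accesses
 * are allowed.
 */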
static bool __is_valid_xdp_access(int off, int size)
{
	if (off < 0 || off >= sizeof(struct xdp_md))
		return false;
	if (off % size != 0)
		return false;
	if (size != sizeof(__u32))
		return false;

	return true;
}

static bool xdp_is_valid_access(int off, int size,
				enum bpf_access_type type,
				const struct bpf_prog *prog,
				struct bpf_insn_access_aux *info)
{
	if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
		switch (off) {
		case offsetof(struct xdp_md, egress_ifindex):
			return false;
		}
	}

	if (type == BPF_WRITE) {
		if (bpf_prog_is_dev_bound(prog->aux)) {
			switch (off) {
			case offsetof(struct xdp_md, rx_queue_index):
				return __is_valid_xdp_access(off, size);
			}
		}
		return false;
	}

	switch (off) {
	case offsetof(struct xdp_md, data):
		info->reg_type = PTR_TO_PACKET;
		break;
	case offsetof(struct xdp_md, data_meta):
		info->reg_type = PTR_TO_PACKET_META;
		break;
	case offsetof(struct xdp_md, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		break;
	}

	return __is_valid_xdp_access(off, size);
}

void bpf_warn_invalid_xdp_action(u32 act)
{
	const u32 act_max = XDP_REDIRECT;

	WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
		  act > act_max ? "Illegal" : "Driver unsupported",
		  act);
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);

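/* sock_addr programs see different context fields depending on the
 * attach point, so validity is checked against expected_attach_type
 * before the generic size rules.
 */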
static bool sock_addr_is_valid_access(int off, int size,
				      enum bpf_access_type type,
				      const struct bpf_prog *prog,
				      struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct bpf_sock_addr))
		return false;
	if (off % size != 0)
		return false;

	/* Disallow access to IPv6 fields from IPv4 context and vice
	 * versa.
	 */
	switch (off) {
	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_INET4_BIND:
		case BPF_CGROUP_INET4_CONNECT:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_UDP4_SENDMSG:
		case BPF_CGROUP_UDP4_RECVMSG:
			break;
		default:
			return false;
		}
		break;
	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_INET6_BIND:
		case BPF_CGROUP_INET6_CONNECT:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
		case BPF_CGROUP_UDP6_SENDMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
			break;
		default:
			return false;
		}
		break;
	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_UDP4_SENDMSG:
			break;
		default:
			return false;
		}
		break;
	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
				msg_src_ip6[3]):
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_UDP6_SENDMSG:
			break;
		default:
			return false;
		}
		break;
	}

	switch (off) {
	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
				msg_src_ip6[3]):
	case bpf_ctx_range(struct bpf_sock_addr, user_port):
		if (type == BPF_READ) {
			bpf_ctx_record_field_size(info, size_default);

			if (bpf_ctx_wide_access_ok(off, size,
						   struct bpf_sock_addr,
						   user_ip6))
				return true;

			if (bpf_ctx_wide_access_ok(off, size,
						   struct bpf_sock_addr,
						   msg_src_ip6))
				return true;

			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
				return false;
		} else {
			if (bpf_ctx_wide_access_ok(off, size,
						   struct bpf_sock_addr,
						   user_ip6))
				return true;

			if (bpf_ctx_wide_access_ok(off, size,
						   struct bpf_sock_addr,
						   msg_src_ip6))
				return true;

			if (size != size_default)
				return false;
		}
		break;
	case offsetof(struct bpf_sock_addr, sk):
		if (type != BPF_READ)
			return false;
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_SOCKET;
		break;
	default:
		if (type == BPF_READ) {
			if (size != size_default)
				return false;
		} else {
			return false;
		}
	}

	return true;
}

static bool sock_ops_is_valid_access(int off, int size,
				     enum bpf_access_type type,
				     const struct bpf_prog *prog,
				     struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
		return false;

	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;

	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct bpf_sock_ops, reply):
		case offsetof(struct bpf_sock_ops, sk_txhash):
			if (size != size_default)
				return false;
			break;
		default:
			return false;
		}
	} else {
		switch (off) {
		case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
					bytes_acked):
			if (size != sizeof(__u64))
				return false;
			break;
		case offsetof(struct bpf_sock_ops, sk):
			if (size != sizeof(__u64))
				return false;
			info->reg_type = PTR_TO_SOCKET_OR_NULL;
			break;
		default:
			if (size != size_default)
				return false;
			break;
		}
	}

	return true;
}

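/* sk_skb programs may write into packet data, so they reuse the unclone
 * prologue with SK_DROP as the verdict if the unclone fails.
 */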
static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
			   const struct bpf_prog *prog)
{
	return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
}

static bool sk_skb_is_valid_access(int off, int size,
				   enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	switch (off) {
	case bpf_ctx_range(struct __sk_buff, tc_classid):
	case bpf_ctx_range(struct __sk_buff, data_meta):
	case bpf_ctx_range(struct __sk_buff, tstamp):
	case bpf_ctx_range(struct __sk_buff, wire_len):
		return false;
	}

	if (type == BPF_WRITE) {
		switch (off) {
		case bpf_ctx_range(struct __sk_buff, tc_index):
		case bpf_ctx_range(struct __sk_buff, priority):
			break;
		default:
			return false;
		}
	}

	switch (off) {
	case bpf_ctx_range(struct __sk_buff, mark):
		return false;
	case bpf_ctx_range(struct __sk_buff, data):
		info->reg_type = PTR_TO_PACKET;
		break;
	case bpf_ctx_range(struct __sk_buff, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		break;
	}

	return bpf_skb_is_valid_access(off, size, type, prog, info);
}

static bool sk_msg_is_valid_access(int off, int size,
				   enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	if (type == BPF_WRITE)
		return false;

	if (off % size != 0)
		return false;

	switch (off) {
	case offsetof(struct sk_msg_md, data):
		info->reg_type = PTR_TO_PACKET;
		if (size != sizeof(__u64))
			return false;
		break;
	case offsetof(struct sk_msg_md, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		if (size != sizeof(__u64))
			return false;
		break;
	case offsetof(struct sk_msg_md, sk):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_SOCKET;
		break;
	case bpf_ctx_range(struct sk_msg_md, family):
	case bpf_ctx_range(struct sk_msg_md, remote_ip4):
	case bpf_ctx_range(struct sk_msg_md, local_ip4):
	case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
	case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
	case bpf_ctx_range(struct sk_msg_md, remote_port):
	case bpf_ctx_range(struct sk_msg_md, local_port):
	case bpf_ctx_range(struct sk_msg_md, size):
		if (size != sizeof(__u32))
			return false;
		break;
	default:
		return false;
	}
	return true;
}

static bool flow_dissector_is_valid_access(int off, int size,
					   enum bpf_access_type type,
					   const struct bpf_prog *prog,
					   struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct __sk_buff))
		return false;

	if (type == BPF_WRITE)
		return false;

	switch (off) {
	case bpf_ctx_range(struct __sk_buff, data):
		if (size != size_default)
			return false;
		info->reg_type = PTR_TO_PACKET;
		return true;
	case bpf_ctx_range(struct __sk_buff, data_end):
		if (size != size_default)
			return false;
		info->reg_type = PTR_TO_PACKET_END;
		return true;
	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_FLOW_KEYS;
		return true;
	default:
		return false;
	}
}

static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
					     const struct bpf_insn *si,
					     struct bpf_insn *insn_buf,
					     struct bpf_prog *prog,
					     u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct __sk_buff, data):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_flow_dissector, data));
		break;

	case offsetof(struct __sk_buff, data_end):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_flow_dissector, data_end));
		break;

	case offsetof(struct __sk_buff, flow_keys):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_flow_dissector, flow_keys));
		break;
	}

	return insn - insn_buf;
}

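/* With NET_SKBUFF_DATA_USES_OFFSET, skb->end is an offset from
 * skb->head, otherwise it is already the pointer to the shared info
 * area; both variants leave skb_shinfo(skb) in dst_reg.
 */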
static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
						  struct bpf_insn *insn)
{
	/* si->dst_reg = skb_shinfo(SKB); */
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
			      BPF_REG_AX, si->src_reg,
			      offsetof(struct sk_buff, end));
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
			      si->dst_reg, si->src_reg,
			      offsetof(struct sk_buff, head));
	*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
#else
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
			      si->dst_reg, si->src_reg,
			      offsetof(struct sk_buff, end));
#endif

	return insn;
}

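/* Rewrites accesses to the virtual struct __sk_buff into loads/stores
 * on the real struct sk_buff (or state stashed in skb->cb) at verifier
 * time. For example, a program's read of __sk_buff::len becomes a plain
 * BPF_W load of skb->len at its native offset.
 */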
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
				  const struct bpf_insn *si,
				  struct bpf_insn *insn_buf,
				  struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	int off;

	switch (si->off) {
	case offsetof(struct __sk_buff, len):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, len, 4,
						     target_size));
		break;

	case offsetof(struct __sk_buff, protocol):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, protocol, 2,
						     target_size));
		break;

	case offsetof(struct __sk_buff, vlan_proto):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, vlan_proto, 2,
						     target_size));
		break;

	case offsetof(struct __sk_buff, priority):
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, priority, 4,
							     target_size));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, priority, 4,
							     target_size));
		break;

	case offsetof(struct __sk_buff, ingress_ifindex):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, skb_iif, 4,
						     target_size));
		break;

	case offsetof(struct __sk_buff, ifindex):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, dev));
		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct net_device, ifindex, 4,
						     target_size));
		break;

	case offsetof(struct __sk_buff, hash):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, hash, 4,
						     target_size));
		break;

	case offsetof(struct __sk_buff, mark):
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, mark, 4,
							     target_size));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, mark, 4,
							     target_size));
		break;

	case offsetof(struct __sk_buff, pkt_type):
		*target_size = 1;
		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
				      PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
#endif
		break;

	case offsetof(struct __sk_buff, queue_mapping):
		if (type == BPF_WRITE) {
			*insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff,
							     queue_mapping,
							     2, target_size));
		} else {
			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff,
							     queue_mapping,
							     2, target_size));
		}
		break;

	case offsetof(struct __sk_buff, vlan_present):
		*target_size = 1;
		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
				      PKT_VLAN_PRESENT_OFFSET());
		if (PKT_VLAN_PRESENT_BIT)
			*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
		if (PKT_VLAN_PRESENT_BIT < 7)
			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
		break;

	case offsetof(struct __sk_buff, vlan_tci):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, vlan_tci, 2,
						     target_size));
		break;

	case offsetof(struct __sk_buff, cb[0]) ...
	     offsetofend(struct __sk_buff, cb[4]) - 1:
		BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
			      offsetof(struct qdisc_skb_cb, data)) %
			     sizeof(__u64));

		prog->cb_access = 1;
		off = si->off;
		off -= offsetof(struct __sk_buff, cb[0]);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct qdisc_skb_cb, data);
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
					      si->src_reg, off);
		else
			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
					      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, tc_classid):
		BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2);

		off = si->off;
		off -= offsetof(struct __sk_buff, tc_classid);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct qdisc_skb_cb, tc_classid);
		*target_size = 2;
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
					      si->src_reg, off);
		else
			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
					      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, data):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, data));
		break;

	case offsetof(struct __sk_buff, data_meta):
		off = si->off;
		off -= offsetof(struct __sk_buff, data_meta);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct bpf_skb_data_end, data_meta);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
				      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, data_end):
		off = si->off;
		off -= offsetof(struct __sk_buff, data_end);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct bpf_skb_data_end, data_end);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
				      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, tc_index, 2,
							     target_size));
		else
			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, tc_index, 2,
							     target_size));
#else
		*target_size = 2;
		if (type == BPF_WRITE)
			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
		else
			*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct __sk_buff, napi_id):
#if defined(CONFIG_NET_RX_BUSY_POLL)
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, napi_id, 4,
						     target_size));
		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#else
		*target_size = 4;
		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
		break;
	case offsetof(struct __sk_buff, family):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_family,
						     2, target_size));
		break;
	case offsetof(struct __sk_buff, remote_ip4):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_daddr,
						     4, target_size));
		break;
	case offsetof(struct __sk_buff, local_ip4):
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_rcv_saddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_rcv_saddr,
						     4, target_size));
		break;
	case offsetof(struct __sk_buff, remote_ip6[0]) ...
	     offsetof(struct __sk_buff, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_v6_daddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct __sk_buff, remote_ip6[0]);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_daddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;
	case offsetof(struct __sk_buff, local_ip6[0]) ...
	     offsetof(struct __sk_buff, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct __sk_buff, local_ip6[0]);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_rcv_saddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct __sk_buff, remote_port):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_dport,
						     2, target_size));
#ifndef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
#endif
		break;

	case offsetof(struct __sk_buff, local_port):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_num, 2, target_size));
		break;

	case offsetof(struct __sk_buff, tstamp):
		BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_DW,
					      si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff,
							     tstamp, 8,
							     target_size));
		else
			*insn++ = BPF_LDX_MEM(BPF_DW,
					      si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff,
							     tstamp, 8,
							     target_size));
		break;

	case offsetof(struct __sk_buff, gso_segs):
		insn = bpf_convert_shinfo_access(si, insn);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
				      si->dst_reg, si->dst_reg,
				      bpf_target_off(struct skb_shared_info,
						     gso_segs, 2,
						     target_size));
		break;
	case offsetof(struct __sk_buff, gso_size):
		insn = bpf_convert_shinfo_access(si, insn);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
				      si->dst_reg, si->dst_reg,
				      bpf_target_off(struct skb_shared_info,
						     gso_size, 2,
						     target_size));
		break;
	case offsetof(struct __sk_buff, wire_len):
		BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);

		off = si->off;
		off -= offsetof(struct __sk_buff, wire_len);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct qdisc_skb_cb, pkt_len);
		*target_size = 4;
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, sk):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		break;
	}

	return insn - insn_buf;
}

u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
				const struct bpf_insn *si,
				struct bpf_insn *insn_buf,
				struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	int off;

	switch (si->off) {
	case offsetof(struct bpf_sock, bound_dev_if):
		BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_bound_dev_if));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_bound_dev_if));
		break;

	case offsetof(struct bpf_sock, mark):
		BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_mark));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_mark));
		break;

	case offsetof(struct bpf_sock, priority):
		BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_priority));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_priority));
		break;

	case offsetof(struct bpf_sock, family):
		*insn++ = BPF_LDX_MEM(
			BPF_FIELD_SIZEOF(struct sock_common, skc_family),
			si->dst_reg, si->src_reg,
			bpf_target_off(struct sock_common,
				       skc_family,
				       sizeof_field(struct sock_common,
						    skc_family),
				       target_size));
		break;

	case offsetof(struct bpf_sock, type):
		*insn++ = BPF_LDX_MEM(
			BPF_FIELD_SIZEOF(struct sock, sk_type),
			si->dst_reg, si->src_reg,
			bpf_target_off(struct sock, sk_type,
				       sizeof_field(struct sock, sk_type),
				       target_size));
		break;

	case offsetof(struct bpf_sock, protocol):
		*insn++ = BPF_LDX_MEM(
			BPF_FIELD_SIZEOF(struct sock, sk_protocol),
			si->dst_reg, si->src_reg,
			bpf_target_off(struct sock, sk_protocol,
				       sizeof_field(struct sock, sk_protocol),
				       target_size));
		break;

	case offsetof(struct bpf_sock, src_ip4):
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct sock_common, skc_rcv_saddr,
				       sizeof_field(struct sock_common,
						    skc_rcv_saddr),
				       target_size));
		break;

	case offsetof(struct bpf_sock, dst_ip4):
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct sock_common, skc_daddr,
				       sizeof_field(struct sock_common,
						    skc_daddr),
				       target_size));
		break;

	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		off = si->off;
		off -= offsetof(struct bpf_sock, src_ip6[0]);
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(
				struct sock_common,
				skc_v6_rcv_saddr.s6_addr32[0],
				sizeof_field(struct sock_common,
					     skc_v6_rcv_saddr.s6_addr32[0]),
				target_size) + off);
#else
		(void)off;
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		off = si->off;
		off -= offsetof(struct bpf_sock, dst_ip6[0]);
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct sock_common,
				       skc_v6_daddr.s6_addr32[0],
				       sizeof_field(struct sock_common,
						    skc_v6_daddr.s6_addr32[0]),
				       target_size) + off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
		*target_size = 4;
#endif
		break;

	case offsetof(struct bpf_sock, src_port):
		*insn++ = BPF_LDX_MEM(
			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
			si->dst_reg, si->src_reg,
			bpf_target_off(struct sock_common, skc_num,
				       sizeof_field(struct sock_common,
						    skc_num),
				       target_size));
		break;

	case offsetof(struct bpf_sock, dst_port):
		*insn++ = BPF_LDX_MEM(
			BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
			si->dst_reg, si->src_reg,
			bpf_target_off(struct sock_common, skc_dport,
				       sizeof_field(struct sock_common,
						    skc_dport),
				       target_size));
		break;

	case offsetof(struct bpf_sock, state):
		*insn++ = BPF_LDX_MEM(
			BPF_FIELD_SIZEOF(struct sock_common, skc_state),
			si->dst_reg, si->src_reg,
			bpf_target_off(struct sock_common, skc_state,
				       sizeof_field(struct sock_common,
						    skc_state),
				       target_size));
		break;
	case offsetof(struct bpf_sock, rx_queue_mapping):
#ifdef CONFIG_XPS
		*insn++ = BPF_LDX_MEM(
			BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
			si->dst_reg, si->src_reg,
			bpf_target_off(struct sock, sk_rx_queue_mapping,
				       sizeof_field(struct sock,
						    sk_rx_queue_mapping),
				       target_size));
		*insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
				      1);
		*insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
#else
		*insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
		*target_size = 2;
#endif
		break;
	}

	return insn - insn_buf;
}

static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct __sk_buff, ifindex):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, dev));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct net_device, ifindex, 4,
						     target_size));
		break;
	default:
		return bpf_convert_ctx_access(type, si, insn_buf, prog,
					      target_size);
	}

	return insn - insn_buf;
}

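/* xdp_md fields map directly onto struct xdp_buff and its rxq/txq
 * back-pointers; no scratch state is needed.
 */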
static u32 xdp_convert_ctx_access(enum bpf_access_type type,
				  const struct bpf_insn *si,
				  struct bpf_insn *insn_buf,
				  struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct xdp_md, data):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, data));
		break;
	case offsetof(struct xdp_md, data_meta):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, data_meta));
		break;
	case offsetof(struct xdp_md, data_end):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, data_end));
		break;
	case offsetof(struct xdp_md, ingress_ifindex):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, rxq));
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
				      si->dst_reg, si->dst_reg,
				      offsetof(struct xdp_rxq_info, dev));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct net_device, ifindex));
		break;
	case offsetof(struct xdp_md, rx_queue_index):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, rxq));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct xdp_rxq_info,
					       queue_index));
		break;
	case offsetof(struct xdp_md, egress_ifindex):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, txq));
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
				      si->dst_reg, si->dst_reg,
				      offsetof(struct xdp_txq_info, dev));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct net_device, ifindex));
		break;
	}

	return insn - insn_buf;
}

/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
 * context Structure, F is Field in context structure that contains a pointer
 * to Nested Structure of type NS that has the field NF.
 *
 * SIZE permits to return load size to be either SIZE of Nested Field or
 * SIZE_DEFAULT which is SIZE of context Structure field.
 *
 * OFF permits to set an additional offset inside the Nested Field NF, e.g.
 * to select one word of an IPv6 address.
 */
#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF)	       \
	do {								       \
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg,    \
				      si->src_reg, offsetof(S, F));	       \
		*insn++ = BPF_LDX_MEM(				       \
			SIZE, si->dst_reg, si->dst_reg,			       \
			bpf_target_off(NS, NF, sizeof_field(NS, NF),	       \
				       target_size)			       \
				+ OFF);					       \
	} while (0)

#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF)			       \
	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF,		       \
					     BPF_FIELD_SIZEOF(NS, NF), 0)

/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
 *
 * In addition it uses Temporary Field TF (member of struct S) as the 3rd
 * "register" since two registers available in convert_ctx_access are not
 * enough: we can't override neither SRC, since it contains value to store, nor
 * DST since it contains pointer to context that may be used by later
 * instructions. But we need a temporary place to save pointer to nested
 * structure whose field we want to store to.
 */
#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF)	       \
	do {								       \
		int tmp_reg = BPF_REG_9;				       \
		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
			--tmp_reg;					       \
		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
			--tmp_reg;					       \
		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg,	       \
				      offsetof(S, TF));			       \
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg,	       \
				      si->dst_reg, offsetof(S, F));	       \
		*insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg,	       \
				      bpf_target_off(NS, NF, sizeof_field(NS, NF), \
						     target_size)	       \
					+ OFF);				       \
		*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg,	       \
				      offsetof(S, TF));			       \
	} while (0)

#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
						      TF)		       \
	do {								       \
		if (type == BPF_WRITE) {				       \
			SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE,  \
							 OFF, TF);	       \
		} else {						       \
			SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(		       \
				S, NS, F, NF, SIZE, OFF);		       \
		}							       \
	} while (0)

#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF)		       \
	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(			       \
		S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)

static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
					const struct bpf_insn *si,
					struct bpf_insn *insn_buf,
					struct bpf_prog *prog, u32 *target_size)
{
	int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sock_addr, user_family):
		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
					    struct sockaddr, uaddr, sa_family);
		break;

	case offsetof(struct bpf_sock_addr, user_ip4):
		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
			struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
			sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
		break;

	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
		off = si->off;
		off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
			struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
			sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
			tmp_reg);
		break;

	case offsetof(struct bpf_sock_addr, user_port):
		/* To get port we need to know sa_family first and then treat
		 * sockaddr as either sockaddr_in or sockaddr_in6.
		 * Though we can simplify since port field has same offset and
		 * size in both structures.
		 * Here we check this invariant and use just one of the
		 * structures if it's true.
		 */
		BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
			     offsetof(struct sockaddr_in6, sin6_port));
		BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
			     sizeof_field(struct sockaddr_in6, sin6_port));

		port_size = min(port_size, BPF_LDST_BYTES(si));
		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
			struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
			sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg);
		break;

	case offsetof(struct bpf_sock_addr, family):
		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
					    struct sock, sk, sk_family);
		break;

	case offsetof(struct bpf_sock_addr, type):
		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
					    struct sock, sk, sk_type);
		break;

	case offsetof(struct bpf_sock_addr, protocol):
		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
					    struct sock, sk, sk_protocol);
		break;

	case offsetof(struct bpf_sock_addr, msg_src_ip4):
		/* Treat t_ctx as struct in_addr for msg_src_ip4. */
		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
			struct bpf_sock_addr_kern, struct in_addr, t_ctx,
			s_addr, BPF_SIZE(si->code), 0, tmp_reg);
		break;

	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
				msg_src_ip6[3]):
		off = si->off;
		off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
		/* Treat t_ctx as struct in6_addr for msg_src_ip6. */
		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
			struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
			s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
		break;
	case offsetof(struct bpf_sock_addr, sk):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_addr_kern, sk));
		break;
	}

	return insn - insn_buf;
}

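/* The SOCK_OPS_GET/SET_FIELD() macros below use bpf_sock_ops_kern::temp
 * as scratch space; when the socket is not a full socket
 * (is_fullsock == 0), reads yield 0 and writes are skipped instead of
 * dereferencing sk.
 */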
8309static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
8310 const struct bpf_insn *si,
8311 struct bpf_insn *insn_buf,
8312 struct bpf_prog *prog,
8313 u32 *target_size)
8314{
8315 struct bpf_insn *insn = insn_buf;
8316 int off;
8317
8318
8319#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
8320 do { \
8321 int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \
8322 BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
8323 sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
8324 if (si->dst_reg == reg || si->src_reg == reg) \
8325 reg--; \
8326 if (si->dst_reg == reg || si->src_reg == reg) \
8327 reg--; \
8328 if (si->dst_reg == si->src_reg) { \
8329 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
8330 offsetof(struct bpf_sock_ops_kern, \
8331 temp)); \
8332 fullsock_reg = reg; \
8333 jmp += 2; \
8334 } \
8335 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8336 struct bpf_sock_ops_kern, \
8337 is_fullsock), \
8338 fullsock_reg, si->src_reg, \
8339 offsetof(struct bpf_sock_ops_kern, \
8340 is_fullsock)); \
8341 *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
8342 if (si->dst_reg == si->src_reg) \
8343 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
8344 offsetof(struct bpf_sock_ops_kern, \
8345 temp)); \
8346 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8347 struct bpf_sock_ops_kern, sk),\
8348 si->dst_reg, si->src_reg, \
8349 offsetof(struct bpf_sock_ops_kern, sk));\
8350 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
8351 OBJ_FIELD), \
8352 si->dst_reg, si->dst_reg, \
8353 offsetof(OBJ, OBJ_FIELD)); \
8354 if (si->dst_reg == si->src_reg) { \
8355 *insn++ = BPF_JMP_A(1); \
8356 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
8357 offsetof(struct bpf_sock_ops_kern, \
8358 temp)); \
8359 } \
8360 } while (0)
8361
8362#define SOCK_OPS_GET_SK() \
8363 do { \
8364 int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \
8365 if (si->dst_reg == reg || si->src_reg == reg) \
8366 reg--; \
8367 if (si->dst_reg == reg || si->src_reg == reg) \
8368 reg--; \
8369 if (si->dst_reg == si->src_reg) { \
8370 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
8371 offsetof(struct bpf_sock_ops_kern, \
8372 temp)); \
8373 fullsock_reg = reg; \
8374 jmp += 2; \
8375 } \
8376 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8377 struct bpf_sock_ops_kern, \
8378 is_fullsock), \
8379 fullsock_reg, si->src_reg, \
8380 offsetof(struct bpf_sock_ops_kern, \
8381 is_fullsock)); \
8382 *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
8383 if (si->dst_reg == si->src_reg) \
8384 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
8385 offsetof(struct bpf_sock_ops_kern, \
8386 temp)); \
8387 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8388 struct bpf_sock_ops_kern, sk),\
8389 si->dst_reg, si->src_reg, \
8390 offsetof(struct bpf_sock_ops_kern, sk));\
8391 if (si->dst_reg == si->src_reg) { \
8392 *insn++ = BPF_JMP_A(1); \
8393 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
8394 offsetof(struct bpf_sock_ops_kern, \
8395 temp)); \
8396 } \
8397 } while (0)
8398
8399#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
8400 SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
8401
8402
8403
8404
8405
8406
8407
8408
8409
8410
#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
	do {								      \
		int reg = BPF_REG_9;					      \
		BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) >		      \
			     sizeof_field(struct bpf_sock_ops, BPF_FIELD));   \
		if (si->dst_reg == reg || si->src_reg == reg)		      \
			reg--;						      \
		if (si->dst_reg == reg || si->src_reg == reg)		      \
			reg--;						      \
		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,		      \
				      offsetof(struct bpf_sock_ops_kern,      \
					       temp));			      \
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
						struct bpf_sock_ops_kern,     \
						is_fullsock),		      \
				      reg, si->dst_reg,			      \
				      offsetof(struct bpf_sock_ops_kern,      \
					       is_fullsock));		      \
		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		      \
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
						struct bpf_sock_ops_kern, sk),\
				      reg, si->dst_reg,			      \
				      offsetof(struct bpf_sock_ops_kern, sk));\
		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),      \
				      reg, si->src_reg,			      \
				      offsetof(OBJ, OBJ_FIELD));	      \
		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,		      \
				      offsetof(struct bpf_sock_ops_kern,      \
					       temp));			      \
	} while (0)

#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)	      \
	do {								      \
		if (TYPE == BPF_WRITE)					      \
			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
		else							      \
			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
	} while (0)

	if (insn > insn_buf)
		return insn - insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sock_ops, op) ...
	     offsetof(struct bpf_sock_ops, replylong[3]):
		BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, op) !=
			     sizeof_field(struct bpf_sock_ops_kern, op));
		BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
			     sizeof_field(struct bpf_sock_ops_kern, reply));
		BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
			     sizeof_field(struct bpf_sock_ops_kern, replylong));
		off = si->off;
		off -= offsetof(struct bpf_sock_ops, op);
		off += offsetof(struct bpf_sock_ops_kern, op);
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      off);
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      off);
		break;

	case offsetof(struct bpf_sock_ops, family):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_family));
		break;

	case offsetof(struct bpf_sock_ops, remote_ip4):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_daddr));
		break;

	case offsetof(struct bpf_sock_ops, local_ip4):
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_rcv_saddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_rcv_saddr));
		break;

	case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
	     offsetof(struct bpf_sock_ops, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_v6_daddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_daddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
	     offsetof(struct bpf_sock_ops, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_rcv_saddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct bpf_sock_ops, remote_port):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_dport));
#ifndef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
#endif
		break;

	case offsetof(struct bpf_sock_ops, local_port):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_num));
		break;

	case offsetof(struct bpf_sock_ops, is_fullsock):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct bpf_sock_ops_kern,
						is_fullsock),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern,
					       is_fullsock));
		break;

	case offsetof(struct bpf_sock_ops, state):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_state));
		break;

	case offsetof(struct bpf_sock_ops, rtt_min):
		BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
			     sizeof(struct minmax));
		BUILD_BUG_ON(sizeof(struct minmax) <
			     sizeof(struct minmax_sample));

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct tcp_sock, rtt_min) +
				      sizeof_field(struct minmax_sample, t));
		break;

	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
				   struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, sk_txhash):
		SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
					  struct sock, type);
		break;
	case offsetof(struct bpf_sock_ops, snd_cwnd):
		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
		break;
	case offsetof(struct bpf_sock_ops, srtt_us):
		SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
		break;
	case offsetof(struct bpf_sock_ops, snd_ssthresh):
		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
		break;
	case offsetof(struct bpf_sock_ops, rcv_nxt):
		SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
		break;
	case offsetof(struct bpf_sock_ops, snd_nxt):
		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
		break;
	case offsetof(struct bpf_sock_ops, snd_una):
		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
		break;
	case offsetof(struct bpf_sock_ops, mss_cache):
		SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
		break;
	case offsetof(struct bpf_sock_ops, ecn_flags):
		SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
		break;
	case offsetof(struct bpf_sock_ops, rate_delivered):
		SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
		break;
	case offsetof(struct bpf_sock_ops, rate_interval_us):
		SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
		break;
	case offsetof(struct bpf_sock_ops, packets_out):
		SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
		break;
	case offsetof(struct bpf_sock_ops, retrans_out):
		SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
		break;
	case offsetof(struct bpf_sock_ops, total_retrans):
		SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
		break;
	case offsetof(struct bpf_sock_ops, segs_in):
		SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
		break;
	case offsetof(struct bpf_sock_ops, data_segs_in):
		SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
		break;
	case offsetof(struct bpf_sock_ops, segs_out):
		SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
		break;
	case offsetof(struct bpf_sock_ops, data_segs_out):
		SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
		break;
	case offsetof(struct bpf_sock_ops, lost_out):
		SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
		break;
	case offsetof(struct bpf_sock_ops, sacked_out):
		SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
		break;
	case offsetof(struct bpf_sock_ops, bytes_received):
		SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
		break;
	case offsetof(struct bpf_sock_ops, bytes_acked):
		SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
		break;
	case offsetof(struct bpf_sock_ops, sk):
		SOCK_OPS_GET_SK();
		break;
	}
	return insn - insn_buf;
}

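/* sk_skb (sockmap parser/verdict) programs reuse the generic __sk_buff
 * conversion, except that data_end is read from the skb control block
 * (tcp_skb_cb.bpf.data_end) rather than derived from the skb itself.
 */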
static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	int off;

	switch (si->off) {
	case offsetof(struct __sk_buff, data_end):
		off = si->off;
		off -= offsetof(struct __sk_buff, data_end);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct tcp_skb_cb, bpf.data_end);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
				      si->src_reg, off);
		break;
	default:
		return bpf_convert_ctx_access(type, si, insn_buf, prog,
					      target_size);
	}

	return insn - insn_buf;
}

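/* Map sk_msg_md ctx accesses onto struct sk_msg and its embedded
 * scatterlist state; socket identity fields are read through msg->sk.
 */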
static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
#if IS_ENABLED(CONFIG_IPV6)
	int off;
#endif

	/* convert ctx uses the fact sg element is first in struct */
	BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);

	switch (si->off) {
	case offsetof(struct sk_msg_md, data):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, data));
		break;
	case offsetof(struct sk_msg_md, data_end):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, data_end));
		break;
	case offsetof(struct sk_msg_md, family):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_family));
		break;

	case offsetof(struct sk_msg_md, remote_ip4):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct sk_msg, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_daddr));
		break;

	case offsetof(struct sk_msg_md, local_ip4):
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_rcv_saddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_rcv_saddr));
		break;

	case offsetof(struct sk_msg_md, remote_ip6[0]) ...
	     offsetof(struct sk_msg_md, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_v6_daddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct sk_msg_md, remote_ip6[0]);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct sk_msg, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_daddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct sk_msg_md, local_ip6[0]) ...
	     offsetof(struct sk_msg_md, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(sizeof_field(struct sock_common,
					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct sk_msg_md, local_ip6[0]);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct sk_msg, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_rcv_saddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct sk_msg_md, remote_port):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct sk_msg, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_dport));
#ifndef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
#endif
		break;

	case offsetof(struct sk_msg_md, local_port):
		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
						struct sk_msg, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_num));
		break;

	case offsetof(struct sk_msg_md, size):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_sg, size));
		break;

	case offsetof(struct sk_msg_md, sk):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg, sk));
		break;
	}

	return insn - insn_buf;
}

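/* Verifier and runtime callbacks for each program type handled in this
 * file; ->convert_ctx_access rewrites ctx field accesses into loads from
 * the kernel-side structures, as implemented above.
 */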
const struct bpf_verifier_ops sk_filter_verifier_ops = {
	.get_func_proto		= sk_filter_func_proto,
	.is_valid_access	= sk_filter_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
	.gen_ld_abs		= bpf_gen_ld_abs,
};

const struct bpf_prog_ops sk_filter_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
	.get_func_proto		= tc_cls_act_func_proto,
	.is_valid_access	= tc_cls_act_is_valid_access,
	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
	.gen_prologue		= tc_cls_act_prologue,
	.gen_ld_abs		= bpf_gen_ld_abs,
};

const struct bpf_prog_ops tc_cls_act_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops xdp_verifier_ops = {
	.get_func_proto		= xdp_func_proto,
	.is_valid_access	= xdp_is_valid_access,
	.convert_ctx_access	= xdp_convert_ctx_access,
	.gen_prologue		= bpf_noop_prologue,
};

const struct bpf_prog_ops xdp_prog_ops = {
	.test_run		= bpf_prog_test_run_xdp,
};

const struct bpf_verifier_ops cg_skb_verifier_ops = {
	.get_func_proto		= cg_skb_func_proto,
	.is_valid_access	= cg_skb_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops cg_skb_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_in_verifier_ops = {
	.get_func_proto		= lwt_in_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops lwt_in_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_out_verifier_ops = {
	.get_func_proto		= lwt_out_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops lwt_out_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
	.get_func_proto		= lwt_xmit_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
	.gen_prologue		= tc_cls_act_prologue,
};

const struct bpf_prog_ops lwt_xmit_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
	.get_func_proto		= lwt_seg6local_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops lwt_seg6local_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops cg_sock_verifier_ops = {
	.get_func_proto		= sock_filter_func_proto,
	.is_valid_access	= sock_filter_is_valid_access,
	.convert_ctx_access	= bpf_sock_convert_ctx_access,
};

const struct bpf_prog_ops cg_sock_prog_ops = {
};

const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
	.get_func_proto		= sock_addr_func_proto,
	.is_valid_access	= sock_addr_is_valid_access,
	.convert_ctx_access	= sock_addr_convert_ctx_access,
};

const struct bpf_prog_ops cg_sock_addr_prog_ops = {
};

const struct bpf_verifier_ops sock_ops_verifier_ops = {
	.get_func_proto		= sock_ops_func_proto,
	.is_valid_access	= sock_ops_is_valid_access,
	.convert_ctx_access	= sock_ops_convert_ctx_access,
};

const struct bpf_prog_ops sock_ops_prog_ops = {
};

const struct bpf_verifier_ops sk_skb_verifier_ops = {
	.get_func_proto		= sk_skb_func_proto,
	.is_valid_access	= sk_skb_is_valid_access,
	.convert_ctx_access	= sk_skb_convert_ctx_access,
	.gen_prologue		= sk_skb_prologue,
};

const struct bpf_prog_ops sk_skb_prog_ops = {
};

const struct bpf_verifier_ops sk_msg_verifier_ops = {
	.get_func_proto		= sk_msg_func_proto,
	.is_valid_access	= sk_msg_is_valid_access,
	.convert_ctx_access	= sk_msg_convert_ctx_access,
	.gen_prologue		= bpf_noop_prologue,
};

const struct bpf_prog_ops sk_msg_prog_ops = {
};

const struct bpf_verifier_ops flow_dissector_verifier_ops = {
	.get_func_proto		= flow_dissector_func_proto,
	.is_valid_access	= flow_dissector_is_valid_access,
	.convert_ctx_access	= flow_dissector_convert_ctx_access,
};

const struct bpf_prog_ops flow_dissector_prog_ops = {
	.test_run		= bpf_prog_test_run_flow_dissector,
};

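/* Detach the classic BPF filter attached via SO_ATTACH_FILTER. Called with
 * the socket lock held; fails with -EPERM when SO_LOCK_FILTER is set.
 */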
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);

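/* Copy the classic BPF program attached to @sk out to user space, as used
 * by the SO_GET_FILTER socket option. Returns the number of filter blocks
 * on success.
 */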
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that had been originally attached,
	 * so no conversion/decode needed anymore. eBPF programs that
	 * have no original program cannot be dumped through this.
	 */
	ret = -EACCES;
	fprog = filter->prog->orig_prog;
	if (!fprog)
		goto out;

	ret = fprog->len;
	if (!len)
		/* User space only enquires number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
		goto out;

	/* Instead of bytes, the API requests to return the number
	 * of filter blocks.
	 */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}

#ifdef CONFIG_INET
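/* Populate the context seen by a SO_REUSEPORT BPF program for one incoming
 * skb: the linear packet data bounds, the listening socket, and the
 * precomputed hash.
 */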
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
				    struct sock_reuseport *reuse,
				    struct sock *sk, struct sk_buff *skb,
				    u32 hash)
{
	reuse_kern->skb = skb;
	reuse_kern->sk = sk;
	reuse_kern->selected_sk = NULL;
	reuse_kern->data_end = skb->data + skb_headlen(skb);
	reuse_kern->hash = hash;
	reuse_kern->reuseport_id = reuse->reuseport_id;
	reuse_kern->bind_inany = reuse->bind_inany;
}

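/* Run the BPF program attached to a reuseport group. On SK_PASS the
 * program's selected socket is returned (possibly NULL when none was
 * chosen); any other verdict drops the packet via ERR_PTR(-ECONNREFUSED).
 */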
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
				  struct bpf_prog *prog, struct sk_buff *skb,
				  u32 hash)
{
	struct sk_reuseport_kern reuse_kern;
	enum sk_action action;

	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
	action = BPF_PROG_RUN(prog, &reuse_kern);

	if (action == SK_PASS)
		return reuse_kern.selected_sk;
	else
		return ERR_PTR(-ECONNREFUSED);
}

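/* bpf_sk_select_reuseport() helper: look @key up in @map and, when the
 * found socket belongs to the same reuseport group as the current one,
 * record it as the selected socket.
 */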
BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
	   struct bpf_map *, map, void *, key, u32, flags)
{
	bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
	struct sock_reuseport *reuse;
	struct sock *selected_sk;

	selected_sk = map->ops->map_lookup_elem(map, key);
	if (!selected_sk)
		return -ENOENT;

	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
	if (!reuse) {
		/* Lookup in sock_map can return TCP ESTABLISHED sockets. */
		if (sk_is_refcounted(selected_sk))
			sock_put(selected_sk);

		/* reuseport_array has only sk with non NULL sk_reuseport_cb.
		 * The only (!reuse) case here is - the sk has already been
		 * unhashed (e.g. by close()), so treat it as -ENOENT.
		 *
		 * Other maps (e.g. sock_map) do not provide this guarantee and
		 * the sk may never be in the reuseport group to begin with.
		 */
		return is_sockarray ? -ENOENT : -EINVAL;
	}

	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
		struct sock *sk = reuse_kern->sk;

		if (sk->sk_protocol != selected_sk->sk_protocol)
			return -EPROTOTYPE;
		else if (sk->sk_family != selected_sk->sk_family)
			return -EAFNOSUPPORT;

		/* Catch all. Likely bound to a different sockaddr. */
		return -EBADFD;
	}

	reuse_kern->selected_sk = selected_sk;

	return 0;
}

static const struct bpf_func_proto sk_select_reuseport_proto = {
	.func		= sk_select_reuseport,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_PTR_TO_MAP_KEY,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_4(sk_reuseport_load_bytes,
	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
	   void *, to, u32, len)
{
	return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
}

static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
	.func		= sk_reuseport_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

BPF_CALL_5(sk_reuseport_load_bytes_relative,
	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
	   void *, to, u32, len, u32, start_header)
{
	return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
					       len, start_header);
}

static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
	.func		= sk_reuseport_load_bytes_relative,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
sk_reuseport_func_proto(enum bpf_func_id func_id,
			const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sk_select_reuseport:
		return &sk_select_reuseport_proto;
	case BPF_FUNC_skb_load_bytes:
		return &sk_reuseport_load_bytes_proto;
	case BPF_FUNC_skb_load_bytes_relative:
		return &sk_reuseport_load_bytes_relative_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static bool
sk_reuseport_is_valid_access(int off, int size,
			     enum bpf_access_type type,
			     const struct bpf_prog *prog,
			     struct bpf_insn_access_aux *info)
{
	const u32 size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
	    off % size || type != BPF_READ)
		return false;

	switch (off) {
	case offsetof(struct sk_reuseport_md, data):
		info->reg_type = PTR_TO_PACKET;
		return size == sizeof(__u64);

	case offsetof(struct sk_reuseport_md, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		return size == sizeof(__u64);

	case offsetof(struct sk_reuseport_md, hash):
		return size == size_default;

	/* Fields that allow narrowing */
	case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
		if (size < sizeof_field(struct sk_buff, protocol))
			return false;
		fallthrough;
	case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
	case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
	case bpf_ctx_range(struct sk_reuseport_md, len):
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);

	default:
		return false;
	}
}

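/* Emitters for sk_reuseport_md ctx access: load a field either directly
 * from struct sk_reuseport_kern or from the nested skb/sk it points to.
 */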
#define SK_REUSEPORT_LOAD_FIELD(F) ({					\
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
			      si->dst_reg, si->src_reg,			\
			      bpf_target_off(struct sk_reuseport_kern, F, \
					     sizeof_field(struct sk_reuseport_kern, F), \
					     target_size));		\
	})

#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
				    struct sk_buff,			\
				    skb,				\
				    SKB_FIELD)

#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD)				\
	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
				    struct sock,			\
				    sk,					\
				    SK_FIELD)

static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
					   const struct bpf_insn *si,
					   struct bpf_insn *insn_buf,
					   struct bpf_prog *prog,
					   u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct sk_reuseport_md, data):
		SK_REUSEPORT_LOAD_SKB_FIELD(data);
		break;

	case offsetof(struct sk_reuseport_md, len):
		SK_REUSEPORT_LOAD_SKB_FIELD(len);
		break;

	case offsetof(struct sk_reuseport_md, eth_protocol):
		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
		break;

	case offsetof(struct sk_reuseport_md, ip_protocol):
		SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
		break;

	case offsetof(struct sk_reuseport_md, data_end):
		SK_REUSEPORT_LOAD_FIELD(data_end);
		break;

	case offsetof(struct sk_reuseport_md, hash):
		SK_REUSEPORT_LOAD_FIELD(hash);
		break;

	case offsetof(struct sk_reuseport_md, bind_inany):
		SK_REUSEPORT_LOAD_FIELD(bind_inany);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
	.get_func_proto		= sk_reuseport_func_proto,
	.is_valid_access	= sk_reuseport_is_valid_access,
	.convert_ctx_access	= sk_reuseport_convert_ctx_access,
};

const struct bpf_prog_ops sk_reuseport_prog_ops = {
};

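/* BPF_PROG_TYPE_SK_LOOKUP: programs attached to a network namespace that
 * can steer an incoming TCP connection request or UDP packet to a chosen
 * unconnected socket via the bpf_sk_assign() helper below. The static key
 * keeps the hook free when no program is attached.
 */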
DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
EXPORT_SYMBOL(bpf_sk_lookup_enabled);

BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
	   struct sock *, sk, u64, flags)
{
	if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
			       BPF_SK_LOOKUP_F_NO_REUSEPORT)))
		return -EINVAL;
	if (unlikely(sk && sk_is_refcounted(sk)))
		return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
	if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
		return -ESOCKTNOSUPPORT; /* reject connected sockets */

	/* Check if socket is suitable for packet L3/L4 protocol */
	if (sk && sk->sk_protocol != ctx->protocol)
		return -EPROTOTYPE;
	if (sk && sk->sk_family != ctx->family &&
	    (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
		return -EAFNOSUPPORT;

	if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
		return -EEXIST;

	/* Select socket as lookup result */
	ctx->selected_sk = sk;
	ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
	return 0;
}

static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
	.func		= bpf_sk_lookup_assign,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_SOCKET_OR_NULL,
	.arg3_type	= ARG_ANYTHING,
};

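/* A minimal BPF-side sketch (hypothetical map and program names, for
 * illustration only) of how bpf_sk_assign() is typically used from an
 * SK_LOOKUP program: fetch a socket from a pre-populated
 * BPF_MAP_TYPE_SOCKMAP and steer the packet to it. The reference taken
 * by the map lookup must be dropped with bpf_sk_release().
 *
 *	SEC("sk_lookup")
 *	int steer(struct bpf_sk_lookup *ctx)
 *	{
 *		const __u32 key = 0;
 *		struct bpf_sock *sk;
 *		long err;
 *
 *		sk = bpf_map_lookup_elem(&dest_socks, &key);
 *		if (!sk)
 *			return SK_DROP;
 *		err = bpf_sk_assign(ctx, sk, 0);
 *		bpf_sk_release(sk);
 *		return err ? SK_DROP : SK_PASS;
 *	}
 */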
static const struct bpf_func_proto *
sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	case BPF_FUNC_sk_assign:
		return &bpf_sk_lookup_assign_proto;
	case BPF_FUNC_sk_release:
		return &bpf_sk_release_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static bool sk_lookup_is_valid_access(int off, int size,
				      enum bpf_access_type type,
				      const struct bpf_prog *prog,
				      struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
		return false;
	if (off % size != 0)
		return false;
	if (type != BPF_READ)
		return false;

	switch (off) {
	case offsetof(struct bpf_sk_lookup, sk):
		info->reg_type = PTR_TO_SOCKET_OR_NULL;
		return size == sizeof(__u64);

	case bpf_ctx_range(struct bpf_sk_lookup, family):
	case bpf_ctx_range(struct bpf_sk_lookup, protocol):
	case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
	case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
	case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
	case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
	case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
	case bpf_ctx_range(struct bpf_sk_lookup, local_port):
		bpf_ctx_record_field_size(info, sizeof(__u32));
		return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));

	default:
		return false;
	}
}

static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
					const struct bpf_insn *si,
					struct bpf_insn *insn_buf,
					struct bpf_prog *prog,
					u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sk_lookup, sk):
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sk_lookup_kern, selected_sk));
		break;

	case offsetof(struct bpf_sk_lookup, family):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct bpf_sk_lookup_kern,
						     family, 2, target_size));
		break;

	case offsetof(struct bpf_sk_lookup, protocol):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct bpf_sk_lookup_kern,
						     protocol, 2, target_size));
		break;

	case offsetof(struct bpf_sk_lookup, remote_ip4):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct bpf_sk_lookup_kern,
						     v4.saddr, 4, target_size));
		break;

	case offsetof(struct bpf_sk_lookup, local_ip4):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct bpf_sk_lookup_kern,
						     v4.daddr, 4, target_size));
		break;

	case bpf_ctx_range_till(struct bpf_sk_lookup,
				remote_ip6[0], remote_ip6[3]): {
#if IS_ENABLED(CONFIG_IPV6)
		int off = si->off;

		off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
		off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sk_lookup_kern, v6.saddr));
		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;
	}
	case bpf_ctx_range_till(struct bpf_sk_lookup,
				local_ip6[0], local_ip6[3]): {
#if IS_ENABLED(CONFIG_IPV6)
		int off = si->off;

		off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
		off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sk_lookup_kern, v6.daddr));
		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;
	}
	case offsetof(struct bpf_sk_lookup, remote_port):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct bpf_sk_lookup_kern,
						     sport, 2, target_size));
		break;

	case offsetof(struct bpf_sk_lookup, local_port):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct bpf_sk_lookup_kern,
						     dport, 2, target_size));
		break;
	}

	return insn - insn_buf;
}

const struct bpf_prog_ops sk_lookup_prog_ops = {
};

const struct bpf_verifier_ops sk_lookup_verifier_ops = {
	.get_func_proto		= sk_lookup_func_proto,
	.is_valid_access	= sk_lookup_is_valid_access,
	.convert_ctx_access	= sk_lookup_convert_ctx_access,
};

#endif /* CONFIG_INET */

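/* XDP programs are entered through a dispatcher trampoline; installing a
 * new program patches the dispatcher instead of taking an indirect call
 * on the fast path.
 */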
DEFINE_BPF_DISPATCHER(xdp)

void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{
	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
}

#ifdef CONFIG_DEBUG_INFO_BTF
BTF_ID_LIST_GLOBAL(btf_sock_ids)
#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
BTF_SOCK_TYPE_xxx
#undef BTF_SOCK_TYPE
#else
u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
#endif

static bool check_arg_btf_id(u32 btf_id, u32 arg)
{
	int i;

	/* only one argument, no need to check arg */
	for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
		if (btf_sock_ids[i] == btf_id)
			return true;
	return false;
}

BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
{
	/* tcp6_sock type is not generated in dwarf and hence btf,
	 * trigger an explicit type generation here.
	 */
	BTF_TYPE_EMIT(struct tcp6_sock);
	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
	    sk->sk_family == AF_INET6)
		return (unsigned long)sk;

	return (unsigned long)NULL;
}

const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
	.func			= bpf_skc_to_tcp6_sock,
	.gpl_only		= false,
	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type		= ARG_PTR_TO_BTF_ID,
	.check_btf_id		= check_arg_btf_id,
	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
};

BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
{
	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
		return (unsigned long)sk;

	return (unsigned long)NULL;
}

const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
	.func			= bpf_skc_to_tcp_sock,
	.gpl_only		= false,
	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type		= ARG_PTR_TO_BTF_ID,
	.check_btf_id		= check_arg_btf_id,
	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP],
};

BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
{
	/* BTF types for tcp_timewait_sock and inet_timewait_sock are not
	 * generated if CONFIG_INET=n. Trigger an explicit generation here.
	 */
	BTF_TYPE_EMIT(struct inet_timewait_sock);
	BTF_TYPE_EMIT(struct tcp_timewait_sock);

#ifdef CONFIG_INET
	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
		return (unsigned long)sk;
#endif

#if IS_BUILTIN(CONFIG_IPV6)
	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
		return (unsigned long)sk;
#endif

	return (unsigned long)NULL;
}

const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
	.func			= bpf_skc_to_tcp_timewait_sock,
	.gpl_only		= false,
	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type		= ARG_PTR_TO_BTF_ID,
	.check_btf_id		= check_arg_btf_id,
	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
};

BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
{
#ifdef CONFIG_INET
	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
		return (unsigned long)sk;
#endif

#if IS_BUILTIN(CONFIG_IPV6)
	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
		return (unsigned long)sk;
#endif

	return (unsigned long)NULL;
}

const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
	.func			= bpf_skc_to_tcp_request_sock,
	.gpl_only		= false,
	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type		= ARG_PTR_TO_BTF_ID,
	.check_btf_id		= check_arg_btf_id,
	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
};

BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
{
	/* udp6_sock type is not generated in dwarf and hence btf,
	 * trigger an explicit type generation here.
	 */
	BTF_TYPE_EMIT(struct udp6_sock);
	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
	    sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
		return (unsigned long)sk;

	return (unsigned long)NULL;
}

const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
	.func			= bpf_skc_to_udp6_sock,
	.gpl_only		= false,
	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
	.arg1_type		= ARG_PTR_TO_BTF_ID,
	.check_btf_id		= check_arg_btf_id,
	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
