/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. Classic BPF
 * programs are translated into the extended (eBPF) instruction set
 * before they are run; see bpf_convert_filter() below.
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
#include <net/ip_fib.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>

#include <linux/rh_features.h>

/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);
125
126BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
127{
128 return skb_get_poff(skb);
129}
130
131BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
132{
133 struct nlattr *nla;
134
135 if (skb_is_nonlinear(skb))
136 return 0;
137
138 if (skb->len < sizeof(struct nlattr))
139 return 0;
140
141 if (a > skb->len - sizeof(struct nlattr))
142 return 0;
143
144 nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
145 if (nla)
146 return (void *) nla - (void *) skb->data;
147
148 return 0;
149}
150
151BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
152{
153 struct nlattr *nla;
154
155 if (skb_is_nonlinear(skb))
156 return 0;
157
158 if (skb->len < sizeof(struct nlattr))
159 return 0;
160
161 if (a > skb->len - sizeof(struct nlattr))
162 return 0;
163
164 nla = (struct nlattr *) &skb->data[a];
165 if (nla->nla_len > skb->len - a)
166 return 0;
167
168 nla = nla_find_nested(nla, x);
169 if (nla)
170 return (void *) nla - (void *) skb->data;
171
172 return 0;
173}
174
175BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
176 data, int, headlen, int, offset)
177{
178 u8 tmp, *ptr;
179 const int len = sizeof(tmp);
180
181 if (offset >= 0) {
182 if (headlen - offset >= len)
183 return *(u8 *)(data + offset);
184 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
185 return tmp;
186 } else {
187 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
188 if (likely(ptr))
189 return *(u8 *)ptr;
190 }
191
192 return -EFAULT;
193}
194
195BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
196 int, offset)
197{
198 return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
199 offset);
200}
201
202BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
203 data, int, headlen, int, offset)
204{
205 u16 tmp, *ptr;
206 const int len = sizeof(tmp);
207
208 if (offset >= 0) {
209 if (headlen - offset >= len)
210 return get_unaligned_be16(data + offset);
211 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
212 return be16_to_cpu(tmp);
213 } else {
214 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
215 if (likely(ptr))
216 return get_unaligned_be16(ptr);
217 }
218
219 return -EFAULT;
220}
221
222BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
223 int, offset)
224{
225 return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
226 offset);
227}
228
229BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
230 data, int, headlen, int, offset)
231{
232 u32 tmp, *ptr;
233 const int len = sizeof(tmp);
234
235 if (likely(offset >= 0)) {
236 if (headlen - offset >= len)
237 return get_unaligned_be32(data + offset);
238 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
239 return be32_to_cpu(tmp);
240 } else {
241 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
242 if (likely(ptr))
243 return get_unaligned_be32(ptr);
244 }
245
246 return -EFAULT;
247}
248
249BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
250 int, offset)
251{
252 return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
253 offset);
254}
255
256BPF_CALL_0(bpf_get_raw_cpu_id)
257{
258 return raw_smp_processor_id();
259}
260
261static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
262 .func = bpf_get_raw_cpu_id,
263 .gpl_only = false,
264 .ret_type = RET_INTEGER,
265};
266
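/* Emit eBPF instructions that load the classic BPF ancillary skb field
 * @skb_field (mark, pkt_type, queue_mapping or vlan_tci) from the skb
 * pointed to by @src_reg into @dst_reg. Returns the number of
 * instructions written to @insn_buf.
 */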
267static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
268 struct bpf_insn *insn_buf)
269{
270 struct bpf_insn *insn = insn_buf;
271
272 switch (skb_field) {
273 case SKF_AD_MARK:
274 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
275
276 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
277 offsetof(struct sk_buff, mark));
278 break;
279
280 case SKF_AD_PKTTYPE:
281 *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
282 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
283#ifdef __BIG_ENDIAN_BITFIELD
284 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
285#endif
286 break;
287
288 case SKF_AD_QUEUE:
289 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
290
291 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
292 offsetof(struct sk_buff, queue_mapping));
293 break;
294
295 case SKF_AD_VLAN_TAG:
296 case SKF_AD_VLAN_TAG_PRESENT:
297 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
298 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
299
300
301 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
302 offsetof(struct sk_buff, vlan_tci));
303 if (skb_field == SKF_AD_VLAN_TAG) {
304 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
305 ~VLAN_TAG_PRESENT);
306 } else {
307
308 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
309
310 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
311 }
312 break;
313 }
314
315 return insn - insn_buf;
316}
317
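/* Translate a classic BPF load from an SKF_AD_OFF ancillary offset into
 * native eBPF instructions or calls to the helpers above. Returns false
 * when the offset is not a known extension, in which case the caller
 * falls back to treating it as an ordinary absolute packet load.
 */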
318static bool convert_bpf_extensions(struct sock_filter *fp,
319 struct bpf_insn **insnp)
320{
321 struct bpf_insn *insn = *insnp;
322 u32 cnt;
323
324 switch (fp->k) {
325 case SKF_AD_OFF + SKF_AD_PROTOCOL:
326 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
327
328
329 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
330 offsetof(struct sk_buff, protocol));
331
332 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
333 break;
334
335 case SKF_AD_OFF + SKF_AD_PKTTYPE:
336 cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
337 insn += cnt - 1;
338 break;
339
340 case SKF_AD_OFF + SKF_AD_IFINDEX:
341 case SKF_AD_OFF + SKF_AD_HATYPE:
342 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
343 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
344
345 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
346 BPF_REG_TMP, BPF_REG_CTX,
347 offsetof(struct sk_buff, dev));
348
349 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
350 *insn++ = BPF_EXIT_INSN();
351 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
352 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
353 offsetof(struct net_device, ifindex));
354 else
355 *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
356 offsetof(struct net_device, type));
357 break;
358
359 case SKF_AD_OFF + SKF_AD_MARK:
360 cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
361 insn += cnt - 1;
362 break;
363
364 case SKF_AD_OFF + SKF_AD_RXHASH:
365 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
366
367 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
368 offsetof(struct sk_buff, hash));
369 break;
370
371 case SKF_AD_OFF + SKF_AD_QUEUE:
372 cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
373 insn += cnt - 1;
374 break;
375
376 case SKF_AD_OFF + SKF_AD_VLAN_TAG:
377 cnt = convert_skb_access(SKF_AD_VLAN_TAG,
378 BPF_REG_A, BPF_REG_CTX, insn);
379 insn += cnt - 1;
380 break;
381
382 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
383 cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
384 BPF_REG_A, BPF_REG_CTX, insn);
385 insn += cnt - 1;
386 break;
387
388 case SKF_AD_OFF + SKF_AD_VLAN_TPID:
389 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
390
391
392 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
393 offsetof(struct sk_buff, vlan_proto));
394
395 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
396 break;
397
398 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
399 case SKF_AD_OFF + SKF_AD_NLATTR:
400 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
401 case SKF_AD_OFF + SKF_AD_CPU:
402 case SKF_AD_OFF + SKF_AD_RANDOM:
403
404 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
405
406 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
407
408 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
409
410 switch (fp->k) {
411 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
412 *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
413 break;
414 case SKF_AD_OFF + SKF_AD_NLATTR:
415 *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
416 break;
417 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
418 *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
419 break;
420 case SKF_AD_OFF + SKF_AD_CPU:
421 *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
422 break;
423 case SKF_AD_OFF + SKF_AD_RANDOM:
424 *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
425 bpf_user_rnd_init_once();
426 break;
427 }
428 break;
429
430 case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
431
432 *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
433 break;
434
435 default:
436
437
438
439
440 BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
441 return false;
442 }
443
444 *insnp = insn;
445 return true;
446}
447
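/* Convert classic BPF LD_ABS/LD_IND into a direct packet load where the
 * constant, non-negative offset allows it, with a fallback to the
 * slow-path bpf_skb_load_helper_*() calls that also handle non-linear
 * data and negative offsets. On a failed load the A register is zeroed
 * and the program exits, mirroring classic BPF semantics.
 */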
448static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
449{
450 const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
451 int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
452 bool endian = BPF_SIZE(fp->code) == BPF_H ||
453 BPF_SIZE(fp->code) == BPF_W;
454 bool indirect = BPF_MODE(fp->code) == BPF_IND;
455 const int ip_align = NET_IP_ALIGN;
456 struct bpf_insn *insn = *insnp;
457 int offset = fp->k;
458
459 if (!indirect &&
460 ((unaligned_ok && offset >= 0) ||
461 (!unaligned_ok && offset >= 0 &&
462 offset + ip_align >= 0 &&
463 offset + ip_align % size == 0))) {
464 bool ldx_off_ok = offset <= S16_MAX;
465
466 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
467 *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
468 *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
469 size, 2 + endian + (!ldx_off_ok * 2));
470 if (ldx_off_ok) {
471 *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
472 BPF_REG_D, offset);
473 } else {
474 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
475 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
476 *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
477 BPF_REG_TMP, 0);
478 }
479 if (endian)
480 *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
481 *insn++ = BPF_JMP_A(8);
482 }
483
484 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
485 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
486 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
487 if (!indirect) {
488 *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
489 } else {
490 *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
491 if (fp->k)
492 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
493 }
494
495 switch (BPF_SIZE(fp->code)) {
496 case BPF_B:
497 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
498 break;
499 case BPF_H:
500 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
501 break;
502 case BPF_W:
503 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
504 break;
505 default:
506 return false;
507 }
508
509 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
510 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
511 *insn = BPF_EXIT_INSN();
512
513 *insnp = insn;
514 return true;
515}
516
/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *	@seen_ld_abs: bool whether we've seen ld_abs/ind
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to
 * 'bpf_insn' style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *	bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *	jump offsets, 2nd pass remapping:
 *	bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 */
536static int bpf_convert_filter(struct sock_filter *prog, int len,
537 struct bpf_prog *new_prog, int *new_len,
538 bool *seen_ld_abs)
539{
540 int new_flen = 0, pass = 0, target, i, stack_off;
541 struct bpf_insn *new_insn, *first_insn = NULL;
542 struct sock_filter *fp;
543 int *addrs = NULL;
544 u8 bpf_src;
545
546 BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
547 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
548
549 if (len <= 0 || len > BPF_MAXINSNS)
550 return -EINVAL;
551
552 if (new_prog) {
553 first_insn = new_prog->insnsi;
554 addrs = kcalloc(len, sizeof(*addrs),
555 GFP_KERNEL | __GFP_NOWARN);
556 if (!addrs)
557 return -ENOMEM;
558 }
559
560do_pass:
561 new_insn = first_insn;
562 fp = prog;
563
564
565 if (new_prog) {
566
567
568
569 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
570 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
571
572
573
574
575
576 *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
577 if (*seen_ld_abs) {
578
579
580
581
582
583 *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
584 BPF_REG_D, BPF_REG_CTX,
585 offsetof(struct sk_buff, data));
586 *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
587 offsetof(struct sk_buff, len));
588 *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
589 offsetof(struct sk_buff, data_len));
590 *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
591 }
592 } else {
593 new_insn += 3;
594 }
595
596 for (i = 0; i < len; fp++, i++) {
597 struct bpf_insn tmp_insns[32] = { };
598 struct bpf_insn *insn = tmp_insns;
599
600 if (addrs)
601 addrs[i] = new_insn - first_insn;
602
603 switch (fp->code) {
604
605 case BPF_ALU | BPF_ADD | BPF_X:
606 case BPF_ALU | BPF_ADD | BPF_K:
607 case BPF_ALU | BPF_SUB | BPF_X:
608 case BPF_ALU | BPF_SUB | BPF_K:
609 case BPF_ALU | BPF_AND | BPF_X:
610 case BPF_ALU | BPF_AND | BPF_K:
611 case BPF_ALU | BPF_OR | BPF_X:
612 case BPF_ALU | BPF_OR | BPF_K:
613 case BPF_ALU | BPF_LSH | BPF_X:
614 case BPF_ALU | BPF_LSH | BPF_K:
615 case BPF_ALU | BPF_RSH | BPF_X:
616 case BPF_ALU | BPF_RSH | BPF_K:
617 case BPF_ALU | BPF_XOR | BPF_X:
618 case BPF_ALU | BPF_XOR | BPF_K:
619 case BPF_ALU | BPF_MUL | BPF_X:
620 case BPF_ALU | BPF_MUL | BPF_K:
621 case BPF_ALU | BPF_DIV | BPF_X:
622 case BPF_ALU | BPF_DIV | BPF_K:
623 case BPF_ALU | BPF_MOD | BPF_X:
624 case BPF_ALU | BPF_MOD | BPF_K:
625 case BPF_ALU | BPF_NEG:
626 case BPF_LD | BPF_ABS | BPF_W:
627 case BPF_LD | BPF_ABS | BPF_H:
628 case BPF_LD | BPF_ABS | BPF_B:
629 case BPF_LD | BPF_IND | BPF_W:
630 case BPF_LD | BPF_IND | BPF_H:
631 case BPF_LD | BPF_IND | BPF_B:
632
633
634
635
636 if (BPF_CLASS(fp->code) == BPF_LD &&
637 BPF_MODE(fp->code) == BPF_ABS &&
638 convert_bpf_extensions(fp, &insn))
639 break;
640 if (BPF_CLASS(fp->code) == BPF_LD &&
641 convert_bpf_ld_abs(fp, &insn)) {
642 *seen_ld_abs = true;
643 break;
644 }
645
646 if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
647 fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
648 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
649
650
651
652 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
653 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
654 *insn++ = BPF_EXIT_INSN();
655 }
656
657 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
658 break;
659
660
661
662
663
664
665
666#define BPF_EMIT_JMP \
667 do { \
668 const s32 off_min = S16_MIN, off_max = S16_MAX; \
669 s32 off; \
670 \
671 if (target >= len || target < 0) \
672 goto err; \
673 off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
674 \
675 off -= insn - tmp_insns; \
676 \
677 if (off < off_min || off > off_max) \
678 goto err; \
679 insn->off = off; \
680 } while (0)
681
682 case BPF_JMP | BPF_JA:
683 target = i + fp->k + 1;
684 insn->code = fp->code;
685 BPF_EMIT_JMP;
686 break;
687
688 case BPF_JMP | BPF_JEQ | BPF_K:
689 case BPF_JMP | BPF_JEQ | BPF_X:
690 case BPF_JMP | BPF_JSET | BPF_K:
691 case BPF_JMP | BPF_JSET | BPF_X:
692 case BPF_JMP | BPF_JGT | BPF_K:
693 case BPF_JMP | BPF_JGT | BPF_X:
694 case BPF_JMP | BPF_JGE | BPF_K:
695 case BPF_JMP | BPF_JGE | BPF_X:
696 if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
697
698
699
700
701 *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
702
703 insn->dst_reg = BPF_REG_A;
704 insn->src_reg = BPF_REG_TMP;
705 bpf_src = BPF_X;
706 } else {
707 insn->dst_reg = BPF_REG_A;
708 insn->imm = fp->k;
709 bpf_src = BPF_SRC(fp->code);
710 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
711 }
712
713
714 if (fp->jf == 0) {
715 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
716 target = i + fp->jt + 1;
717 BPF_EMIT_JMP;
718 break;
719 }
720
721
722 if (fp->jt == 0) {
723 switch (BPF_OP(fp->code)) {
724 case BPF_JEQ:
725 insn->code = BPF_JMP | BPF_JNE | bpf_src;
726 break;
727 case BPF_JGT:
728 insn->code = BPF_JMP | BPF_JLE | bpf_src;
729 break;
730 case BPF_JGE:
731 insn->code = BPF_JMP | BPF_JLT | bpf_src;
732 break;
733 default:
734 goto jmp_rest;
735 }
736
737 target = i + fp->jf + 1;
738 BPF_EMIT_JMP;
739 break;
740 }
741jmp_rest:
742
743 target = i + fp->jt + 1;
744 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
745 BPF_EMIT_JMP;
746 insn++;
747
748 insn->code = BPF_JMP | BPF_JA;
749 target = i + fp->jf + 1;
750 BPF_EMIT_JMP;
751 break;
752
753
754 case BPF_LDX | BPF_MSH | BPF_B: {
755 struct sock_filter tmp = {
756 .code = BPF_LD | BPF_ABS | BPF_B,
757 .k = fp->k,
758 };
759
760 *seen_ld_abs = true;
761
762
763 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
764
765 convert_bpf_ld_abs(&tmp, &insn);
766 insn++;
767
768 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
769
770 *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
771
772 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
773
774 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
775
776 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
777 break;
778 }
779
780
781
782 case BPF_RET | BPF_A:
783 case BPF_RET | BPF_K:
784 if (BPF_RVAL(fp->code) == BPF_K)
785 *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
786 0, fp->k);
787 *insn = BPF_EXIT_INSN();
788 break;
789
790
791 case BPF_ST:
792 case BPF_STX:
793 stack_off = fp->k * 4 + 4;
794 *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
795 BPF_ST ? BPF_REG_A : BPF_REG_X,
796 -stack_off);
797
798
799
800
801 if (new_prog && new_prog->aux->stack_depth < stack_off)
802 new_prog->aux->stack_depth = stack_off;
803 break;
804
805
806 case BPF_LD | BPF_MEM:
807 case BPF_LDX | BPF_MEM:
808 stack_off = fp->k * 4 + 4;
809 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
810 BPF_REG_A : BPF_REG_X, BPF_REG_FP,
811 -stack_off);
812 break;
813
814
815 case BPF_LD | BPF_IMM:
816 case BPF_LDX | BPF_IMM:
817 *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
818 BPF_REG_A : BPF_REG_X, fp->k);
819 break;
820
821
822 case BPF_MISC | BPF_TAX:
823 *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
824 break;
825
826
827 case BPF_MISC | BPF_TXA:
828 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
829 break;
830
831
832 case BPF_LD | BPF_W | BPF_LEN:
833 case BPF_LDX | BPF_W | BPF_LEN:
834 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
835 BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
836 offsetof(struct sk_buff, len));
837 break;
838
839
840 case BPF_LDX | BPF_ABS | BPF_W:
841
842 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
843 break;
844
845
846 default:
847 goto err;
848 }
849
850 insn++;
851 if (new_prog)
852 memcpy(new_insn, tmp_insns,
853 sizeof(*insn) * (insn - tmp_insns));
854 new_insn += insn - tmp_insns;
855 }
856
857 if (!new_prog) {
858
859 *new_len = new_insn - first_insn;
860 if (*seen_ld_abs)
861 *new_len += 4;
862 return 0;
863 }
864
865 pass++;
866 if (new_flen != new_insn - first_insn) {
867 new_flen = new_insn - first_insn;
868 if (pass > 2)
869 goto err;
870 goto do_pass;
871 }
872
873 kfree(addrs);
874 BUG_ON(*new_len != new_flen);
875 return 0;
876err:
877 kfree(addrs);
878 return -EINVAL;
879}
880
/* Security:
 *
 * A classic BPF program may use up to BPF_MEMWORDS (16) scratch memory
 * cells. Since the scratch area is not cleared for each packet, verify
 * that the filter never reads a cell that has not been written on every
 * path leading to the load, so no uninitialized memory is exposed.
 */
888static int check_load_and_stores(const struct sock_filter *filter, int flen)
889{
890 u16 *masks, memvalid = 0;
891 int pc, ret = 0;
892
893 BUILD_BUG_ON(BPF_MEMWORDS > 16);
894
895 masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
896 if (!masks)
897 return -ENOMEM;
898
899 memset(masks, 0xff, flen * sizeof(*masks));
900
901 for (pc = 0; pc < flen; pc++) {
902 memvalid &= masks[pc];
903
904 switch (filter[pc].code) {
905 case BPF_ST:
906 case BPF_STX:
907 memvalid |= (1 << filter[pc].k);
908 break;
909 case BPF_LD | BPF_MEM:
910 case BPF_LDX | BPF_MEM:
911 if (!(memvalid & (1 << filter[pc].k))) {
912 ret = -EINVAL;
913 goto error;
914 }
915 break;
916 case BPF_JMP | BPF_JA:
917
918 masks[pc + 1 + filter[pc].k] &= memvalid;
919 memvalid = ~0;
920 break;
921 case BPF_JMP | BPF_JEQ | BPF_K:
922 case BPF_JMP | BPF_JEQ | BPF_X:
923 case BPF_JMP | BPF_JGE | BPF_K:
924 case BPF_JMP | BPF_JGE | BPF_X:
925 case BPF_JMP | BPF_JGT | BPF_K:
926 case BPF_JMP | BPF_JGT | BPF_X:
927 case BPF_JMP | BPF_JSET | BPF_K:
928 case BPF_JMP | BPF_JSET | BPF_X:
929
930 masks[pc + 1 + filter[pc].jt] &= memvalid;
931 masks[pc + 1 + filter[pc].jf] &= memvalid;
932 memvalid = ~0;
933 break;
934 }
935 }
936error:
937 kfree(masks);
938 return ret;
939}
940
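/* Table of all valid classic BPF opcodes; anything not listed here is
 * rejected by bpf_check_classic().
 */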
941static bool chk_code_allowed(u16 code_to_probe)
942{
943 static const bool codes[] = {
944
945 [BPF_ALU | BPF_ADD | BPF_K] = true,
946 [BPF_ALU | BPF_ADD | BPF_X] = true,
947 [BPF_ALU | BPF_SUB | BPF_K] = true,
948 [BPF_ALU | BPF_SUB | BPF_X] = true,
949 [BPF_ALU | BPF_MUL | BPF_K] = true,
950 [BPF_ALU | BPF_MUL | BPF_X] = true,
951 [BPF_ALU | BPF_DIV | BPF_K] = true,
952 [BPF_ALU | BPF_DIV | BPF_X] = true,
953 [BPF_ALU | BPF_MOD | BPF_K] = true,
954 [BPF_ALU | BPF_MOD | BPF_X] = true,
955 [BPF_ALU | BPF_AND | BPF_K] = true,
956 [BPF_ALU | BPF_AND | BPF_X] = true,
957 [BPF_ALU | BPF_OR | BPF_K] = true,
958 [BPF_ALU | BPF_OR | BPF_X] = true,
959 [BPF_ALU | BPF_XOR | BPF_K] = true,
960 [BPF_ALU | BPF_XOR | BPF_X] = true,
961 [BPF_ALU | BPF_LSH | BPF_K] = true,
962 [BPF_ALU | BPF_LSH | BPF_X] = true,
963 [BPF_ALU | BPF_RSH | BPF_K] = true,
964 [BPF_ALU | BPF_RSH | BPF_X] = true,
965 [BPF_ALU | BPF_NEG] = true,
966
967 [BPF_LD | BPF_W | BPF_ABS] = true,
968 [BPF_LD | BPF_H | BPF_ABS] = true,
969 [BPF_LD | BPF_B | BPF_ABS] = true,
970 [BPF_LD | BPF_W | BPF_LEN] = true,
971 [BPF_LD | BPF_W | BPF_IND] = true,
972 [BPF_LD | BPF_H | BPF_IND] = true,
973 [BPF_LD | BPF_B | BPF_IND] = true,
974 [BPF_LD | BPF_IMM] = true,
975 [BPF_LD | BPF_MEM] = true,
976 [BPF_LDX | BPF_W | BPF_LEN] = true,
977 [BPF_LDX | BPF_B | BPF_MSH] = true,
978 [BPF_LDX | BPF_IMM] = true,
979 [BPF_LDX | BPF_MEM] = true,
980
981 [BPF_ST] = true,
982 [BPF_STX] = true,
983
984 [BPF_MISC | BPF_TAX] = true,
985 [BPF_MISC | BPF_TXA] = true,
986
987 [BPF_RET | BPF_K] = true,
988 [BPF_RET | BPF_A] = true,
989
990 [BPF_JMP | BPF_JA] = true,
991 [BPF_JMP | BPF_JEQ | BPF_K] = true,
992 [BPF_JMP | BPF_JEQ | BPF_X] = true,
993 [BPF_JMP | BPF_JGE | BPF_K] = true,
994 [BPF_JMP | BPF_JGE | BPF_X] = true,
995 [BPF_JMP | BPF_JGT | BPF_K] = true,
996 [BPF_JMP | BPF_JGT | BPF_X] = true,
997 [BPF_JMP | BPF_JSET | BPF_K] = true,
998 [BPF_JMP | BPF_JSET | BPF_X] = true,
999 };
1000
1001 if (code_to_probe >= ARRAY_SIZE(codes))
1002 return false;
1003
1004 return codes[code_to_probe];
1005}
1006
1007static bool bpf_check_basics_ok(const struct sock_filter *filter,
1008 unsigned int flen)
1009{
1010 if (filter == NULL)
1011 return false;
1012 if (flen == 0 || flen > BPF_MAXINSNS)
1013 return false;
1014
1015 return true;
1016}
1017
/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
1032static int bpf_check_classic(const struct sock_filter *filter,
1033 unsigned int flen)
1034{
1035 bool anc_found;
1036 int pc;
1037
1038
1039 for (pc = 0; pc < flen; pc++) {
1040 const struct sock_filter *ftest = &filter[pc];
1041
1042
1043 if (!chk_code_allowed(ftest->code))
1044 return -EINVAL;
1045
1046
1047 switch (ftest->code) {
1048 case BPF_ALU | BPF_DIV | BPF_K:
1049 case BPF_ALU | BPF_MOD | BPF_K:
1050
1051 if (ftest->k == 0)
1052 return -EINVAL;
1053 break;
1054 case BPF_ALU | BPF_LSH | BPF_K:
1055 case BPF_ALU | BPF_RSH | BPF_K:
1056 if (ftest->k >= 32)
1057 return -EINVAL;
1058 break;
1059 case BPF_LD | BPF_MEM:
1060 case BPF_LDX | BPF_MEM:
1061 case BPF_ST:
1062 case BPF_STX:
1063
1064 if (ftest->k >= BPF_MEMWORDS)
1065 return -EINVAL;
1066 break;
1067 case BPF_JMP | BPF_JA:
1068
1069
1070
1071
1072 if (ftest->k >= (unsigned int)(flen - pc - 1))
1073 return -EINVAL;
1074 break;
1075 case BPF_JMP | BPF_JEQ | BPF_K:
1076 case BPF_JMP | BPF_JEQ | BPF_X:
1077 case BPF_JMP | BPF_JGE | BPF_K:
1078 case BPF_JMP | BPF_JGE | BPF_X:
1079 case BPF_JMP | BPF_JGT | BPF_K:
1080 case BPF_JMP | BPF_JGT | BPF_X:
1081 case BPF_JMP | BPF_JSET | BPF_K:
1082 case BPF_JMP | BPF_JSET | BPF_X:
1083
1084 if (pc + ftest->jt + 1 >= flen ||
1085 pc + ftest->jf + 1 >= flen)
1086 return -EINVAL;
1087 break;
1088 case BPF_LD | BPF_W | BPF_ABS:
1089 case BPF_LD | BPF_H | BPF_ABS:
1090 case BPF_LD | BPF_B | BPF_ABS:
1091 anc_found = false;
1092 if (bpf_anc_helper(ftest) & BPF_ANC)
1093 anc_found = true;
1094
1095 if (anc_found == false && ftest->k >= SKF_AD_OFF)
1096 return -EINVAL;
1097 }
1098 }
1099
1100
1101 switch (filter[flen - 1].code) {
1102 case BPF_RET | BPF_K:
1103 case BPF_RET | BPF_A:
1104 return check_load_and_stores(filter, flen);
1105 }
1106
1107 return -EINVAL;
1108}
1109
1110static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
1111 const struct sock_fprog *fprog)
1112{
1113 unsigned int fsize = bpf_classic_proglen(fprog);
1114 struct sock_fprog_kern *fkprog;
1115
1116 fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
1117 if (!fp->orig_prog)
1118 return -ENOMEM;
1119
1120 fkprog = fp->orig_prog;
1121 fkprog->len = fprog->len;
1122
1123 fkprog->filter = kmemdup(fp->insns, fsize,
1124 GFP_KERNEL | __GFP_NOWARN);
1125 if (!fkprog->filter) {
1126 kfree(fp->orig_prog);
1127 return -ENOMEM;
1128 }
1129
1130 return 0;
1131}
1132
1133static void bpf_release_orig_filter(struct bpf_prog *fp)
1134{
1135 struct sock_fprog_kern *fprog = fp->orig_prog;
1136
1137 if (fprog) {
1138 kfree(fprog->filter);
1139 kfree(fprog);
1140 }
1141}
1142
1143static void __bpf_prog_release(struct bpf_prog *prog)
1144{
1145 if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
1146 bpf_prog_put(prog);
1147 } else {
1148 bpf_release_orig_filter(prog);
1149 bpf_prog_free(prog);
1150 }
1151}
1152
1153static void __sk_filter_release(struct sk_filter *fp)
1154{
1155 __bpf_prog_release(fp->prog);
1156 kfree(fp);
1157}
1158
/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
1163static void sk_filter_release_rcu(struct rcu_head *rcu)
1164{
1165 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
1166
1167 __sk_filter_release(fp);
1168}
1169
/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
1176static void sk_filter_release(struct sk_filter *fp)
1177{
1178 if (refcount_dec_and_test(&fp->refcnt))
1179 call_rcu(&fp->rcu, sk_filter_release_rcu);
1180}
1181
1182void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1183{
1184 u32 filter_size = bpf_prog_size(fp->prog->len);
1185
1186 atomic_sub(filter_size, &sk->sk_omem_alloc);
1187 sk_filter_release(fp);
1188}
1189
/* try to charge the socket memory if there is space available
 * return true on success
 */
1193static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1194{
1195 u32 filter_size = bpf_prog_size(fp->prog->len);
1196
1197
1198 if (filter_size <= sysctl_optmem_max &&
1199 atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
1200 atomic_add(filter_size, &sk->sk_omem_alloc);
1201 return true;
1202 }
1203 return false;
1204}
1205
1206bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1207{
1208 if (!refcount_inc_not_zero(&fp->refcnt))
1209 return false;
1210
1211 if (!__sk_filter_charge(sk, fp)) {
1212 sk_filter_release(fp);
1213 return false;
1214 }
1215 return true;
1216}
1217
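/* Migrate a classic BPF filter that could not be JITed to the eBPF
 * interpreter: run bpf_convert_filter() once to size the new image,
 * reallocate the program, then run it again to emit the instructions.
 */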
1218static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1219{
1220 struct sock_filter *old_prog;
1221 struct bpf_prog *old_fp;
1222 int err, new_len, old_len = fp->len;
1223 bool seen_ld_abs = false;
1224
1225
1226
1227
1228
1229
1230 BUILD_BUG_ON(sizeof(struct sock_filter) !=
1231 sizeof(struct bpf_insn));
1232
1233
1234
1235
1236
1237 old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
1238 GFP_KERNEL | __GFP_NOWARN);
1239 if (!old_prog) {
1240 err = -ENOMEM;
1241 goto out_err;
1242 }
1243
1244
1245 err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
1246 &seen_ld_abs);
1247 if (err)
1248 goto out_err_free;
1249
1250
1251 old_fp = fp;
1252 fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
1253 if (!fp) {
1254
1255
1256
1257 fp = old_fp;
1258 err = -ENOMEM;
1259 goto out_err_free;
1260 }
1261
1262 fp->len = new_len;
1263
1264
1265 err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
1266 &seen_ld_abs);
1267 if (err)
1268
1269
1270
1271
1272
1273 goto out_err_free;
1274
1275 fp = bpf_prog_select_runtime(fp, &err);
1276 if (err)
1277 goto out_err_free;
1278
1279 kfree(old_prog);
1280 return fp;
1281
1282out_err_free:
1283 kfree(old_prog);
1284out_err:
1285 __bpf_prog_release(fp);
1286 return ERR_PTR(err);
1287}
1288
1289static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
1290 bpf_aux_classic_check_t trans)
1291{
1292 int err;
1293
1294 fp->bpf_func = NULL;
1295 fp->jited = 0;
1296
1297 err = bpf_check_classic(fp->insns, fp->len);
1298 if (err) {
1299 __bpf_prog_release(fp);
1300 return ERR_PTR(err);
1301 }
1302
1303
1304
1305
1306 if (trans) {
1307 err = trans(fp->insns, fp->len);
1308 if (err) {
1309 __bpf_prog_release(fp);
1310 return ERR_PTR(err);
1311 }
1312 }
1313
1314
1315
1316
1317 bpf_jit_compile(fp);
1318
1319
1320
1321
1322 if (!fp->jited)
1323 fp = bpf_migrate_filter(fp);
1324
1325 return fp;
1326}
1327
/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
1338int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1339{
1340 unsigned int fsize = bpf_classic_proglen(fprog);
1341 struct bpf_prog *fp;
1342
1343
1344 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1345 return -EINVAL;
1346
1347 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1348 if (!fp)
1349 return -ENOMEM;
1350
1351 memcpy(fp->insns, fprog->filter, fsize);
1352
1353 fp->len = fprog->len;
1354
1355
1356
1357
1358 fp->orig_prog = NULL;
1359
1360
1361
1362
1363 fp = bpf_prepare_filter(fp, NULL);
1364 if (IS_ERR(fp))
1365 return PTR_ERR(fp);
1366
1367 *pfp = fp;
1368 return 0;
1369}
1370EXPORT_SYMBOL_GPL(bpf_prog_create);
1371
/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
1383int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
1384 bpf_aux_classic_check_t trans, bool save_orig)
1385{
1386 unsigned int fsize = bpf_classic_proglen(fprog);
1387 struct bpf_prog *fp;
1388 int err;
1389
1390
1391 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1392 return -EINVAL;
1393
1394 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1395 if (!fp)
1396 return -ENOMEM;
1397
1398 if (copy_from_user(fp->insns, fprog->filter, fsize)) {
1399 __bpf_prog_free(fp);
1400 return -EFAULT;
1401 }
1402
1403 fp->len = fprog->len;
1404 fp->orig_prog = NULL;
1405
1406 if (save_orig) {
1407 err = bpf_prog_store_orig_filter(fp, fprog);
1408 if (err) {
1409 __bpf_prog_free(fp);
1410 return -ENOMEM;
1411 }
1412 }
1413
1414
1415
1416
1417 fp = bpf_prepare_filter(fp, trans);
1418 if (IS_ERR(fp))
1419 return PTR_ERR(fp);
1420
1421 *pfp = fp;
1422 return 0;
1423}
1424EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
1425
1426void bpf_prog_destroy(struct bpf_prog *fp)
1427{
1428 __bpf_prog_release(fp);
1429}
1430EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1431
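/* Wrap @prog in a struct sk_filter, charge it against the socket's
 * option memory and make it the socket's active filter. The old filter,
 * if any, is uncharged and released via RCU.
 */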
1432static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
1433{
1434 struct sk_filter *fp, *old_fp;
1435
1436 fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1437 if (!fp)
1438 return -ENOMEM;
1439
1440 fp->prog = prog;
1441
1442 if (!__sk_filter_charge(sk, fp)) {
1443 kfree(fp);
1444 return -ENOMEM;
1445 }
1446 refcount_set(&fp->refcnt, 1);
1447
1448 old_fp = rcu_dereference_protected(sk->sk_filter,
1449 lockdep_sock_is_held(sk));
1450 rcu_assign_pointer(sk->sk_filter, fp);
1451
1452 if (old_fp)
1453 sk_filter_uncharge(sk, old_fp);
1454
1455 return 0;
1456}
1457
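/* Attach @prog as the BPF selector for the socket's reuseport group,
 * allocating the group if the socket is not yet hashed. Any previously
 * attached program is destroyed.
 */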
1458static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
1459{
1460 struct bpf_prog *old_prog;
1461 int err;
1462
1463 if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1464 return -ENOMEM;
1465
1466 if (sk_unhashed(sk) && sk->sk_reuseport) {
1467 err = reuseport_alloc(sk);
1468 if (err)
1469 return err;
1470 } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
1471
1472 return -EINVAL;
1473 }
1474
1475 old_prog = reuseport_attach_prog(sk, prog);
1476 if (old_prog)
1477 bpf_prog_destroy(old_prog);
1478
1479 return 0;
1480}
1481
1482static
1483struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1484{
1485 unsigned int fsize = bpf_classic_proglen(fprog);
1486 struct bpf_prog *prog;
1487 int err;
1488
1489 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1490 return ERR_PTR(-EPERM);
1491
1492
1493 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1494 return ERR_PTR(-EINVAL);
1495
1496 prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1497 if (!prog)
1498 return ERR_PTR(-ENOMEM);
1499
1500 if (copy_from_user(prog->insns, fprog->filter, fsize)) {
1501 __bpf_prog_free(prog);
1502 return ERR_PTR(-EFAULT);
1503 }
1504
1505 prog->len = fprog->len;
1506
1507 err = bpf_prog_store_orig_filter(prog, fprog);
1508 if (err) {
1509 __bpf_prog_free(prog);
1510 return ERR_PTR(-ENOMEM);
1511 }
1512
1513
1514
1515
1516 return bpf_prepare_filter(prog, NULL);
1517}
1518
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
1529int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1530{
1531 struct bpf_prog *prog = __get_filter(fprog, sk);
1532 int err;
1533
1534 if (IS_ERR(prog))
1535 return PTR_ERR(prog);
1536
1537 err = __sk_attach_prog(prog, sk);
1538 if (err < 0) {
1539 __bpf_prog_release(prog);
1540 return err;
1541 }
1542
1543 return 0;
1544}
1545EXPORT_SYMBOL_GPL(sk_attach_filter);
1546
1547int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1548{
1549 struct bpf_prog *prog = __get_filter(fprog, sk);
1550 int err;
1551
1552 if (IS_ERR(prog))
1553 return PTR_ERR(prog);
1554
1555 err = __reuseport_attach_prog(prog, sk);
1556 if (err < 0) {
1557 __bpf_prog_release(prog);
1558 return err;
1559 }
1560
1561 return 0;
1562}
1563
1564static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
1565{
1566 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1567 return ERR_PTR(-EPERM);
1568
1569 return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1570}
1571
1572int sk_attach_bpf(u32 ufd, struct sock *sk)
1573{
1574 struct bpf_prog *prog = __get_bpf(ufd, sk);
1575 int err;
1576
1577 if (IS_ERR(prog))
1578 return PTR_ERR(prog);
1579
1580 rh_mark_used_feature("eBPF/sock");
1581
1582 err = __sk_attach_prog(prog, sk);
1583 if (err < 0) {
1584 bpf_prog_put(prog);
1585 return err;
1586 }
1587
1588 return 0;
1589}
1590
1591int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1592{
1593 struct bpf_prog *prog = __get_bpf(ufd, sk);
1594 int err;
1595
1596 if (IS_ERR(prog))
1597 return PTR_ERR(prog);
1598
1599 rh_mark_used_feature("eBPF/reuseport");
1600
1601 err = __reuseport_attach_prog(prog, sk);
1602 if (err < 0) {
1603 bpf_prog_put(prog);
1604 return err;
1605 }
1606
1607 return 0;
1608}
1609
1610struct bpf_scratchpad {
1611 union {
1612 __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1613 u8 buff[MAX_BPF_STACK];
1614 };
1615};
1616
1617static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1618
1619static inline int __bpf_try_make_writable(struct sk_buff *skb,
1620 unsigned int write_len)
1621{
1622 return skb_ensure_writable(skb, write_len);
1623}
1624
1625static inline int bpf_try_make_writable(struct sk_buff *skb,
1626 unsigned int write_len)
1627{
1628 int err = __bpf_try_make_writable(skb, write_len);
1629
1630 bpf_compute_data_pointers(skb);
1631 return err;
1632}
1633
1634static int bpf_try_make_head_writable(struct sk_buff *skb)
1635{
1636 return bpf_try_make_writable(skb, skb_headlen(skb));
1637}
1638
1639static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
1640{
1641 if (skb_at_tc_ingress(skb))
1642 skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1643}
1644
1645static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
1646{
1647 if (skb_at_tc_ingress(skb))
1648 skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1649}
1650
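/* bpf_skb_store_bytes(): write @len bytes from @from into the linear part
 * of @skb at @offset, making the area writable first. With
 * BPF_F_RECOMPUTE_CSUM the skb checksum is kept in sync; with
 * BPF_F_INVALIDATE_HASH the cached flow hash is cleared.
 */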
1651BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
1652 const void *, from, u32, len, u64, flags)
1653{
1654 void *ptr;
1655
1656 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1657 return -EINVAL;
1658 if (unlikely(offset > 0xffff))
1659 return -EFAULT;
1660 if (unlikely(bpf_try_make_writable(skb, offset + len)))
1661 return -EFAULT;
1662
1663 ptr = skb->data + offset;
1664 if (flags & BPF_F_RECOMPUTE_CSUM)
1665 __skb_postpull_rcsum(skb, ptr, len, offset);
1666
1667 memcpy(ptr, from, len);
1668
1669 if (flags & BPF_F_RECOMPUTE_CSUM)
1670 __skb_postpush_rcsum(skb, ptr, len, offset);
1671 if (flags & BPF_F_INVALIDATE_HASH)
1672 skb_clear_hash(skb);
1673
1674 return 0;
1675}
1676
1677static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1678 .func = bpf_skb_store_bytes,
1679 .gpl_only = false,
1680 .ret_type = RET_INTEGER,
1681 .arg1_type = ARG_PTR_TO_CTX,
1682 .arg2_type = ARG_ANYTHING,
1683 .arg3_type = ARG_PTR_TO_MEM,
1684 .arg4_type = ARG_CONST_SIZE,
1685 .arg5_type = ARG_ANYTHING,
1686};
1687
1688BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
1689 void *, to, u32, len)
1690{
1691 void *ptr;
1692
1693 if (unlikely(offset > 0xffff))
1694 goto err_clear;
1695
1696 ptr = skb_header_pointer(skb, offset, len, to);
1697 if (unlikely(!ptr))
1698 goto err_clear;
1699 if (ptr != to)
1700 memcpy(to, ptr, len);
1701
1702 return 0;
1703err_clear:
1704 memset(to, 0, len);
1705 return -EFAULT;
1706}
1707
1708static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1709 .func = bpf_skb_load_bytes,
1710 .gpl_only = false,
1711 .ret_type = RET_INTEGER,
1712 .arg1_type = ARG_PTR_TO_CTX,
1713 .arg2_type = ARG_ANYTHING,
1714 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1715 .arg4_type = ARG_CONST_SIZE,
1716};
1717
1718BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1719 u32, offset, void *, to, u32, len, u32, start_header)
1720{
1721 u8 *end = skb_tail_pointer(skb);
1722 u8 *net = skb_network_header(skb);
1723 u8 *mac = skb_mac_header(skb);
1724 u8 *ptr;
1725
1726 if (unlikely(offset > 0xffff || len > (end - mac)))
1727 goto err_clear;
1728
1729 switch (start_header) {
1730 case BPF_HDR_START_MAC:
1731 ptr = mac + offset;
1732 break;
1733 case BPF_HDR_START_NET:
1734 ptr = net + offset;
1735 break;
1736 default:
1737 goto err_clear;
1738 }
1739
1740 if (likely(ptr >= mac && ptr + len <= end)) {
1741 memcpy(to, ptr, len);
1742 return 0;
1743 }
1744
1745err_clear:
1746 memset(to, 0, len);
1747 return -EFAULT;
1748}
1749
1750static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1751 .func = bpf_skb_load_bytes_relative,
1752 .gpl_only = false,
1753 .ret_type = RET_INTEGER,
1754 .arg1_type = ARG_PTR_TO_CTX,
1755 .arg2_type = ARG_ANYTHING,
1756 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1757 .arg4_type = ARG_CONST_SIZE,
1758 .arg5_type = ARG_ANYTHING,
1759};
1760
1761BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1762{
	/* Idea is the following: should the needed direct read/write
	 * test fail during runtime, we can pull in more data and redo
	 * again, since implicitly, we invalidate previous checks here.
	 *
	 * Or, since we know how much we need to make read/writeable,
	 * this can be done once at the program beginning for direct
	 * access case. By this we overcome limitations of only current
	 * headroom being accessible.
	 */
1772 return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
1773}
1774
1775static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1776 .func = bpf_skb_pull_data,
1777 .gpl_only = false,
1778 .ret_type = RET_INTEGER,
1779 .arg1_type = ARG_PTR_TO_CTX,
1780 .arg2_type = ARG_ANYTHING,
1781};
1782
1783static inline int sk_skb_try_make_writable(struct sk_buff *skb,
1784 unsigned int write_len)
1785{
1786 int err = __bpf_try_make_writable(skb, write_len);
1787
1788 bpf_compute_data_end_sk_skb(skb);
1789 return err;
1790}
1791
1792BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1793{
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803 return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
1804}
1805
1806static const struct bpf_func_proto sk_skb_pull_data_proto = {
1807 .func = sk_skb_pull_data,
1808 .gpl_only = false,
1809 .ret_type = RET_INTEGER,
1810 .arg1_type = ARG_PTR_TO_CTX,
1811 .arg2_type = ARG_ANYTHING,
1812};
1813
1814BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1815 u64, from, u64, to, u64, flags)
1816{
1817 __sum16 *ptr;
1818
1819 if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1820 return -EINVAL;
1821 if (unlikely(offset > 0xffff || offset & 1))
1822 return -EFAULT;
1823 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1824 return -EFAULT;
1825
1826 ptr = (__sum16 *)(skb->data + offset);
1827 switch (flags & BPF_F_HDR_FIELD_MASK) {
1828 case 0:
1829 if (unlikely(from != 0))
1830 return -EINVAL;
1831
1832 csum_replace_by_diff(ptr, to);
1833 break;
1834 case 2:
1835 csum_replace2(ptr, from, to);
1836 break;
1837 case 4:
1838 csum_replace4(ptr, from, to);
1839 break;
1840 default:
1841 return -EINVAL;
1842 }
1843
1844 return 0;
1845}
1846
1847static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1848 .func = bpf_l3_csum_replace,
1849 .gpl_only = false,
1850 .ret_type = RET_INTEGER,
1851 .arg1_type = ARG_PTR_TO_CTX,
1852 .arg2_type = ARG_ANYTHING,
1853 .arg3_type = ARG_ANYTHING,
1854 .arg4_type = ARG_ANYTHING,
1855 .arg5_type = ARG_ANYTHING,
1856};
1857
1858BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1859 u64, from, u64, to, u64, flags)
1860{
1861 bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1862 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1863 bool do_mforce = flags & BPF_F_MARK_ENFORCE;
1864 __sum16 *ptr;
1865
1866 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
1867 BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
1868 return -EINVAL;
1869 if (unlikely(offset > 0xffff || offset & 1))
1870 return -EFAULT;
1871 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1872 return -EFAULT;
1873
1874 ptr = (__sum16 *)(skb->data + offset);
1875 if (is_mmzero && !do_mforce && !*ptr)
1876 return 0;
1877
1878 switch (flags & BPF_F_HDR_FIELD_MASK) {
1879 case 0:
1880 if (unlikely(from != 0))
1881 return -EINVAL;
1882
1883 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1884 break;
1885 case 2:
1886 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1887 break;
1888 case 4:
1889 inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
1890 break;
1891 default:
1892 return -EINVAL;
1893 }
1894
1895 if (is_mmzero && !*ptr)
1896 *ptr = CSUM_MANGLED_0;
1897 return 0;
1898}
1899
1900static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1901 .func = bpf_l4_csum_replace,
1902 .gpl_only = false,
1903 .ret_type = RET_INTEGER,
1904 .arg1_type = ARG_PTR_TO_CTX,
1905 .arg2_type = ARG_ANYTHING,
1906 .arg3_type = ARG_ANYTHING,
1907 .arg4_type = ARG_ANYTHING,
1908 .arg5_type = ARG_ANYTHING,
1909};
1910
1911BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
1912 __be32 *, to, u32, to_size, __wsum, seed)
1913{
1914 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1915 u32 diff_size = from_size + to_size;
1916 int i, j = 0;
1917
	/* This is quite flexible, some examples:
	 *
	 * from_size == 0, to_size > 0,  seed := csum --> pushing data
	 * from_size > 0,  to_size == 0, seed := csum --> pulling data
	 * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
	 *
	 * Even for diffing, from_size and to_size don't need to be equal.
	 */
1926 if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
1927 diff_size > sizeof(sp->diff)))
1928 return -EINVAL;
1929
1930 for (i = 0; i < from_size / sizeof(__be32); i++, j++)
1931 sp->diff[j] = ~from[i];
1932 for (i = 0; i < to_size / sizeof(__be32); i++, j++)
1933 sp->diff[j] = to[i];
1934
1935 return csum_partial(sp->diff, diff_size, seed);
1936}
1937
1938static const struct bpf_func_proto bpf_csum_diff_proto = {
1939 .func = bpf_csum_diff,
1940 .gpl_only = false,
1941 .pkt_access = true,
1942 .ret_type = RET_INTEGER,
1943 .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
1944 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
1945 .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
1946 .arg4_type = ARG_CONST_SIZE_OR_ZERO,
1947 .arg5_type = ARG_ANYTHING,
1948};
1949
1950BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
1951{
1952
1953
1954
1955
1956 if (skb->ip_summed == CHECKSUM_COMPLETE)
1957 return (skb->csum = csum_add(skb->csum, csum));
1958
1959 return -ENOTSUPP;
1960}
1961
1962static const struct bpf_func_proto bpf_csum_update_proto = {
1963 .func = bpf_csum_update,
1964 .gpl_only = false,
1965 .ret_type = RET_INTEGER,
1966 .arg1_type = ARG_PTR_TO_CTX,
1967 .arg2_type = ARG_ANYTHING,
1968};
1969
1970static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
1971{
1972 return dev_forward_skb(dev, skb);
1973}
1974
1975static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
1976 struct sk_buff *skb)
1977{
1978 int ret = ____dev_forward_skb(dev, skb);
1979
1980 if (likely(!ret)) {
1981 skb->dev = dev;
1982 ret = netif_rx(skb);
1983 }
1984
1985 return ret;
1986}
1987
1988static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
1989{
1990 int ret;
1991
1992 if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
1993 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
1994 kfree_skb(skb);
1995 return -ENETDOWN;
1996 }
1997
1998 skb->dev = dev;
1999
2000 __this_cpu_inc(xmit_recursion);
2001 ret = dev_queue_xmit(skb);
2002 __this_cpu_dec(xmit_recursion);
2003
2004 return ret;
2005}
2006
2007static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
2008 u32 flags)
2009{
	/* skb->mac_len is not set on normal egress */
2011 unsigned int mlen = skb->network_header - skb->mac_header;
2012
2013 __skb_pull(skb, mlen);
2014
	/* At ingress, the mac header has already been pulled once.
	 * At egress, skb_postpull_rcsum has to be done in case that
	 * the skb is originated from ingress (i.e. a forwarded skb)
	 * to ensure that rcsum starts at the net header.
	 */
2020 if (!skb_at_tc_ingress(skb))
2021 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
2022 skb_pop_mac_header(skb);
2023 skb_reset_mac_len(skb);
2024 return flags & BPF_F_INGRESS ?
2025 __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2026}
2027
2028static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
2029 u32 flags)
2030{
2031
2032 if (unlikely(skb->mac_header >= skb->network_header)) {
2033 kfree_skb(skb);
2034 return -ERANGE;
2035 }
2036
2037 bpf_push_mac_rcsum(skb);
2038 return flags & BPF_F_INGRESS ?
2039 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2040}
2041
2042static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
2043 u32 flags)
2044{
2045 if (dev_is_mac_header_xmit(dev))
2046 return __bpf_redirect_common(skb, dev, flags);
2047 else
2048 return __bpf_redirect_no_mac(skb, dev, flags);
2049}
2050
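/* bpf_clone_redirect(): clone the skb and transmit the clone to @ifindex,
 * either on egress or, with BPF_F_INGRESS, re-injected at ingress. The
 * original skb is made head-writable first so later direct writes by the
 * program keep operating on unshared data.
 */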
2051BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
2052{
2053 struct net_device *dev;
2054 struct sk_buff *clone;
2055 int ret;
2056
2057 if (unlikely(flags & ~(BPF_F_INGRESS)))
2058 return -EINVAL;
2059
2060 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
2061 if (unlikely(!dev))
2062 return -EINVAL;
2063
2064 clone = skb_clone(skb, GFP_ATOMIC);
2065 if (unlikely(!clone))
2066 return -ENOMEM;
2067
	/* For direct write, we need to keep the invariant that the skbs
	 * we're dealing with need to be uncloned. Should uncloning fail
	 * here, we have to free the just generated clone to unclone once
	 * again.
	 */
2073 ret = bpf_try_make_head_writable(skb);
2074 if (unlikely(ret)) {
2075 kfree_skb(clone);
2076 return -ENOMEM;
2077 }
2078
2079 return __bpf_redirect(clone, dev, flags);
2080}
2081
2082static const struct bpf_func_proto bpf_clone_redirect_proto = {
2083 .func = bpf_clone_redirect,
2084 .gpl_only = false,
2085 .ret_type = RET_INTEGER,
2086 .arg1_type = ARG_PTR_TO_CTX,
2087 .arg2_type = ARG_ANYTHING,
2088 .arg3_type = ARG_ANYTHING,
2089};
2090
2091struct redirect_info {
2092 u32 ifindex;
2093 u32 flags;
2094 struct bpf_map *map;
2095 struct bpf_map *map_to_flush;
2096 unsigned long map_owner;
2097};
2098
2099static DEFINE_PER_CPU(struct redirect_info, redirect_info);
2100
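/* bpf_redirect(): unlike bpf_clone_redirect(), no clone is made here. The
 * target ifindex and flags are only recorded in the per-CPU redirect_info;
 * the actual redirect is performed later by skb_do_redirect() once the
 * program has returned TC_ACT_REDIRECT.
 */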
2101BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2102{
2103 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2104
2105 if (unlikely(flags & ~(BPF_F_INGRESS)))
2106 return TC_ACT_SHOT;
2107
2108 ri->ifindex = ifindex;
2109 ri->flags = flags;
2110
2111 return TC_ACT_REDIRECT;
2112}
2113
2114int skb_do_redirect(struct sk_buff *skb)
2115{
2116 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2117 struct net_device *dev;
2118
2119 dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
2120 ri->ifindex = 0;
2121 if (unlikely(!dev)) {
2122 kfree_skb(skb);
2123 return -EINVAL;
2124 }
2125
2126 return __bpf_redirect(skb, dev, ri->flags);
2127}
2128
2129static const struct bpf_func_proto bpf_redirect_proto = {
2130 .func = bpf_redirect,
2131 .gpl_only = false,
2132 .ret_type = RET_INTEGER,
2133 .arg1_type = ARG_ANYTHING,
2134 .arg2_type = ARG_ANYTHING,
2135};
2136
2137BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
2138 struct bpf_map *, map, void *, key, u64, flags)
2139{
2140 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2141
	/* If user passes invalid input drop the packet. */
2143 if (unlikely(flags & ~(BPF_F_INGRESS)))
2144 return SK_DROP;
2145
2146 tcb->bpf.flags = flags;
2147 tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
2148 if (!tcb->bpf.sk_redir)
2149 return SK_DROP;
2150
2151 return SK_PASS;
2152}
2153
2154static const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
2155 .func = bpf_sk_redirect_hash,
2156 .gpl_only = false,
2157 .ret_type = RET_INTEGER,
2158 .arg1_type = ARG_PTR_TO_CTX,
2159 .arg2_type = ARG_CONST_MAP_PTR,
2160 .arg3_type = ARG_PTR_TO_MAP_KEY,
2161 .arg4_type = ARG_ANYTHING,
2162};
2163
2164BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
2165 struct bpf_map *, map, u32, key, u64, flags)
2166{
2167 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2168
	/* If user passes invalid input drop the packet. */
2170 if (unlikely(flags & ~(BPF_F_INGRESS)))
2171 return SK_DROP;
2172
2173 tcb->bpf.flags = flags;
2174 tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
2175 if (!tcb->bpf.sk_redir)
2176 return SK_DROP;
2177
2178 return SK_PASS;
2179}
2180
2181struct sock *do_sk_redirect_map(struct sk_buff *skb)
2182{
2183 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2184
2185 return tcb->bpf.sk_redir;
2186}
2187
2188static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
2189 .func = bpf_sk_redirect_map,
2190 .gpl_only = false,
2191 .ret_type = RET_INTEGER,
2192 .arg1_type = ARG_PTR_TO_CTX,
2193 .arg2_type = ARG_CONST_MAP_PTR,
2194 .arg3_type = ARG_ANYTHING,
2195 .arg4_type = ARG_ANYTHING,
2196};
2197
2198BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg_buff *, msg,
2199 struct bpf_map *, map, void *, key, u64, flags)
2200{
	/* If user passes invalid input drop the packet. */
2202 if (unlikely(flags & ~(BPF_F_INGRESS)))
2203 return SK_DROP;
2204
2205 msg->flags = flags;
2206 msg->sk_redir = __sock_hash_lookup_elem(map, key);
2207 if (!msg->sk_redir)
2208 return SK_DROP;
2209
2210 return SK_PASS;
2211}
2212
2213static const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
2214 .func = bpf_msg_redirect_hash,
2215 .gpl_only = false,
2216 .ret_type = RET_INTEGER,
2217 .arg1_type = ARG_PTR_TO_CTX,
2218 .arg2_type = ARG_CONST_MAP_PTR,
2219 .arg3_type = ARG_PTR_TO_MAP_KEY,
2220 .arg4_type = ARG_ANYTHING,
2221};
2222
2223BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
2224 struct bpf_map *, map, u32, key, u64, flags)
2225{
	/* If user passes invalid input drop the packet. */
2227 if (unlikely(flags & ~(BPF_F_INGRESS)))
2228 return SK_DROP;
2229
2230 msg->flags = flags;
2231 msg->sk_redir = __sock_map_lookup_elem(map, key);
2232 if (!msg->sk_redir)
2233 return SK_DROP;
2234
2235 return SK_PASS;
2236}
2237
2238struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
2239{
2240 return msg->sk_redir;
2241}
2242
2243static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
2244 .func = bpf_msg_redirect_map,
2245 .gpl_only = false,
2246 .ret_type = RET_INTEGER,
2247 .arg1_type = ARG_PTR_TO_CTX,
2248 .arg2_type = ARG_CONST_MAP_PTR,
2249 .arg3_type = ARG_ANYTHING,
2250 .arg4_type = ARG_ANYTHING,
2251};
2252
2253BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
2254{
2255 msg->apply_bytes = bytes;
2256 return 0;
2257}
2258
2259static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2260 .func = bpf_msg_apply_bytes,
2261 .gpl_only = false,
2262 .ret_type = RET_INTEGER,
2263 .arg1_type = ARG_PTR_TO_CTX,
2264 .arg2_type = ARG_ANYTHING,
2265};
2266
2267BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
2268{
2269 msg->cork_bytes = bytes;
2270 return 0;
2271}
2272
2273static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2274 .func = bpf_msg_cork_bytes,
2275 .gpl_only = false,
2276 .ret_type = RET_INTEGER,
2277 .arg1_type = ARG_PTR_TO_CTX,
2278 .arg2_type = ARG_ANYTHING,
2279};
2280
2281#define sk_msg_iter_var(var) \
2282 do { \
2283 var++; \
2284 if (var == MAX_SKB_FRAGS) \
2285 var = 0; \
2286 } while (0)
2287
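/* bpf_msg_pull_data(): make the bytes in [start, end) of the msg's
 * scatterlist directly addressable through msg->data/msg->data_end. If
 * the region already sits in a single entry the program may touch, it is
 * simply exposed; otherwise the covered entries are copied into a freshly
 * allocated (possibly compound) page and the remaining entries are
 * shifted down.
 */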
2288BPF_CALL_4(bpf_msg_pull_data,
2289 struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
2290{
2291 unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
2292 int bytes = end - start, bytes_sg_total;
2293 struct scatterlist *sg = msg->sg_data;
2294 int first_sg, last_sg, i, shift;
2295 unsigned char *p, *to, *from;
2296 struct page *page;
2297
2298 if (unlikely(flags || end <= start))
2299 return -EINVAL;
2300
2301
2302 i = msg->sg_start;
2303 do {
2304 len = sg[i].length;
2305 if (start < offset + len)
2306 break;
2307 offset += len;
2308 sk_msg_iter_var(i);
2309 } while (i != msg->sg_end);
2310
2311 if (unlikely(start >= offset + len))
2312 return -EINVAL;
2313
2314 first_sg = i;
2315
2316
2317
2318 bytes_sg_total = start - offset + bytes;
2319 if (!msg->sg_copy[i] && bytes_sg_total <= len)
2320 goto out;
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332 do {
2333 copy += sg[i].length;
2334 sk_msg_iter_var(i);
2335 if (bytes_sg_total <= copy)
2336 break;
2337 } while (i != msg->sg_end);
2338 last_sg = i;
2339
2340 if (unlikely(bytes_sg_total > copy))
2341 return -EINVAL;
2342
2343 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2344 get_order(copy));
2345 if (unlikely(!page))
2346 return -ENOMEM;
2347 p = page_address(page);
2348
2349 i = first_sg;
2350 do {
2351 from = sg_virt(&sg[i]);
2352 len = sg[i].length;
2353 to = p + poffset;
2354
2355 memcpy(to, from, len);
2356 poffset += len;
2357 sg[i].length = 0;
2358 put_page(sg_page(&sg[i]));
2359
2360 sk_msg_iter_var(i);
2361 } while (i != last_sg);
2362
2363 sg[first_sg].length = copy;
2364 sg_set_page(&sg[first_sg], page, copy, 0);
2365
2366
2367
2368
2369
2370 WARN_ON_ONCE(last_sg == first_sg);
2371 shift = last_sg > first_sg ?
2372 last_sg - first_sg - 1 :
2373 MAX_SKB_FRAGS - first_sg + last_sg - 1;
2374 if (!shift)
2375 goto out;
2376
2377 i = first_sg;
2378 sk_msg_iter_var(i);
2379 do {
2380 int move_from;
2381
2382 if (i + shift >= MAX_SKB_FRAGS)
2383 move_from = i + shift - MAX_SKB_FRAGS;
2384 else
2385 move_from = i + shift;
2386
2387 if (move_from == msg->sg_end)
2388 break;
2389
2390 sg[i] = sg[move_from];
2391 sg[move_from].length = 0;
2392 sg[move_from].page_link = 0;
2393 sg[move_from].offset = 0;
2394
2395 sk_msg_iter_var(i);
2396 } while (1);
2397 msg->sg_end -= shift;
2398 if (msg->sg_end < 0)
2399 msg->sg_end += MAX_SKB_FRAGS;
2400out:
2401 msg->data = sg_virt(&sg[first_sg]) + start - offset;
2402 msg->data_end = msg->data + bytes;
2403
2404 return 0;
2405}
2406
2407static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2408 .func = bpf_msg_pull_data,
2409 .gpl_only = false,
2410 .ret_type = RET_INTEGER,
2411 .arg1_type = ARG_PTR_TO_CTX,
2412 .arg2_type = ARG_ANYTHING,
2413 .arg3_type = ARG_ANYTHING,
2414 .arg4_type = ARG_ANYTHING,
2415};
2416
2417BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
2418{
2419 return task_get_classid(skb);
2420}
2421
2422static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
2423 .func = bpf_get_cgroup_classid,
2424 .gpl_only = false,
2425 .ret_type = RET_INTEGER,
2426 .arg1_type = ARG_PTR_TO_CTX,
2427};
2428
2429BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
2430{
2431 return dst_tclassid(skb);
2432}
2433
2434static const struct bpf_func_proto bpf_get_route_realm_proto = {
2435 .func = bpf_get_route_realm,
2436 .gpl_only = false,
2437 .ret_type = RET_INTEGER,
2438 .arg1_type = ARG_PTR_TO_CTX,
2439};
2440
2441BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
2442{
	/* If the hash was cleared due to mangling, recalculate it in
	 * software here. Later accesses can then read skb->hash through the
	 * context directly instead of calling this helper again.
	 */
2448 return skb_get_hash(skb);
2449}
2450
2451static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
2452 .func = bpf_get_hash_recalc,
2453 .gpl_only = false,
2454 .ret_type = RET_INTEGER,
2455 .arg1_type = ARG_PTR_TO_CTX,
2456};
2457
2458BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
2459{
	/* After direct packet writes, this can be used to invalidate the
	 * current hash and force a recalculation on the next access.
	 */
2463 skb_clear_hash(skb);
2464 return 0;
2465}
2466
2467static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
2468 .func = bpf_set_hash_invalid,
2469 .gpl_only = false,
2470 .ret_type = RET_INTEGER,
2471 .arg1_type = ARG_PTR_TO_CTX,
2472};
2473
2474BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
2475{
	/* Set the user supplied hash as L4(+), so that it is returned by
	 * skb_get_hash() unless the program clears it again later on.
	 */
2480 __skb_set_sw_hash(skb, hash, true);
2481 return 0;
2482}
2483
2484static const struct bpf_func_proto bpf_set_hash_proto = {
2485 .func = bpf_set_hash,
2486 .gpl_only = false,
2487 .ret_type = RET_INTEGER,
2488 .arg1_type = ARG_PTR_TO_CTX,
2489 .arg2_type = ARG_ANYTHING,
2490};
2491
2492BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
2493 u16, vlan_tci)
2494{
2495 int ret;
2496
2497 if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
2498 vlan_proto != htons(ETH_P_8021AD)))
2499 vlan_proto = htons(ETH_P_8021Q);
2500
2501 bpf_push_mac_rcsum(skb);
2502 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
2503 bpf_pull_mac_rcsum(skb);
2504
2505 bpf_compute_data_pointers(skb);
2506 return ret;
2507}
2508
2509static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
2510 .func = bpf_skb_vlan_push,
2511 .gpl_only = false,
2512 .ret_type = RET_INTEGER,
2513 .arg1_type = ARG_PTR_TO_CTX,
2514 .arg2_type = ARG_ANYTHING,
2515 .arg3_type = ARG_ANYTHING,
2516};
2517
2518BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
2519{
2520 int ret;
2521
2522 bpf_push_mac_rcsum(skb);
2523 ret = skb_vlan_pop(skb);
2524 bpf_pull_mac_rcsum(skb);
2525
2526 bpf_compute_data_pointers(skb);
2527 return ret;
2528}
2529
2530static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
2531 .func = bpf_skb_vlan_pop,
2532 .gpl_only = false,
2533 .ret_type = RET_INTEGER,
2534 .arg1_type = ARG_PTR_TO_CTX,
2535};
2536
2537static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
2538{
	/* The caller already made sure there are @len bytes of headroom,
	 * so no skb_cow() is needed here.
	 */
2542 skb_push(skb, len);
2543 memmove(skb->data, skb->data + len, off);
2544 memset(skb->data + off, 0, len);
2545
	/* No skb_postpush_rcsum() is needed here: summing over the zeroed
	 * block does not change the checksum-complete result in skb->csum.
	 */
2551 return 0;
2552}
2553
2554static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
2555{
	/* No skb_ensure_writable() is needed here, the callers already
	 * work on an unshared skb.
	 */
2559 if (unlikely(!pskb_may_pull(skb, off + len)))
2560 return -ENOMEM;
2561
2562 skb_postpull_rcsum(skb, skb->data + off, len);
2563 memmove(skb->data + len, skb->data, off);
2564 __skb_pull(skb, len);
2565
2566 return 0;
2567}
2568
2569static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
2570{
2571 bool trans_same = skb->transport_header == skb->network_header;
2572 int ret;
2573
	/* No __skb_push()/__skb_pull() pair is needed to get to the start
	 * of the mac header, since under eBPF skb->data already points
	 * there.
	 */
2578 ret = bpf_skb_generic_push(skb, off, len);
2579 if (likely(!ret)) {
2580 skb->mac_header -= len;
2581 skb->network_header -= len;
2582 if (trans_same)
2583 skb->transport_header = skb->network_header;
2584 }
2585
2586 return ret;
2587}
2588
2589static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
2590{
2591 bool trans_same = skb->transport_header == skb->network_header;
2592 int ret;
2593
	/* Same here, no __skb_push()/__skb_pull() pair needed. */
2595 ret = bpf_skb_generic_pop(skb, off, len);
2596 if (likely(!ret)) {
2597 skb->mac_header += len;
2598 skb->network_header += len;
2599 if (trans_same)
2600 skb->transport_header = skb->network_header;
2601 }
2602
2603 return ret;
2604}
2605
2606static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
2607{
2608 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
2609 u32 off = skb_mac_header_len(skb);
2610 int ret;
2611
	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
2613 if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
2614 return -ENOTSUPP;
2615
2616 ret = skb_cow(skb, len_diff);
2617 if (unlikely(ret < 0))
2618 return ret;
2619
2620 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2621 if (unlikely(ret < 0))
2622 return ret;
2623
2624 if (skb_is_gso(skb)) {
2625 struct skb_shared_info *shinfo = skb_shinfo(skb);
2626
		/* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
2630 if (shinfo->gso_type & SKB_GSO_TCPV4) {
2631 shinfo->gso_type &= ~SKB_GSO_TCPV4;
2632 shinfo->gso_type |= SKB_GSO_TCPV6;
2633 }
2634
		/* Due to the larger IPv6 header, MSS needs to be lowered. */
2636 skb_decrease_gso_size(shinfo, len_diff);
		/* Header must be checked, and gso_segs recomputed. */
2638 shinfo->gso_type |= SKB_GSO_DODGY;
2639 shinfo->gso_segs = 0;
2640 }
2641
2642 skb->protocol = htons(ETH_P_IPV6);
2643 skb_clear_hash(skb);
2644
2645 return 0;
2646}
2647
2648static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
2649{
2650 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
2651 u32 off = skb_mac_header_len(skb);
2652 int ret;
2653
	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
2655 if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
2656 return -ENOTSUPP;
2657
2658 ret = skb_unclone(skb, GFP_ATOMIC);
2659 if (unlikely(ret < 0))
2660 return ret;
2661
2662 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2663 if (unlikely(ret < 0))
2664 return ret;
2665
2666 if (skb_is_gso(skb)) {
2667 struct skb_shared_info *shinfo = skb_shinfo(skb);
2668
		/* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
2672 if (shinfo->gso_type & SKB_GSO_TCPV6) {
2673 shinfo->gso_type &= ~SKB_GSO_TCPV6;
2674 shinfo->gso_type |= SKB_GSO_TCPV4;
2675 }
2676
		/* Due to the smaller IPv4 header, MSS can be raised. */
2678 skb_increase_gso_size(shinfo, len_diff);
		/* Header must be checked, and gso_segs recomputed. */
2680 shinfo->gso_type |= SKB_GSO_DODGY;
2681 shinfo->gso_segs = 0;
2682 }
2683
2684 skb->protocol = htons(ETH_P_IP);
2685 skb_clear_hash(skb);
2686
2687 return 0;
2688}
2689
2690static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
2691{
2692 __be16 from_proto = skb->protocol;
2693
2694 if (from_proto == htons(ETH_P_IP) &&
2695 to_proto == htons(ETH_P_IPV6))
2696 return bpf_skb_proto_4_to_6(skb);
2697
2698 if (from_proto == htons(ETH_P_IPV6) &&
2699 to_proto == htons(ETH_P_IP))
2700 return bpf_skb_proto_6_to_4(skb);
2701
2702 return -ENOTSUPP;
2703}
2704
2705BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
2706 u64, flags)
2707{
2708 int ret;
2709
2710 if (unlikely(flags))
2711 return -EINVAL;
2712
	/* This helper only switches skb->protocol between ETH_P_IP and
	 * ETH_P_IPV6: it inserts or removes the 20 byte size difference at
	 * the network header and fixes up the GSO state, but does not
	 * translate the headers themselves. The program is expected to
	 * rewrite the new network header afterwards, e.g. with
	 * bpf_skb_store_bytes() and the checksum helpers. The flow hash is
	 * cleared since the addresses change.
	 */
2730 ret = bpf_skb_proto_xlat(skb, proto);
2731 bpf_compute_data_pointers(skb);
2732 return ret;
2733}
2734
2735static const struct bpf_func_proto bpf_skb_change_proto_proto = {
2736 .func = bpf_skb_change_proto,
2737 .gpl_only = false,
2738 .ret_type = RET_INTEGER,
2739 .arg1_type = ARG_PTR_TO_CTX,
2740 .arg2_type = ARG_ANYTHING,
2741 .arg3_type = ARG_ANYTHING,
2742};
2743
2744BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
2745{
	/* Only the "host" packet types up to PACKET_OTHERHOST are allowed,
	 * for both the current and the requested type.
	 */
2747 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
2748 !skb_pkt_type_ok(pkt_type)))
2749 return -EINVAL;
2750
2751 skb->pkt_type = pkt_type;
2752 return 0;
2753}
2754
2755static const struct bpf_func_proto bpf_skb_change_type_proto = {
2756 .func = bpf_skb_change_type,
2757 .gpl_only = false,
2758 .ret_type = RET_INTEGER,
2759 .arg1_type = ARG_PTR_TO_CTX,
2760 .arg2_type = ARG_ANYTHING,
2761};
2762
2763static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
2764{
2765 switch (skb->protocol) {
2766 case htons(ETH_P_IP):
2767 return sizeof(struct iphdr);
2768 case htons(ETH_P_IPV6):
2769 return sizeof(struct ipv6hdr);
2770 default:
2771 return ~0U;
2772 }
2773}
2774
2775static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
2776{
2777 u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
2778 int ret;
2779
	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
2781 if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
2782 return -ENOTSUPP;
2783
2784 ret = skb_cow(skb, len_diff);
2785 if (unlikely(ret < 0))
2786 return ret;
2787
2788 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2789 if (unlikely(ret < 0))
2790 return ret;
2791
2792 if (skb_is_gso(skb)) {
2793 struct skb_shared_info *shinfo = skb_shinfo(skb);
2794
		/* Due to the larger header, MSS needs to be lowered. */
2796 skb_decrease_gso_size(shinfo, len_diff);
		/* Header must be checked, and gso_segs recomputed. */
2798 shinfo->gso_type |= SKB_GSO_DODGY;
2799 shinfo->gso_segs = 0;
2800 }
2801
2802 return 0;
2803}
2804
2805static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
2806{
2807 u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
2808 int ret;
2809
	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
2811 if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
2812 return -ENOTSUPP;
2813
2814 ret = skb_unclone(skb, GFP_ATOMIC);
2815 if (unlikely(ret < 0))
2816 return ret;
2817
2818 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2819 if (unlikely(ret < 0))
2820 return ret;
2821
2822 if (skb_is_gso(skb)) {
2823 struct skb_shared_info *shinfo = skb_shinfo(skb);
2824
		/* Due to the smaller header, MSS can be raised. */
2826 skb_increase_gso_size(shinfo, len_diff);
		/* Header must be checked, and gso_segs recomputed. */
2828 shinfo->gso_type |= SKB_GSO_DODGY;
2829 shinfo->gso_segs = 0;
2830 }
2831
2832 return 0;
2833}
2834
2835static u32 __bpf_skb_max_len(const struct sk_buff *skb)
2836{
2837 return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
2838 SKB_MAX_ALLOC;
2839}
2840
2841static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
2842{
2843 bool trans_same = skb->transport_header == skb->network_header;
2844 u32 len_cur, len_diff_abs = abs(len_diff);
2845 u32 len_min = bpf_skb_net_base_len(skb);
2846 u32 len_max = __bpf_skb_max_len(skb);
2847 __be16 proto = skb->protocol;
2848 bool shrink = len_diff < 0;
2849 int ret;
2850
2851 if (unlikely(len_diff_abs > 0xfffU))
2852 return -EFAULT;
2853 if (unlikely(proto != htons(ETH_P_IP) &&
2854 proto != htons(ETH_P_IPV6)))
2855 return -ENOTSUPP;
2856
2857 len_cur = skb->len - skb_network_offset(skb);
2858 if (skb_transport_header_was_set(skb) && !trans_same)
2859 len_cur = skb_network_header_len(skb);
2860 if ((shrink && (len_diff_abs >= len_cur ||
2861 len_cur - len_diff_abs < len_min)) ||
2862 (!shrink && (skb->len + len_diff_abs > len_max &&
2863 !skb_is_gso(skb))))
2864 return -ENOTSUPP;
2865
2866 ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
2867 bpf_skb_net_grow(skb, len_diff_abs);
2868
2869 bpf_compute_data_pointers(skb);
2870 return ret;
2871}
2872
2873BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
2874 u32, mode, u64, flags)
2875{
2876 if (unlikely(flags))
2877 return -EINVAL;
2878 if (likely(mode == BPF_ADJ_ROOM_NET))
2879 return bpf_skb_adjust_net(skb, len_diff);
2880
2881 return -ENOTSUPP;
2882}
2883
2884static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
2885 .func = bpf_skb_adjust_room,
2886 .gpl_only = false,
2887 .ret_type = RET_INTEGER,
2888 .arg1_type = ARG_PTR_TO_CTX,
2889 .arg2_type = ARG_ANYTHING,
2890 .arg3_type = ARG_ANYTHING,
2891 .arg4_type = ARG_ANYTHING,
2892};
2893
2894static u32 __bpf_skb_min_len(const struct sk_buff *skb)
2895{
2896 u32 min_len = skb_network_offset(skb);
2897
2898 if (skb_transport_header_was_set(skb))
2899 min_len = skb_transport_offset(skb);
2900 if (skb->ip_summed == CHECKSUM_PARTIAL)
2901 min_len = skb_checksum_start_offset(skb) +
2902 skb->csum_offset + sizeof(__sum16);
2903 return min_len;
2904}
2905
2906static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
2907{
2908 unsigned int old_len = skb->len;
2909 int ret;
2910
2911 ret = __skb_grow_rcsum(skb, new_len);
2912 if (!ret)
2913 memset(skb->data + old_len, 0, new_len - old_len);
2914 return ret;
2915}
2916
2917static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
2918{
2919 return __skb_trim_rcsum(skb, new_len);
2920}
2921
2922static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
2923 u64 flags)
2924{
2925 u32 max_len = __bpf_skb_max_len(skb);
2926 u32 min_len = __bpf_skb_min_len(skb);
2927 int ret;
2928
2929 if (unlikely(flags || new_len > max_len || new_len < min_len))
2930 return -EINVAL;
2931 if (skb->encapsulation)
2932 return -ENOTSUPP;
2933
	/* This grows or trims the skb to exactly @new_len and takes care of
	 * the checksum bookkeeping: grown tail room is zeroed (a no-op for
	 * checksum complete), trimming goes through __skb_trim_rcsum(). The
	 * program then fills in the payload via direct packet access or
	 * bpf_skb_store_bytes(). Since the length changes, any GSO state
	 * becomes stale and is reset below.
	 */
2950 ret = __bpf_try_make_writable(skb, skb->len);
2951 if (!ret) {
2952 if (new_len > skb->len)
2953 ret = bpf_skb_grow_rcsum(skb, new_len);
2954 else if (new_len < skb->len)
2955 ret = bpf_skb_trim_rcsum(skb, new_len);
2956 if (!ret && skb_is_gso(skb))
2957 skb_gso_reset(skb);
2958 }
2959 return ret;
2960}
2961
2962BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
2963 u64, flags)
2964{
2965 int ret = __bpf_skb_change_tail(skb, new_len, flags);
2966
2967 bpf_compute_data_pointers(skb);
2968 return ret;
2969}
2970
2971static const struct bpf_func_proto bpf_skb_change_tail_proto = {
2972 .func = bpf_skb_change_tail,
2973 .gpl_only = false,
2974 .ret_type = RET_INTEGER,
2975 .arg1_type = ARG_PTR_TO_CTX,
2976 .arg2_type = ARG_ANYTHING,
2977 .arg3_type = ARG_ANYTHING,
2978};
2979
2980BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
2981 u64, flags)
2982{
2983 int ret = __bpf_skb_change_tail(skb, new_len, flags);
2984
2985 bpf_compute_data_end_sk_skb(skb);
2986 return ret;
2987}
2988
2989static const struct bpf_func_proto sk_skb_change_tail_proto = {
2990 .func = sk_skb_change_tail,
2991 .gpl_only = false,
2992 .ret_type = RET_INTEGER,
2993 .arg1_type = ARG_PTR_TO_CTX,
2994 .arg2_type = ARG_ANYTHING,
2995 .arg3_type = ARG_ANYTHING,
2996};
2997
2998static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
2999 u64 flags)
3000{
3001 u32 max_len = __bpf_skb_max_len(skb);
3002 u32 new_len = skb->len + head_room;
3003 int ret;
3004
3005 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
3006 new_len < skb->len))
3007 return -EINVAL;
3008
3009 ret = skb_cow(skb, head_room);
3010 if (likely(!ret)) {
		/* Only the mac header is expanded here; skb->protocol, the
		 * network header etc. stay as they are. The typical user is
		 * an L3 skb that needs a mac header pushed before being
		 * redirected into an L2 device.
		 */
3020 __skb_push(skb, head_room);
3021 memset(skb->data, 0, head_room);
3022 skb_reset_mac_header(skb);
3023 }
3024
3025 return ret;
3026}
3027
3028BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3029 u64, flags)
3030{
3031 int ret = __bpf_skb_change_head(skb, head_room, flags);
3032
3033 bpf_compute_data_pointers(skb);
3034 return ret;
3035}
3036
3037static const struct bpf_func_proto bpf_skb_change_head_proto = {
3038 .func = bpf_skb_change_head,
3039 .gpl_only = false,
3040 .ret_type = RET_INTEGER,
3041 .arg1_type = ARG_PTR_TO_CTX,
3042 .arg2_type = ARG_ANYTHING,
3043 .arg3_type = ARG_ANYTHING,
3044};
3045
3046BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3047 u64, flags)
3048{
3049 int ret = __bpf_skb_change_head(skb, head_room, flags);
3050
3051 bpf_compute_data_end_sk_skb(skb);
3052 return ret;
3053}
3054
3055static const struct bpf_func_proto sk_skb_change_head_proto = {
3056 .func = sk_skb_change_head,
3057 .gpl_only = false,
3058 .ret_type = RET_INTEGER,
3059 .arg1_type = ARG_PTR_TO_CTX,
3060 .arg2_type = ARG_ANYTHING,
3061 .arg3_type = ARG_ANYTHING,
3062};
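
/* Length of the metadata area in front of xdp->data, or 0 if the
 * driver does not support metadata.
 */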
3063static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3064{
3065 return xdp_data_meta_unsupported(xdp) ? 0 :
3066 xdp->data - xdp->data_meta;
3067}
3068
3069BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
3070{
3071 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3072 unsigned long metalen = xdp_get_metalen(xdp);
3073 void *data_start = xdp_frame_end + metalen;
3074 void *data = xdp->data + offset;
3075
3076 if (unlikely(data < data_start ||
3077 data > xdp->data_end - ETH_HLEN))
3078 return -EINVAL;
3079
3080 if (metalen)
3081 memmove(xdp->data_meta + offset,
3082 xdp->data_meta, metalen);
3083 xdp->data_meta += offset;
3084 xdp->data = data;
3085
3086 return 0;
3087}
3088
3089static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3090 .func = bpf_xdp_adjust_head,
3091 .gpl_only = false,
3092 .ret_type = RET_INTEGER,
3093 .arg1_type = ARG_PTR_TO_CTX,
3094 .arg2_type = ARG_ANYTHING,
3095};
3096
3097BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
3098{
3099 void *data_end = xdp->data_end + offset;
3100
	/* Only shrinking the tail is supported for now. */
3102 if (unlikely(offset >= 0))
3103 return -EINVAL;
3104
3105 if (unlikely(data_end < xdp->data + ETH_HLEN))
3106 return -EINVAL;
3107
3108 xdp->data_end = data_end;
3109
3110 return 0;
3111}
3112
3113static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
3114 .func = bpf_xdp_adjust_tail,
3115 .gpl_only = false,
3116 .ret_type = RET_INTEGER,
3117 .arg1_type = ARG_PTR_TO_CTX,
3118 .arg2_type = ARG_ANYTHING,
3119};
3120
3121BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
3122{
3123 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3124 void *meta = xdp->data_meta + offset;
3125 unsigned long metalen = xdp->data - meta;
3126
3127 if (xdp_data_meta_unsupported(xdp))
3128 return -ENOTSUPP;
3129 if (unlikely(meta < xdp_frame_end ||
3130 meta > xdp->data))
3131 return -EINVAL;
3132 if (unlikely((metalen & (sizeof(__u32) - 1)) ||
3133 (metalen > 32)))
3134 return -EACCES;
3135
3136 xdp->data_meta = meta;
3137
3138 return 0;
3139}
3140
3141static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
3142 .func = bpf_xdp_adjust_meta,
3143 .gpl_only = false,
3144 .ret_type = RET_INTEGER,
3145 .arg1_type = ARG_PTR_TO_CTX,
3146 .arg2_type = ARG_ANYTHING,
3147};
3148
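/* Transmit a single XDP buffer out of @dev through its ndo_xdp_xmit()
 * callback with an immediate flush; used for the plain (non-map)
 * redirect path.
 */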
3149static int __bpf_tx_xdp(struct net_device *dev,
3150 struct bpf_map *map,
3151 struct xdp_buff *xdp,
3152 u32 index)
3153{
3154 struct xdp_frame *xdpf;
3155 int err, sent;
3156
3157 if (!dev->netdev_ops->ndo_xdp_xmit) {
3158 return -EOPNOTSUPP;
3159 }
3160
3161 err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
3162 if (unlikely(err))
3163 return err;
3164
3165 xdpf = convert_to_xdp_frame(xdp);
3166 if (unlikely(!xdpf))
3167 return -EOVERFLOW;
3168
3169 sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
3170 if (sent <= 0)
3171 return sent;
3172 return 0;
3173}
3174
3175static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
3176 struct bpf_map *map,
3177 struct xdp_buff *xdp,
3178 u32 index)
3179{
3180 int err;
3181
3182 switch (map->map_type) {
3183 case BPF_MAP_TYPE_DEVMAP: {
3184 struct bpf_dtab_netdev *dst = fwd;
3185
3186 err = dev_map_enqueue(dst, xdp, dev_rx);
3187 if (err)
3188 return err;
3189 __dev_map_insert_ctx(map, index);
3190 break;
3191 }
3192 case BPF_MAP_TYPE_CPUMAP: {
3193 struct bpf_cpu_map_entry *rcpu = fwd;
3194
3195 err = cpu_map_enqueue(rcpu, xdp, dev_rx);
3196 if (err)
3197 return err;
3198 __cpu_map_insert_ctx(map, index);
3199 break;
3200 }
3201 case BPF_MAP_TYPE_XSKMAP: {
3202 struct xdp_sock *xs = fwd;
3203
3204 err = __xsk_map_redirect(map, xdp, xs);
3205 return err;
3206 }
3207 default:
3208 break;
3209 }
3210 return 0;
3211}
3212
3213void xdp_do_flush_map(void)
3214{
3215 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3216 struct bpf_map *map = ri->map_to_flush;
3217
3218 ri->map_to_flush = NULL;
3219 if (map) {
3220 switch (map->map_type) {
3221 case BPF_MAP_TYPE_DEVMAP:
3222 __dev_map_flush(map);
3223 break;
3224 case BPF_MAP_TYPE_CPUMAP:
3225 __cpu_map_flush(map);
3226 break;
3227 case BPF_MAP_TYPE_XSKMAP:
3228 __xsk_map_flush(map);
3229 break;
3230 default:
3231 break;
3232 }
3233 }
3234}
3235EXPORT_SYMBOL_GPL(xdp_do_flush_map);
3236
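/* Look up the forwarding target for @index in a devmap, cpumap or
 * xskmap; returns NULL for empty slots or unsupported map types.
 */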
3237static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
3238{
3239 switch (map->map_type) {
3240 case BPF_MAP_TYPE_DEVMAP:
3241 return __dev_map_lookup_elem(map, index);
3242 case BPF_MAP_TYPE_CPUMAP:
3243 return __cpu_map_lookup_elem(map, index);
3244 case BPF_MAP_TYPE_XSKMAP:
3245 return __xsk_map_lookup_elem(map, index);
3246 default:
3247 return NULL;
3248 }
3249}
3250
3251static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
3252 unsigned long aux)
3253{
3254 return (unsigned long)xdp_prog->aux != aux;
3255}
3256
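/* Map-based redirect: resolve the target recorded by the program via
 * bpf_redirect_map(), enqueue the frame to it and remember the map so
 * it can be flushed later through xdp_do_flush_map().
 */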
3257static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
3258 struct bpf_prog *xdp_prog)
3259{
3260 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3261 unsigned long map_owner = ri->map_owner;
3262 struct bpf_map *map = ri->map;
3263 u32 index = ri->ifindex;
3264 void *fwd = NULL;
3265 int err;
3266
3267 ri->ifindex = 0;
3268 ri->map = NULL;
3269 ri->map_owner = 0;
3270
3271 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
3272 err = -EFAULT;
3273 map = NULL;
3274 goto err;
3275 }
3276
3277 fwd = __xdp_map_lookup_elem(map, index);
3278 if (!fwd) {
3279 err = -EINVAL;
3280 goto err;
3281 }
3282 if (ri->map_to_flush && ri->map_to_flush != map)
3283 xdp_do_flush_map();
3284
3285 err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
3286 if (unlikely(err))
3287 goto err;
3288
3289 ri->map_to_flush = map;
3290 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
3291 return 0;
3292err:
3293 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
3294 return err;
3295}
3296
3297int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
3298 struct bpf_prog *xdp_prog)
3299{
3300 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3301 struct net_device *fwd;
3302 u32 index = ri->ifindex;
3303 int err;
3304
3305 if (ri->map)
3306 return xdp_do_redirect_map(dev, xdp, xdp_prog);
3307
3308 fwd = dev_get_by_index_rcu(dev_net(dev), index);
3309 ri->ifindex = 0;
3310 if (unlikely(!fwd)) {
3311 err = -EINVAL;
3312 goto err;
3313 }
3314
3315 err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
3316 if (unlikely(err))
3317 goto err;
3318
3319 _trace_xdp_redirect(dev, xdp_prog, index);
3320 return 0;
3321err:
3322 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
3323 return err;
3324}
3325EXPORT_SYMBOL_GPL(xdp_do_redirect);
3326
3327static int xdp_do_generic_redirect_map(struct net_device *dev,
3328 struct sk_buff *skb,
3329 struct xdp_buff *xdp,
3330 struct bpf_prog *xdp_prog)
3331{
3332 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3333 unsigned long map_owner = ri->map_owner;
3334 struct bpf_map *map = ri->map;
3335 u32 index = ri->ifindex;
3336 void *fwd = NULL;
3337 int err = 0;
3338
3339 ri->ifindex = 0;
3340 ri->map = NULL;
3341 ri->map_owner = 0;
3342
3343 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
3344 err = -EFAULT;
3345 map = NULL;
3346 goto err;
3347 }
3348 fwd = __xdp_map_lookup_elem(map, index);
3349 if (unlikely(!fwd)) {
3350 err = -EINVAL;
3351 goto err;
3352 }
3353
3354 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
3355 struct bpf_dtab_netdev *dst = fwd;
3356
3357 err = dev_map_generic_redirect(dst, skb, xdp_prog);
3358 if (unlikely(err))
3359 goto err;
3360 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
3361 struct xdp_sock *xs = fwd;
3362
3363 err = xsk_generic_rcv(xs, xdp);
3364 if (err)
3365 goto err;
3366 consume_skb(skb);
3367 } else {
		/* Other map types (e.g. cpumap) are not supported in the
		 * generic XDP path.
		 */
3369 err = -EBADRQC;
3370 goto err;
3371 }
3372
3373 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
3374 return 0;
3375err:
3376 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
3377 return err;
3378}
3379
3380int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
3381 struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
3382{
3383 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3384 u32 index = ri->ifindex;
3385 struct net_device *fwd;
3386 int err = 0;
3387
3388 if (ri->map)
3389 return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog);
3390
3391 ri->ifindex = 0;
3392 fwd = dev_get_by_index_rcu(dev_net(dev), index);
3393 if (unlikely(!fwd)) {
3394 err = -EINVAL;
3395 goto err;
3396 }
3397
3398 err = xdp_ok_fwd_dev(fwd, skb->len);
3399 if (unlikely(err))
3400 goto err;
3401
3402 skb->dev = fwd;
3403 _trace_xdp_redirect(dev, xdp_prog, index);
3404 generic_xdp_tx(skb, xdp_prog);
3405 return 0;
3406err:
3407 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
3408 return err;
3409}
3410EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
3411
3412BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
3413{
3414 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3415
3416 if (unlikely(flags))
3417 return XDP_ABORTED;
3418
3419 ri->ifindex = ifindex;
3420 ri->flags = flags;
3421 ri->map = NULL;
3422 ri->map_owner = 0;
3423
3424 return XDP_REDIRECT;
3425}
3426
3427static const struct bpf_func_proto bpf_xdp_redirect_proto = {
3428 .func = bpf_xdp_redirect,
3429 .gpl_only = false,
3430 .ret_type = RET_INTEGER,
3431 .arg1_type = ARG_ANYTHING,
3432 .arg2_type = ARG_ANYTHING,
3433};
3434
3435BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
3436 unsigned long, map_owner)
3437{
3438 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3439
3440 if (unlikely(flags))
3441 return XDP_ABORTED;
3442
3443 ri->ifindex = ifindex;
3444 ri->flags = flags;
3445 ri->map = map;
3446 ri->map_owner = map_owner;
3447
3448 return XDP_REDIRECT;
3449}
3450
/* Note that the map_owner argument is not supplied by the program; the
 * verifier patches it in at load time, which is why only three argument
 * types are declared below.
 */
3454static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
3455 .func = bpf_xdp_redirect_map,
3456 .gpl_only = false,
3457 .ret_type = RET_INTEGER,
3458 .arg1_type = ARG_CONST_MAP_PTR,
3459 .arg2_type = ARG_ANYTHING,
3460 .arg3_type = ARG_ANYTHING,
3461};
3462
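/* Copy callback for bpf_skb_event_output(): fetches @len bytes at
 * offset @off from the skb (linear or paged data) into @dst_buff.
 */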
3463static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
3464 unsigned long off, unsigned long len)
3465{
3466 void *ptr = skb_header_pointer(skb, off, len, dst_buff);
3467
3468 if (unlikely(!ptr))
3469 return len;
3470 if (ptr != dst_buff)
3471 memcpy(dst_buff, ptr, len);
3472
3473 return 0;
3474}
3475
3476BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
3477 u64, flags, void *, meta, u64, meta_size)
3478{
3479 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
3480
3481 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
3482 return -EINVAL;
3483 if (unlikely(skb_size > skb->len))
3484 return -EFAULT;
3485
3486 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
3487 bpf_skb_copy);
3488}
3489
3490static const struct bpf_func_proto bpf_skb_event_output_proto = {
3491 .func = bpf_skb_event_output,
3492 .gpl_only = true,
3493 .ret_type = RET_INTEGER,
3494 .arg1_type = ARG_PTR_TO_CTX,
3495 .arg2_type = ARG_CONST_MAP_PTR,
3496 .arg3_type = ARG_ANYTHING,
3497 .arg4_type = ARG_PTR_TO_MEM,
3498 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
3499};
3500
3501static unsigned short bpf_tunnel_key_af(u64 flags)
3502{
3503 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
3504}
3505
3506BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
3507 u32, size, u64, flags)
3508{
3509 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
3510 u8 compat[sizeof(struct bpf_tunnel_key)];
3511 void *to_orig = to;
3512 int err;
3513
3514 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
3515 err = -EINVAL;
3516 goto err_clear;
3517 }
3518 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
3519 err = -EPROTO;
3520 goto err_clear;
3521 }
3522 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
3523 err = -EINVAL;
3524 switch (size) {
3525 case offsetof(struct bpf_tunnel_key, tunnel_label):
3526 case offsetof(struct bpf_tunnel_key, tunnel_ext):
3527 goto set_compat;
3528 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
			/* Fixup deprecated (shorter) structure layouts here,
			 * so there is a single common path below.
			 */
3532 if (ip_tunnel_info_af(info) != AF_INET)
3533 goto err_clear;
3534set_compat:
3535 to = (struct bpf_tunnel_key *)compat;
3536 break;
3537 default:
3538 goto err_clear;
3539 }
3540 }
3541
3542 to->tunnel_id = be64_to_cpu(info->key.tun_id);
3543 to->tunnel_tos = info->key.tos;
3544 to->tunnel_ttl = info->key.ttl;
3545 to->tunnel_ext = 0;
3546
3547 if (flags & BPF_F_TUNINFO_IPV6) {
3548 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
3549 sizeof(to->remote_ipv6));
3550 to->tunnel_label = be32_to_cpu(info->key.label);
3551 } else {
3552 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
3553 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
3554 to->tunnel_label = 0;
3555 }
3556
3557 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
3558 memcpy(to_orig, to, size);
3559
3560 return 0;
3561err_clear:
3562 memset(to_orig, 0, size);
3563 return err;
3564}
3565
3566static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
3567 .func = bpf_skb_get_tunnel_key,
3568 .gpl_only = false,
3569 .ret_type = RET_INTEGER,
3570 .arg1_type = ARG_PTR_TO_CTX,
3571 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
3572 .arg3_type = ARG_CONST_SIZE,
3573 .arg4_type = ARG_ANYTHING,
3574};
3575
3576BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
3577{
3578 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
3579 int err;
3580
3581 if (unlikely(!info ||
3582 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
3583 err = -ENOENT;
3584 goto err_clear;
3585 }
3586 if (unlikely(size < info->options_len)) {
3587 err = -ENOMEM;
3588 goto err_clear;
3589 }
3590
3591 ip_tunnel_info_opts_get(to, info);
3592 if (size > info->options_len)
3593 memset(to + info->options_len, 0, size - info->options_len);
3594
3595 return info->options_len;
3596err_clear:
3597 memset(to, 0, size);
3598 return err;
3599}
3600
3601static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
3602 .func = bpf_skb_get_tunnel_opt,
3603 .gpl_only = false,
3604 .ret_type = RET_INTEGER,
3605 .arg1_type = ARG_PTR_TO_CTX,
3606 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
3607 .arg3_type = ARG_CONST_SIZE,
3608};
3609
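/* Per-CPU metadata dst used by bpf_skb_set_tunnel_key() and
 * bpf_skb_set_tunnel_opt() to attach tunnel transmit info to skbs.
 */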
3610static struct metadata_dst __percpu *md_dst;
3611
3612BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
3613 const struct bpf_tunnel_key *, from, u32, size, u64, flags)
3614{
3615 struct metadata_dst *md = this_cpu_ptr(md_dst);
3616 u8 compat[sizeof(struct bpf_tunnel_key)];
3617 struct ip_tunnel_info *info;
3618
3619 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
3620 BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
3621 return -EINVAL;
3622 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
3623 switch (size) {
3624 case offsetof(struct bpf_tunnel_key, tunnel_label):
3625 case offsetof(struct bpf_tunnel_key, tunnel_ext):
3626 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
			/* Fixup deprecated (shorter) structure layouts here,
			 * so there is a single common path below.
			 */
3630 memcpy(compat, from, size);
3631 memset(compat + size, 0, sizeof(compat) - size);
3632 from = (const struct bpf_tunnel_key *) compat;
3633 break;
3634 default:
3635 return -EINVAL;
3636 }
3637 }
3638 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
3639 from->tunnel_ext))
3640 return -EINVAL;
3641
3642 skb_dst_drop(skb);
3643 dst_hold((struct dst_entry *) md);
3644 skb_dst_set(skb, (struct dst_entry *) md);
3645
3646 info = &md->u.tun_info;
3647 memset(info, 0, sizeof(*info));
3648 info->mode = IP_TUNNEL_INFO_TX;
3649
3650 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
3651 if (flags & BPF_F_DONT_FRAGMENT)
3652 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
3653 if (flags & BPF_F_ZERO_CSUM_TX)
3654 info->key.tun_flags &= ~TUNNEL_CSUM;
3655 if (flags & BPF_F_SEQ_NUMBER)
3656 info->key.tun_flags |= TUNNEL_SEQ;
3657
3658 info->key.tun_id = cpu_to_be64(from->tunnel_id);
3659 info->key.tos = from->tunnel_tos;
3660 info->key.ttl = from->tunnel_ttl;
3661
3662 if (flags & BPF_F_TUNINFO_IPV6) {
3663 info->mode |= IP_TUNNEL_INFO_IPV6;
3664 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
3665 sizeof(from->remote_ipv6));
3666 info->key.label = cpu_to_be32(from->tunnel_label) &
3667 IPV6_FLOWLABEL_MASK;
3668 } else {
3669 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
3670 }
3671
3672 return 0;
3673}
3674
3675static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
3676 .func = bpf_skb_set_tunnel_key,
3677 .gpl_only = false,
3678 .ret_type = RET_INTEGER,
3679 .arg1_type = ARG_PTR_TO_CTX,
3680 .arg2_type = ARG_PTR_TO_MEM,
3681 .arg3_type = ARG_CONST_SIZE,
3682 .arg4_type = ARG_ANYTHING,
3683};
3684
3685BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
3686 const u8 *, from, u32, size)
3687{
3688 struct ip_tunnel_info *info = skb_tunnel_info(skb);
3689 const struct metadata_dst *md = this_cpu_ptr(md_dst);
3690
3691 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
3692 return -EINVAL;
3693 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
3694 return -ENOMEM;
3695
3696 ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
3697
3698 return 0;
3699}
3700
3701static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
3702 .func = bpf_skb_set_tunnel_opt,
3703 .gpl_only = false,
3704 .ret_type = RET_INTEGER,
3705 .arg1_type = ARG_PTR_TO_CTX,
3706 .arg2_type = ARG_PTR_TO_MEM,
3707 .arg3_type = ARG_CONST_SIZE,
3708};
3709
3710static const struct bpf_func_proto *
3711bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
3712{
3713 if (!md_dst) {
3714 struct metadata_dst __percpu *tmp;
3715
3716 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
3717 METADATA_IP_TUNNEL,
3718 GFP_KERNEL);
3719 if (!tmp)
3720 return NULL;
3721 if (cmpxchg(&md_dst, NULL, tmp))
3722 metadata_dst_free_percpu(tmp);
3723 }
3724
3725 switch (which) {
3726 case BPF_FUNC_skb_set_tunnel_key:
3727 return &bpf_skb_set_tunnel_key_proto;
3728 case BPF_FUNC_skb_set_tunnel_opt:
3729 return &bpf_skb_set_tunnel_opt_proto;
3730 default:
3731 return NULL;
3732 }
3733}
3734
3735BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
3736 u32, idx)
3737{
3738 struct bpf_array *array = container_of(map, struct bpf_array, map);
3739 struct cgroup *cgrp;
3740 struct sock *sk;
3741
3742 sk = skb_to_full_sk(skb);
3743 if (!sk || !sk_fullsock(sk))
3744 return -ENOENT;
3745 if (unlikely(idx >= array->map.max_entries))
3746 return -E2BIG;
3747
3748 cgrp = READ_ONCE(array->ptrs[idx]);
3749 if (unlikely(!cgrp))
3750 return -EAGAIN;
3751
3752 return sk_under_cgroup_hierarchy(sk, cgrp);
3753}
3754
3755static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
3756 .func = bpf_skb_under_cgroup,
3757 .gpl_only = false,
3758 .ret_type = RET_INTEGER,
3759 .arg1_type = ARG_PTR_TO_CTX,
3760 .arg2_type = ARG_CONST_MAP_PTR,
3761 .arg3_type = ARG_ANYTHING,
3762};
3763
3764#ifdef CONFIG_SOCK_CGROUP_DATA
3765BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
3766{
3767 struct sock *sk = skb_to_full_sk(skb);
3768 struct cgroup *cgrp;
3769
3770 if (!sk || !sk_fullsock(sk))
3771 return 0;
3772
3773 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
3774 return cgrp->kn->id.id;
3775}
3776
3777static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
3778 .func = bpf_skb_cgroup_id,
3779 .gpl_only = false,
3780 .ret_type = RET_INTEGER,
3781 .arg1_type = ARG_PTR_TO_CTX,
3782};
3783#endif
3784
3785static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
3786 unsigned long off, unsigned long len)
3787{
3788 memcpy(dst_buff, src_buff + off, len);
3789 return 0;
3790}
3791
3792BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
3793 u64, flags, void *, meta, u64, meta_size)
3794{
3795 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
3796
3797 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
3798 return -EINVAL;
3799 if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
3800 return -EFAULT;
3801
3802 return bpf_event_output(map, flags, meta, meta_size, xdp->data,
3803 xdp_size, bpf_xdp_copy);
3804}
3805
3806static const struct bpf_func_proto bpf_xdp_event_output_proto = {
3807 .func = bpf_xdp_event_output,
3808 .gpl_only = true,
3809 .ret_type = RET_INTEGER,
3810 .arg1_type = ARG_PTR_TO_CTX,
3811 .arg2_type = ARG_CONST_MAP_PTR,
3812 .arg3_type = ARG_ANYTHING,
3813 .arg4_type = ARG_PTR_TO_MEM,
3814 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
3815};
3816
3817BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
3818{
3819 return skb->sk ? sock_gen_cookie(skb->sk) : 0;
3820}
3821
3822static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
3823 .func = bpf_get_socket_cookie,
3824 .gpl_only = false,
3825 .ret_type = RET_INTEGER,
3826 .arg1_type = ARG_PTR_TO_CTX,
3827};
3828
3829BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
3830{
3831 struct sock *sk = sk_to_full_sk(skb->sk);
3832 kuid_t kuid;
3833
3834 if (!sk || !sk_fullsock(sk))
3835 return overflowuid;
3836 kuid = sock_net_uid(sock_net(sk), sk);
3837 return from_kuid_munged(sock_net(sk)->user_ns, kuid);
3838}
3839
3840static const struct bpf_func_proto bpf_get_socket_uid_proto = {
3841 .func = bpf_get_socket_uid,
3842 .gpl_only = false,
3843 .ret_type = RET_INTEGER,
3844 .arg1_type = ARG_PTR_TO_CTX,
3845};
3846
3847BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3848 int, level, int, optname, char *, optval, int, optlen)
3849{
3850 struct sock *sk = bpf_sock->sk;
3851 int ret = 0;
3852 int val;
3853
3854 if (!sk_fullsock(sk))
3855 return -EINVAL;
3856
3857 if (level == SOL_SOCKET) {
3858 if (optlen != sizeof(int))
3859 return -EINVAL;
3860 val = *((int *)optval);
3861
		/* Only a subset of socket options is supported. */
3863 switch (optname) {
3864 case SO_RCVBUF:
3865 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
3866 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
3867 break;
3868 case SO_SNDBUF:
3869 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
3870 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
3871 break;
3872 case SO_MAX_PACING_RATE:
3873 sk->sk_max_pacing_rate = val;
3874 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
3875 sk->sk_max_pacing_rate);
3876 break;
3877 case SO_PRIORITY:
3878 sk->sk_priority = val;
3879 break;
3880 case SO_RCVLOWAT:
3881 if (val < 0)
3882 val = INT_MAX;
3883 sk->sk_rcvlowat = val ? : 1;
3884 break;
3885 case SO_MARK:
3886 sk->sk_mark = val;
3887 break;
3888 default:
3889 ret = -EINVAL;
3890 }
3891#ifdef CONFIG_INET
3892 } else if (level == SOL_IP) {
3893 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
3894 return -EINVAL;
3895
3896 val = *((int *)optval);
3897
3898 switch (optname) {
3899 case IP_TOS:
3900 if (val < -1 || val > 0xff) {
3901 ret = -EINVAL;
3902 } else {
3903 struct inet_sock *inet = inet_sk(sk);
3904
3905 if (val == -1)
3906 val = 0;
3907 inet->tos = val;
3908 }
3909 break;
3910 default:
3911 ret = -EINVAL;
3912 }
3913#if IS_ENABLED(CONFIG_IPV6)
3914 } else if (level == SOL_IPV6) {
3915 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
3916 return -EINVAL;
3917
3918 val = *((int *)optval);
3919
3920 switch (optname) {
3921 case IPV6_TCLASS:
3922 if (val < -1 || val > 0xff) {
3923 ret = -EINVAL;
3924 } else {
3925 struct ipv6_pinfo *np = inet6_sk(sk);
3926
3927 if (val == -1)
3928 val = 0;
3929 np->tclass = val;
3930 }
3931 break;
3932 default:
3933 ret = -EINVAL;
3934 }
3935#endif
3936 } else if (level == SOL_TCP &&
3937 sk->sk_prot->setsockopt == tcp_setsockopt) {
3938 if (optname == TCP_CONGESTION) {
3939 char name[TCP_CA_NAME_MAX];
3940 bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
3941
3942 strncpy(name, optval, min_t(long, optlen,
3943 TCP_CA_NAME_MAX-1));
3944 name[TCP_CA_NAME_MAX-1] = 0;
3945 ret = tcp_set_congestion_control(sk, name, false,
3946 reinit);
3947 } else {
3948 struct tcp_sock *tp = tcp_sk(sk);
3949
3950 if (optlen != sizeof(int))
3951 return -EINVAL;
3952
3953 val = *((int *)optval);
3954
3955 switch (optname) {
3956 case TCP_BPF_IW:
3957 if (val <= 0 || tp->data_segs_out > 0)
3958 ret = -EINVAL;
3959 else
3960 tp->snd_cwnd = val;
3961 break;
3962 case TCP_BPF_SNDCWND_CLAMP:
3963 if (val <= 0) {
3964 ret = -EINVAL;
3965 } else {
3966 tp->snd_cwnd_clamp = val;
3967 tp->snd_ssthresh = val;
3968 }
3969 break;
3970 default:
3971 ret = -EINVAL;
3972 }
3973 }
3974#endif
3975 } else {
3976 ret = -EINVAL;
3977 }
3978 return ret;
3979}
3980
3981static const struct bpf_func_proto bpf_setsockopt_proto = {
3982 .func = bpf_setsockopt,
3983 .gpl_only = false,
3984 .ret_type = RET_INTEGER,
3985 .arg1_type = ARG_PTR_TO_CTX,
3986 .arg2_type = ARG_ANYTHING,
3987 .arg3_type = ARG_ANYTHING,
3988 .arg4_type = ARG_PTR_TO_MEM,
3989 .arg5_type = ARG_CONST_SIZE,
3990};
3991
3992BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3993 int, level, int, optname, char *, optval, int, optlen)
3994{
3995 struct sock *sk = bpf_sock->sk;
3996
3997 if (!sk_fullsock(sk))
3998 goto err_clear;
3999
4000#ifdef CONFIG_INET
4001 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
4002 if (optname == TCP_CONGESTION) {
4003 struct inet_connection_sock *icsk = inet_csk(sk);
4004
4005 if (!icsk->icsk_ca_ops || optlen <= 1)
4006 goto err_clear;
4007 strncpy(optval, icsk->icsk_ca_ops->name, optlen);
4008 optval[optlen - 1] = 0;
4009 } else {
4010 goto err_clear;
4011 }
4012 } else if (level == SOL_IP) {
4013 struct inet_sock *inet = inet_sk(sk);
4014
4015 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4016 goto err_clear;
4017
		/* Only a subset of options is supported. */
4019 switch (optname) {
4020 case IP_TOS:
4021 *((int *)optval) = (int)inet->tos;
4022 break;
4023 default:
4024 goto err_clear;
4025 }
4026#if IS_ENABLED(CONFIG_IPV6)
4027 } else if (level == SOL_IPV6) {
4028 struct ipv6_pinfo *np = inet6_sk(sk);
4029
4030 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4031 goto err_clear;
4032
		/* Only a subset of options is supported. */
4034 switch (optname) {
4035 case IPV6_TCLASS:
4036 *((int *)optval) = (int)np->tclass;
4037 break;
4038 default:
4039 goto err_clear;
4040 }
4041#endif
4042 } else {
4043 goto err_clear;
4044 }
4045 return 0;
4046#endif
4047err_clear:
4048 memset(optval, 0, optlen);
4049 return -EINVAL;
4050}
4051
4052static const struct bpf_func_proto bpf_getsockopt_proto = {
4053 .func = bpf_getsockopt,
4054 .gpl_only = false,
4055 .ret_type = RET_INTEGER,
4056 .arg1_type = ARG_PTR_TO_CTX,
4057 .arg2_type = ARG_ANYTHING,
4058 .arg3_type = ARG_ANYTHING,
4059 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
4060 .arg5_type = ARG_CONST_SIZE,
4061};
4062
4063BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
4064 int, argval)
4065{
4066 struct sock *sk = bpf_sock->sk;
4067 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
4068
4069 if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
4070 return -EINVAL;
4071
4072 if (val)
4073 tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
4074
4075 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
4076}
4077
4078static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
4079 .func = bpf_sock_ops_cb_flags_set,
4080 .gpl_only = false,
4081 .ret_type = RET_INTEGER,
4082 .arg1_type = ARG_PTR_TO_CTX,
4083 .arg2_type = ARG_ANYTHING,
4084};
4085
4086const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
4087EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
4088
4089BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
4090 int, addr_len)
4091{
4092#ifdef CONFIG_INET
4093 struct sock *sk = ctx->sk;
4094 int err;
4095
	/* Only binding to an IP address is supported; requests that also
	 * specify a port are rejected below.
	 */
4099 err = -EINVAL;
4100 if (addr->sa_family == AF_INET) {
4101 if (addr_len < sizeof(struct sockaddr_in))
4102 return err;
4103 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
4104 return err;
4105 return __inet_bind(sk, addr, addr_len, true, false);
4106#if IS_ENABLED(CONFIG_IPV6)
4107 } else if (addr->sa_family == AF_INET6) {
4108 if (addr_len < SIN6_LEN_RFC2133)
4109 return err;
4110 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
4111 return err;
		/* inet6_bind() lives in the (possibly modular) IPv6 code,
		 * hence the indirection through ipv6_bpf_stub.
		 */
4115 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
4116#endif
4117 }
4118#endif
4119
4120 return -EAFNOSUPPORT;
4121}
4122
4123static const struct bpf_func_proto bpf_bind_proto = {
4124 .func = bpf_bind,
4125 .gpl_only = false,
4126 .ret_type = RET_INTEGER,
4127 .arg1_type = ARG_PTR_TO_CTX,
4128 .arg2_type = ARG_PTR_TO_MEM,
4129 .arg3_type = ARG_CONST_SIZE,
4130};
4131
4132#ifdef CONFIG_XFRM
4133BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
4134 struct bpf_xfrm_state *, to, u32, size, u64, flags)
4135{
4136 const struct sec_path *sp = skb_sec_path(skb);
4137 const struct xfrm_state *x;
4138
4139 if (!sp || unlikely(index >= sp->len || flags))
4140 goto err_clear;
4141
4142 x = sp->xvec[index];
4143
4144 if (unlikely(size != sizeof(struct bpf_xfrm_state)))
4145 goto err_clear;
4146
4147 to->reqid = x->props.reqid;
4148 to->spi = x->id.spi;
4149 to->family = x->props.family;
4150 to->ext = 0;
4151
4152 if (to->family == AF_INET6) {
4153 memcpy(to->remote_ipv6, x->props.saddr.a6,
4154 sizeof(to->remote_ipv6));
4155 } else {
4156 to->remote_ipv4 = x->props.saddr.a4;
4157 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
4158 }
4159
4160 return 0;
4161err_clear:
4162 memset(to, 0, size);
4163 return -EINVAL;
4164}
4165
4166static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
4167 .func = bpf_skb_get_xfrm_state,
4168 .gpl_only = false,
4169 .ret_type = RET_INTEGER,
4170 .arg1_type = ARG_PTR_TO_CTX,
4171 .arg2_type = ARG_ANYTHING,
4172 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
4173 .arg4_type = ARG_CONST_SIZE,
4174 .arg5_type = ARG_ANYTHING,
4175};
4176#endif
4177
4178#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
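/* Fill in the L2 forwarding result: destination MAC from the resolved
 * neighbour, source MAC and ifindex from the egress device.
 */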
4179static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
4180 const struct neighbour *neigh,
4181 const struct net_device *dev)
4182{
4183 memcpy(params->dmac, neigh->ha, ETH_ALEN);
4184 memcpy(params->smac, dev->dev_addr, ETH_ALEN);
4185 params->h_vlan_TCI = 0;
4186 params->h_vlan_proto = 0;
4187 params->ifindex = dev->ifindex;
4188
4189 return 0;
4190}
4191#endif
4192
4193#if IS_ENABLED(CONFIG_INET)
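/* IPv4 FIB lookup for the bpf_fib_lookup() helper: resolves the egress
 * device, gateway and neighbour for @params and optionally checks the
 * route MTU against params->tot_len. Returns a BPF_FIB_LKUP_RET_* code
 * or a negative errno.
 */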
4194static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4195 u32 flags, bool check_mtu)
4196{
4197 struct in_device *in_dev;
4198 struct neighbour *neigh;
4199 struct net_device *dev;
4200 struct fib_result res;
4201 struct fib_nh *nh;
4202 struct flowi4 fl4;
4203 int err;
4204 u32 mtu;
4205
4206 dev = dev_get_by_index_rcu(net, params->ifindex);
4207 if (unlikely(!dev))
4208 return -ENODEV;
4209
	/* verify that forwarding is enabled on this interface */
4211 in_dev = __in_dev_get_rcu(dev);
4212 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
4213 return BPF_FIB_LKUP_RET_FWD_DISABLED;
4214
4215 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4216 fl4.flowi4_iif = 1;
4217 fl4.flowi4_oif = params->ifindex;
4218 } else {
4219 fl4.flowi4_iif = params->ifindex;
4220 fl4.flowi4_oif = 0;
4221 }
4222 fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
4223 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
4224 fl4.flowi4_flags = 0;
4225
4226 fl4.flowi4_proto = params->l4_protocol;
4227 fl4.daddr = params->ipv4_dst;
4228 fl4.saddr = params->ipv4_src;
4229 fl4.fl4_sport = params->sport;
4230 fl4.fl4_dport = params->dport;
4231
4232 if (flags & BPF_FIB_LOOKUP_DIRECT) {
4233 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
4234 struct fib_table *tb;
4235
4236 tb = fib_get_table(net, tbid);
4237 if (unlikely(!tb))
4238 return BPF_FIB_LKUP_RET_NOT_FWDED;
4239
4240 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
4241 } else {
4242 fl4.flowi4_mark = 0;
4243 fl4.flowi4_secid = 0;
4244 fl4.flowi4_tun_key.tun_id = 0;
4245 fl4.flowi4_uid = sock_net_uid(net, NULL);
4246
4247 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
4248 }
4249
4250 if (err) {
		/* map fib lookup errors to BPF_FIB_LKUP_RET_* return codes */
4252 if (err == -EINVAL)
4253 return BPF_FIB_LKUP_RET_BLACKHOLE;
4254 if (err == -EHOSTUNREACH)
4255 return BPF_FIB_LKUP_RET_UNREACHABLE;
4256 if (err == -EACCES)
4257 return BPF_FIB_LKUP_RET_PROHIBIT;
4258
4259 return BPF_FIB_LKUP_RET_NOT_FWDED;
4260 }
4261
4262 if (res.type != RTN_UNICAST)
4263 return BPF_FIB_LKUP_RET_NOT_FWDED;
4264
4265 if (res.fi->fib_nhs > 1)
4266 fib_select_path(net, &res, &fl4, NULL);
4267
4268 if (check_mtu) {
4269 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
4270 if (params->tot_len > mtu)
4271 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4272 }
4273
4274 nh = &res.fi->fib_nh[res.nh_sel];
4275
	/* lwtunnel encapsulations are not handled here */
4277 if (nh->nh_lwtstate)
4278 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
4279
4280 dev = nh->nh_dev;
4281 if (nh->nh_gw)
4282 params->ipv4_dst = nh->nh_gw;
4283
4284 params->rt_metric = res.fi->fib_priority;
4285
	/* xdp and cls_bpf programs run under RCU-bh, so no extra
	 * rcu_read_lock_bh() is needed for the neighbour lookup.
	 */
4289 neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
4290 if (!neigh)
4291 return BPF_FIB_LKUP_RET_NO_NEIGH;
4292
4293 return bpf_fib_set_fwd_params(params, neigh, dev);
4294}
4295#endif
4296
4297#if IS_ENABLED(CONFIG_IPV6)
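/* IPv6 counterpart of bpf_ipv4_fib_lookup(); goes through ipv6_stub so
 * that it also works when IPv6 is built as a module.
 */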
4298static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4299 u32 flags, bool check_mtu)
4300{
4301 struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
4302 struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
4303 struct neighbour *neigh;
4304 struct net_device *dev;
4305 struct inet6_dev *idev;
4306 struct fib6_info *f6i;
4307 struct flowi6 fl6;
4308 int strict = 0;
4309 int oif;
4310 u32 mtu;
4311
	/* link-local addresses are never forwarded */
4313 if (rt6_need_strict(dst) || rt6_need_strict(src))
4314 return BPF_FIB_LKUP_RET_NOT_FWDED;
4315
4316 dev = dev_get_by_index_rcu(net, params->ifindex);
4317 if (unlikely(!dev))
4318 return -ENODEV;
4319
4320 idev = __in6_dev_get_safely(dev);
4321 if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
4322 return BPF_FIB_LKUP_RET_FWD_DISABLED;
4323
4324 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4325 fl6.flowi6_iif = 1;
4326 oif = fl6.flowi6_oif = params->ifindex;
4327 } else {
4328 oif = fl6.flowi6_iif = params->ifindex;
4329 fl6.flowi6_oif = 0;
4330 strict = RT6_LOOKUP_F_HAS_SADDR;
4331 }
4332 fl6.flowlabel = params->flowinfo;
4333 fl6.flowi6_scope = 0;
4334 fl6.flowi6_flags = 0;
4335 fl6.mp_hash = 0;
4336
4337 fl6.flowi6_proto = params->l4_protocol;
4338 fl6.daddr = *dst;
4339 fl6.saddr = *src;
4340 fl6.fl6_sport = params->sport;
4341 fl6.fl6_dport = params->dport;
4342
4343 if (flags & BPF_FIB_LOOKUP_DIRECT) {
4344 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
4345 struct fib6_table *tb;
4346
4347 tb = ipv6_stub->fib6_get_table(net, tbid);
4348 if (unlikely(!tb))
4349 return BPF_FIB_LKUP_RET_NOT_FWDED;
4350
4351 f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
4352 } else {
4353 fl6.flowi6_mark = 0;
4354 fl6.flowi6_secid = 0;
4355 fl6.flowi6_tun_key.tun_id = 0;
4356 fl6.flowi6_uid = sock_net_uid(net, NULL);
4357
4358 f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
4359 }
4360
4361 if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
4362 return BPF_FIB_LKUP_RET_NOT_FWDED;
4363
4364 if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
4365 switch (f6i->fib6_type) {
4366 case RTN_BLACKHOLE:
4367 return BPF_FIB_LKUP_RET_BLACKHOLE;
4368 case RTN_UNREACHABLE:
4369 return BPF_FIB_LKUP_RET_UNREACHABLE;
4370 case RTN_PROHIBIT:
4371 return BPF_FIB_LKUP_RET_PROHIBIT;
4372 default:
4373 return BPF_FIB_LKUP_RET_NOT_FWDED;
4374 }
4375 }
4376
4377 if (f6i->fib6_type != RTN_UNICAST)
4378 return BPF_FIB_LKUP_RET_NOT_FWDED;
4379
4380 if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
4381 f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
4382 fl6.flowi6_oif, NULL,
4383 strict);
4384
4385 if (check_mtu) {
4386 mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
4387 if (params->tot_len > mtu)
4388 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4389 }
4390
4391 if (f6i->fib6_nh.nh_lwtstate)
4392 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
4393
4394 if (f6i->fib6_flags & RTF_GATEWAY)
4395 *dst = f6i->fib6_nh.nh_gw;
4396
4397 dev = f6i->fib6_nh.nh_dev;
4398 params->rt_metric = f6i->fib6_metric;
4399
	/* xdp and cls_bpf programs run under RCU-bh, so no extra
	 * rcu_read_lock_bh() is needed. __ipv6_neigh_lookup_noref() cannot
	 * be used here because nd_tbl has to be reached via ipv6_stub.
	 */
4404 neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
4405 ndisc_hashfn, dst, dev);
4406 if (!neigh)
4407 return BPF_FIB_LKUP_RET_NO_NEIGH;
4408
4409 return bpf_fib_set_fwd_params(params, neigh, dev);
4410}
4411#endif
4412
4413BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
4414 struct bpf_fib_lookup *, params, int, plen, u32, flags)
4415{
4416 if (plen < sizeof(*params))
4417 return -EINVAL;
4418
4419 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
4420 return -EINVAL;
4421
4422 switch (params->family) {
4423#if IS_ENABLED(CONFIG_INET)
4424 case AF_INET:
4425 return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
4426 flags, true);
4427#endif
4428#if IS_ENABLED(CONFIG_IPV6)
4429 case AF_INET6:
4430 return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
4431 flags, true);
4432#endif
4433 }
4434 return -EAFNOSUPPORT;
4435}
4436
4437static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
4438 .func = bpf_xdp_fib_lookup,
4439 .gpl_only = true,
4440 .ret_type = RET_INTEGER,
4441 .arg1_type = ARG_PTR_TO_CTX,
4442 .arg2_type = ARG_PTR_TO_MEM,
4443 .arg3_type = ARG_CONST_SIZE,
4444 .arg4_type = ARG_ANYTHING,
4445};
4446
4447BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
4448 struct bpf_fib_lookup *, params, int, plen, u32, flags)
4449{
4450 struct net *net = dev_net(skb->dev);
4451 int rc = -EAFNOSUPPORT;
4452
4453 if (plen < sizeof(*params))
4454 return -EINVAL;
4455
4456 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
4457 return -EINVAL;
4458
4459 switch (params->family) {
4460#if IS_ENABLED(CONFIG_INET)
4461 case AF_INET:
4462 rc = bpf_ipv4_fib_lookup(net, params, flags, false);
4463 break;
4464#endif
4465#if IS_ENABLED(CONFIG_IPV6)
4466 case AF_INET6:
4467 rc = bpf_ipv6_fib_lookup(net, params, flags, false);
4468 break;
4469#endif
4470 }
4471
4472 if (!rc) {
4473 struct net_device *dev;
4474
4475 dev = dev_get_by_index_rcu(net, params->ifindex);
4476 if (!is_skb_forwardable(dev, skb))
4477 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
4478 }
4479
4480 return rc;
4481}
4482
4483static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
4484 .func = bpf_skb_fib_lookup,
4485 .gpl_only = true,
4486 .ret_type = RET_INTEGER,
4487 .arg1_type = ARG_PTR_TO_CTX,
4488 .arg2_type = ARG_PTR_TO_MEM,
4489 .arg3_type = ARG_CONST_SIZE,
4490 .arg4_type = ARG_ANYTHING,
4491};
4492
4493#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
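/* Validate the SRH supplied by the program and attach it to the packet,
 * either inline behind the existing IPv6 header or by encapsulating the
 * packet in an outer IPv6 header, then fix up the payload length and
 * look up the next hop.
 */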
4494static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
4495{
4496 int err;
4497 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
4498
4499 if (!seg6_validate_srh(srh, len))
4500 return -EINVAL;
4501
4502 switch (type) {
4503 case BPF_LWT_ENCAP_SEG6_INLINE:
4504 if (skb->protocol != htons(ETH_P_IPV6))
4505 return -EBADMSG;
4506
4507 err = seg6_do_srh_inline(skb, srh);
4508 break;
4509 case BPF_LWT_ENCAP_SEG6:
4510 skb_reset_inner_headers(skb);
4511 skb->encapsulation = 1;
4512 err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
4513 break;
4514 default:
4515 return -EINVAL;
4516 }
4517
4518 bpf_compute_data_pointers(skb);
4519 if (err)
4520 return err;
4521
4522 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
4523 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
4524
4525 return seg6_lookup_nexthop(skb, NULL, 0);
4526}
4527#endif
4528
4529BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
4530 u32, len)
4531{
4532 switch (type) {
4533#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
4534 case BPF_LWT_ENCAP_SEG6:
4535 case BPF_LWT_ENCAP_SEG6_INLINE:
4536 return bpf_push_seg6_encap(skb, type, hdr, len);
4537#endif
4538 default:
4539 return -EINVAL;
4540 }
4541}
4542
4543static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
4544 .func = bpf_lwt_push_encap,
4545 .gpl_only = false,
4546 .ret_type = RET_INTEGER,
4547 .arg1_type = ARG_PTR_TO_CTX,
4548 .arg2_type = ARG_ANYTHING,
4549 .arg3_type = ARG_PTR_TO_MEM,
4550 .arg4_type = ARG_CONST_SIZE
4551};
4552
4553#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
4554BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
4555 const void *, from, u32, len)
4556{
4557 struct seg6_bpf_srh_state *srh_state =
4558 this_cpu_ptr(&seg6_bpf_srh_states);
4559 void *srh_tlvs, *srh_end, *ptr;
4560 struct ipv6_sr_hdr *srh;
4561 int srhoff = 0;
4562
4563 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
4564 return -EINVAL;
4565
4566 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
4567 srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
4568 srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
4569
4570 ptr = skb->data + offset;
4571 if (ptr >= srh_tlvs && ptr + len <= srh_end)
4572 srh_state->valid = 0;
4573 else if (ptr < (void *)&srh->flags ||
4574 ptr + len > (void *)&srh->segments)
4575 return -EFAULT;
4576
4577 if (unlikely(bpf_try_make_writable(skb, offset + len)))
4578 return -EFAULT;
4579
4580 memcpy(skb->data + offset, from, len);
4581 return 0;
4582}
4583
4584static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
4585 .func = bpf_lwt_seg6_store_bytes,
4586 .gpl_only = false,
4587 .ret_type = RET_INTEGER,
4588 .arg1_type = ARG_PTR_TO_CTX,
4589 .arg2_type = ARG_ANYTHING,
4590 .arg3_type = ARG_PTR_TO_MEM,
4591 .arg4_type = ARG_CONST_SIZE
4592};
4593
4594BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
4595 u32, action, void *, param, u32, param_len)
4596{
4597 struct seg6_bpf_srh_state *srh_state =
4598 this_cpu_ptr(&seg6_bpf_srh_states);
4599 struct ipv6_sr_hdr *srh;
4600 int srhoff = 0;
4601 int err;
4602
4603 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
4604 return -EINVAL;
4605 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
4606
4607 if (!srh_state->valid) {
4608 if (unlikely((srh_state->hdrlen & 7) != 0))
4609 return -EBADMSG;
4610
4611 srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
4612 if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
4613 return -EBADMSG;
4614
4615 srh_state->valid = 1;
4616 }
4617
4618 switch (action) {
4619 case SEG6_LOCAL_ACTION_END_X:
4620 if (param_len != sizeof(struct in6_addr))
4621 return -EINVAL;
4622 return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
4623 case SEG6_LOCAL_ACTION_END_T:
4624 if (param_len != sizeof(int))
4625 return -EINVAL;
4626 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
4627 case SEG6_LOCAL_ACTION_END_B6:
4628 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
4629 param, param_len);
4630 if (!err)
4631 srh_state->hdrlen =
4632 ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
4633 return err;
4634 case SEG6_LOCAL_ACTION_END_B6_ENCAP:
4635 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
4636 param, param_len);
4637 if (!err)
4638 srh_state->hdrlen =
4639 ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
4640 return err;
4641 default:
4642 return -EINVAL;
4643 }
4644}
4645
4646static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
4647 .func = bpf_lwt_seg6_action,
4648 .gpl_only = false,
4649 .ret_type = RET_INTEGER,
4650 .arg1_type = ARG_PTR_TO_CTX,
4651 .arg2_type = ARG_ANYTHING,
4652 .arg3_type = ARG_PTR_TO_MEM,
4653 .arg4_type = ARG_CONST_SIZE
4654};
4655
4656BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
4657 s32, len)
4658{
4659 struct seg6_bpf_srh_state *srh_state =
4660 this_cpu_ptr(&seg6_bpf_srh_states);
4661 void *srh_end, *srh_tlvs, *ptr;
4662 struct ipv6_sr_hdr *srh;
4663 struct ipv6hdr *hdr;
4664 int srhoff = 0;
4665 int ret;
4666
4667 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
4668 return -EINVAL;
4669 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
4670
4671 srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
4672 ((srh->first_segment + 1) << 4));
4673 srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
4674 srh_state->hdrlen);
4675 ptr = skb->data + offset;
4676
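	/* Only allow growing or shrinking within the TLV area of the SRH,
	 * and never shrink past its current end.
	 */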
4677 if (unlikely(ptr < srh_tlvs || ptr > srh_end))
4678 return -EFAULT;
4679 if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
4680 return -EFAULT;
4681
4682 if (len > 0) {
4683 ret = skb_cow_head(skb, len);
4684 if (unlikely(ret < 0))
4685 return ret;
4686
4687 ret = bpf_skb_net_hdr_push(skb, offset, len);
4688 } else {
4689 ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
4690 }
4691
4692 bpf_compute_data_pointers(skb);
4693 if (unlikely(ret < 0))
4694 return ret;
4695
4696 hdr = (struct ipv6hdr *)skb->data;
4697 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
4698
4699 srh_state->hdrlen += len;
4700 srh_state->valid = 0;
4701 return 0;
4702}
4703
4704static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
4705 .func = bpf_lwt_seg6_adjust_srh,
4706 .gpl_only = false,
4707 .ret_type = RET_INTEGER,
4708 .arg1_type = ARG_PTR_TO_CTX,
4709 .arg2_type = ARG_ANYTHING,
4710 .arg3_type = ARG_ANYTHING,
4711};
4712#endif
4713
4714bool bpf_helper_changes_pkt_data(void *func)
4715{
4716 if (func == bpf_skb_vlan_push ||
4717 func == bpf_skb_vlan_pop ||
4718 func == bpf_skb_store_bytes ||
4719 func == bpf_skb_change_proto ||
4720 func == bpf_skb_change_head ||
4721 func == sk_skb_change_head ||
4722 func == bpf_skb_change_tail ||
4723 func == sk_skb_change_tail ||
4724 func == bpf_skb_adjust_room ||
4725 func == bpf_skb_pull_data ||
4726 func == sk_skb_pull_data ||
4727 func == bpf_clone_redirect ||
4728 func == bpf_l3_csum_replace ||
4729 func == bpf_l4_csum_replace ||
4730 func == bpf_xdp_adjust_head ||
4731 func == bpf_xdp_adjust_meta ||
4732 func == bpf_msg_pull_data ||
4733 func == bpf_xdp_adjust_tail ||
4734#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
4735 func == bpf_lwt_seg6_store_bytes ||
4736 func == bpf_lwt_seg6_adjust_srh ||
4737 func == bpf_lwt_seg6_action ||
4738#endif
4739 func == bpf_lwt_push_encap)
4740 return true;
4741
4742 return false;
4743}
4744
4745static const struct bpf_func_proto *
4746bpf_base_func_proto(enum bpf_func_id func_id)
4747{
4748 switch (func_id) {
4749 case BPF_FUNC_map_lookup_elem:
4750 return &bpf_map_lookup_elem_proto;
4751 case BPF_FUNC_map_update_elem:
4752 return &bpf_map_update_elem_proto;
4753 case BPF_FUNC_map_delete_elem:
4754 return &bpf_map_delete_elem_proto;
4755 case BPF_FUNC_get_prandom_u32:
4756 return &bpf_get_prandom_u32_proto;
4757 case BPF_FUNC_get_smp_processor_id:
4758 return &bpf_get_raw_smp_processor_id_proto;
4759 case BPF_FUNC_get_numa_node_id:
4760 return &bpf_get_numa_node_id_proto;
4761 case BPF_FUNC_tail_call:
4762 return &bpf_tail_call_proto;
4763 case BPF_FUNC_ktime_get_ns:
4764 return &bpf_ktime_get_ns_proto;
4765 case BPF_FUNC_trace_printk:
4766 if (capable(CAP_SYS_ADMIN))
4767 return bpf_get_trace_printk_proto();
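		/* fall through to NULL for callers without CAP_SYS_ADMIN */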
4768 default:
4769 return NULL;
4770 }
4771}
4772
4773static const struct bpf_func_proto *
4774sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4775{
4776 switch (func_id) {
	/* inet and inet6 sockets are created in a process
	 * context so there is always a valid uid/gid
	 */
4780 case BPF_FUNC_get_current_uid_gid:
4781 return &bpf_get_current_uid_gid_proto;
4782 default:
4783 return bpf_base_func_proto(func_id);
4784 }
4785}
4786
4787static const struct bpf_func_proto *
4788sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4789{
4790 switch (func_id) {
	/* inet and inet6 sockets are created in a process
	 * context so there is always a valid uid/gid
	 */
4794 case BPF_FUNC_get_current_uid_gid:
4795 return &bpf_get_current_uid_gid_proto;
4796 case BPF_FUNC_bind:
4797 switch (prog->expected_attach_type) {
4798 case BPF_CGROUP_INET4_CONNECT:
4799 case BPF_CGROUP_INET6_CONNECT:
4800 return &bpf_bind_proto;
4801 default:
4802 return NULL;
4803 }
4804 default:
4805 return bpf_base_func_proto(func_id);
4806 }
4807}
4808
4809static const struct bpf_func_proto *
4810sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4811{
4812 switch (func_id) {
4813 case BPF_FUNC_skb_load_bytes:
4814 return &bpf_skb_load_bytes_proto;
4815 case BPF_FUNC_skb_load_bytes_relative:
4816 return &bpf_skb_load_bytes_relative_proto;
4817 case BPF_FUNC_get_socket_cookie:
4818 return &bpf_get_socket_cookie_proto;
4819 case BPF_FUNC_get_socket_uid:
4820 return &bpf_get_socket_uid_proto;
4821 default:
4822 return bpf_base_func_proto(func_id);
4823 }
4824}
4825
4826static const struct bpf_func_proto *
4827tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4828{
4829 switch (func_id) {
4830 case BPF_FUNC_skb_store_bytes:
4831 return &bpf_skb_store_bytes_proto;
4832 case BPF_FUNC_skb_load_bytes:
4833 return &bpf_skb_load_bytes_proto;
4834 case BPF_FUNC_skb_load_bytes_relative:
4835 return &bpf_skb_load_bytes_relative_proto;
4836 case BPF_FUNC_skb_pull_data:
4837 return &bpf_skb_pull_data_proto;
4838 case BPF_FUNC_csum_diff:
4839 return &bpf_csum_diff_proto;
4840 case BPF_FUNC_csum_update:
4841 return &bpf_csum_update_proto;
4842 case BPF_FUNC_l3_csum_replace:
4843 return &bpf_l3_csum_replace_proto;
4844 case BPF_FUNC_l4_csum_replace:
4845 return &bpf_l4_csum_replace_proto;
4846 case BPF_FUNC_clone_redirect:
4847 return &bpf_clone_redirect_proto;
4848 case BPF_FUNC_get_cgroup_classid:
4849 return &bpf_get_cgroup_classid_proto;
4850 case BPF_FUNC_skb_vlan_push:
4851 return &bpf_skb_vlan_push_proto;
4852 case BPF_FUNC_skb_vlan_pop:
4853 return &bpf_skb_vlan_pop_proto;
4854 case BPF_FUNC_skb_change_proto:
4855 return &bpf_skb_change_proto_proto;
4856 case BPF_FUNC_skb_change_type:
4857 return &bpf_skb_change_type_proto;
4858 case BPF_FUNC_skb_adjust_room:
4859 return &bpf_skb_adjust_room_proto;
4860 case BPF_FUNC_skb_change_tail:
4861 return &bpf_skb_change_tail_proto;
4862 case BPF_FUNC_skb_get_tunnel_key:
4863 return &bpf_skb_get_tunnel_key_proto;
4864 case BPF_FUNC_skb_set_tunnel_key:
4865 return bpf_get_skb_set_tunnel_proto(func_id);
4866 case BPF_FUNC_skb_get_tunnel_opt:
4867 return &bpf_skb_get_tunnel_opt_proto;
4868 case BPF_FUNC_skb_set_tunnel_opt:
4869 return bpf_get_skb_set_tunnel_proto(func_id);
4870 case BPF_FUNC_redirect:
4871 return &bpf_redirect_proto;
4872 case BPF_FUNC_get_route_realm:
4873 return &bpf_get_route_realm_proto;
4874 case BPF_FUNC_get_hash_recalc:
4875 return &bpf_get_hash_recalc_proto;
4876 case BPF_FUNC_set_hash_invalid:
4877 return &bpf_set_hash_invalid_proto;
4878 case BPF_FUNC_set_hash:
4879 return &bpf_set_hash_proto;
4880 case BPF_FUNC_perf_event_output:
4881 return &bpf_skb_event_output_proto;
4882 case BPF_FUNC_get_smp_processor_id:
4883 return &bpf_get_smp_processor_id_proto;
4884 case BPF_FUNC_skb_under_cgroup:
4885 return &bpf_skb_under_cgroup_proto;
4886 case BPF_FUNC_get_socket_cookie:
4887 return &bpf_get_socket_cookie_proto;
4888 case BPF_FUNC_get_socket_uid:
4889 return &bpf_get_socket_uid_proto;
4890 case BPF_FUNC_fib_lookup:
4891 return &bpf_skb_fib_lookup_proto;
4892#ifdef CONFIG_XFRM
4893 case BPF_FUNC_skb_get_xfrm_state:
4894 return &bpf_skb_get_xfrm_state_proto;
4895#endif
4896#ifdef CONFIG_SOCK_CGROUP_DATA
4897 case BPF_FUNC_skb_cgroup_id:
4898 return &bpf_skb_cgroup_id_proto;
4899#endif
4900 default:
4901 return bpf_base_func_proto(func_id);
4902 }
4903}
4904
4905static const struct bpf_func_proto *
4906xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4907{
4908 switch (func_id) {
4909 case BPF_FUNC_perf_event_output:
4910 return &bpf_xdp_event_output_proto;
4911 case BPF_FUNC_get_smp_processor_id:
4912 return &bpf_get_smp_processor_id_proto;
4913 case BPF_FUNC_csum_diff:
4914 return &bpf_csum_diff_proto;
4915 case BPF_FUNC_xdp_adjust_head:
4916 return &bpf_xdp_adjust_head_proto;
4917 case BPF_FUNC_xdp_adjust_meta:
4918 return &bpf_xdp_adjust_meta_proto;
4919 case BPF_FUNC_redirect:
4920 return &bpf_xdp_redirect_proto;
4921 case BPF_FUNC_redirect_map:
4922 return &bpf_xdp_redirect_map_proto;
4923 case BPF_FUNC_xdp_adjust_tail:
4924 return &bpf_xdp_adjust_tail_proto;
4925 case BPF_FUNC_fib_lookup:
4926 return &bpf_xdp_fib_lookup_proto;
4927 default:
4928 return bpf_base_func_proto(func_id);
4929 }
4930}
4931
4932static const struct bpf_func_proto *
4933sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4934{
4935 switch (func_id) {
4936 case BPF_FUNC_setsockopt:
4937 return &bpf_setsockopt_proto;
4938 case BPF_FUNC_getsockopt:
4939 return &bpf_getsockopt_proto;
4940 case BPF_FUNC_sock_ops_cb_flags_set:
4941 return &bpf_sock_ops_cb_flags_set_proto;
4942 case BPF_FUNC_sock_map_update:
4943 return &bpf_sock_map_update_proto;
4944 case BPF_FUNC_sock_hash_update:
4945 return &bpf_sock_hash_update_proto;
4946 default:
4947 return bpf_base_func_proto(func_id);
4948 }
4949}
4950
4951static const struct bpf_func_proto *
4952sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4953{
4954 switch (func_id) {
4955 case BPF_FUNC_msg_redirect_map:
4956 return &bpf_msg_redirect_map_proto;
4957 case BPF_FUNC_msg_redirect_hash:
4958 return &bpf_msg_redirect_hash_proto;
4959 case BPF_FUNC_msg_apply_bytes:
4960 return &bpf_msg_apply_bytes_proto;
4961 case BPF_FUNC_msg_cork_bytes:
4962 return &bpf_msg_cork_bytes_proto;
4963 case BPF_FUNC_msg_pull_data:
4964 return &bpf_msg_pull_data_proto;
4965 default:
4966 return bpf_base_func_proto(func_id);
4967 }
4968}
4969
4970static const struct bpf_func_proto *
4971sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4972{
4973 switch (func_id) {
4974 case BPF_FUNC_skb_store_bytes:
4975 return &bpf_skb_store_bytes_proto;
4976 case BPF_FUNC_skb_load_bytes:
4977 return &bpf_skb_load_bytes_proto;
4978 case BPF_FUNC_skb_pull_data:
4979 return &sk_skb_pull_data_proto;
4980 case BPF_FUNC_skb_change_tail:
4981 return &sk_skb_change_tail_proto;
4982 case BPF_FUNC_skb_change_head:
4983 return &sk_skb_change_head_proto;
4984 case BPF_FUNC_get_socket_cookie:
4985 return &bpf_get_socket_cookie_proto;
4986 case BPF_FUNC_get_socket_uid:
4987 return &bpf_get_socket_uid_proto;
4988 case BPF_FUNC_sk_redirect_map:
4989 return &bpf_sk_redirect_map_proto;
4990 case BPF_FUNC_sk_redirect_hash:
4991 return &bpf_sk_redirect_hash_proto;
4992 default:
4993 return bpf_base_func_proto(func_id);
4994 }
4995}
4996
4997static const struct bpf_func_proto *
4998lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4999{
5000 switch (func_id) {
5001 case BPF_FUNC_skb_load_bytes:
5002 return &bpf_skb_load_bytes_proto;
5003 case BPF_FUNC_skb_pull_data:
5004 return &bpf_skb_pull_data_proto;
5005 case BPF_FUNC_csum_diff:
5006 return &bpf_csum_diff_proto;
5007 case BPF_FUNC_get_cgroup_classid:
5008 return &bpf_get_cgroup_classid_proto;
5009 case BPF_FUNC_get_route_realm:
5010 return &bpf_get_route_realm_proto;
5011 case BPF_FUNC_get_hash_recalc:
5012 return &bpf_get_hash_recalc_proto;
5013 case BPF_FUNC_perf_event_output:
5014 return &bpf_skb_event_output_proto;
5015 case BPF_FUNC_get_smp_processor_id:
5016 return &bpf_get_smp_processor_id_proto;
5017 case BPF_FUNC_skb_under_cgroup:
5018 return &bpf_skb_under_cgroup_proto;
5019 default:
5020 return bpf_base_func_proto(func_id);
5021 }
5022}
5023
5024static const struct bpf_func_proto *
5025lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5026{
5027 switch (func_id) {
5028 case BPF_FUNC_lwt_push_encap:
5029 return &bpf_lwt_push_encap_proto;
5030 default:
5031 return lwt_out_func_proto(func_id, prog);
5032 }
5033}
5034
5035static const struct bpf_func_proto *
5036lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5037{
5038 switch (func_id) {
5039 case BPF_FUNC_skb_get_tunnel_key:
5040 return &bpf_skb_get_tunnel_key_proto;
5041 case BPF_FUNC_skb_set_tunnel_key:
5042 return bpf_get_skb_set_tunnel_proto(func_id);
5043 case BPF_FUNC_skb_get_tunnel_opt:
5044 return &bpf_skb_get_tunnel_opt_proto;
5045 case BPF_FUNC_skb_set_tunnel_opt:
5046 return bpf_get_skb_set_tunnel_proto(func_id);
5047 case BPF_FUNC_redirect:
5048 return &bpf_redirect_proto;
5049 case BPF_FUNC_clone_redirect:
5050 return &bpf_clone_redirect_proto;
5051 case BPF_FUNC_skb_change_tail:
5052 return &bpf_skb_change_tail_proto;
5053 case BPF_FUNC_skb_change_head:
5054 return &bpf_skb_change_head_proto;
5055 case BPF_FUNC_skb_store_bytes:
5056 return &bpf_skb_store_bytes_proto;
5057 case BPF_FUNC_csum_update:
5058 return &bpf_csum_update_proto;
5059 case BPF_FUNC_l3_csum_replace:
5060 return &bpf_l3_csum_replace_proto;
5061 case BPF_FUNC_l4_csum_replace:
5062 return &bpf_l4_csum_replace_proto;
5063 case BPF_FUNC_set_hash_invalid:
5064 return &bpf_set_hash_invalid_proto;
5065 default:
5066 return lwt_out_func_proto(func_id, prog);
5067 }
5068}
5069
5070static const struct bpf_func_proto *
5071lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5072{
5073 switch (func_id) {
5074#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5075 case BPF_FUNC_lwt_seg6_store_bytes:
5076 return &bpf_lwt_seg6_store_bytes_proto;
5077 case BPF_FUNC_lwt_seg6_action:
5078 return &bpf_lwt_seg6_action_proto;
5079 case BPF_FUNC_lwt_seg6_adjust_srh:
5080 return &bpf_lwt_seg6_adjust_srh_proto;
5081#endif
5082 default:
5083 return lwt_out_func_proto(func_id, prog);
5084 }
5085}
5086
5087static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
5088 const struct bpf_prog *prog,
5089 struct bpf_insn_access_aux *info)
5090{
5091 const int size_default = sizeof(__u32);
5092
5093 if (off < 0 || off >= sizeof(struct __sk_buff))
5094 return false;
5095
	/* The verifier guarantees that size > 0. */
5097 if (off % size != 0)
5098 return false;
5099
5100 switch (off) {
5101 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
5102 if (off + size > offsetofend(struct __sk_buff, cb[4]))
5103 return false;
5104 break;
5105 case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
5106 case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
5107 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
5108 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
5109 case bpf_ctx_range(struct __sk_buff, data):
5110 case bpf_ctx_range(struct __sk_buff, data_meta):
5111 case bpf_ctx_range(struct __sk_buff, data_end):
5112 if (size != size_default)
5113 return false;
5114 break;
5115 default:
		/* Only narrow read access allowed for now. */
5117 if (type == BPF_WRITE) {
5118 if (size != size_default)
5119 return false;
5120 } else {
5121 bpf_ctx_record_field_size(info, size_default);
5122 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
5123 return false;
5124 }
5125 }
5126
5127 return true;
5128}
5129
5130static bool sk_filter_is_valid_access(int off, int size,
5131 enum bpf_access_type type,
5132 const struct bpf_prog *prog,
5133 struct bpf_insn_access_aux *info)
5134{
5135 switch (off) {
5136 case bpf_ctx_range(struct __sk_buff, tc_classid):
5137 case bpf_ctx_range(struct __sk_buff, data):
5138 case bpf_ctx_range(struct __sk_buff, data_meta):
5139 case bpf_ctx_range(struct __sk_buff, data_end):
5140 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
5141 return false;
5142 }
5143
5144 if (type == BPF_WRITE) {
5145 switch (off) {
5146 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
5147 break;
5148 default:
5149 return false;
5150 }
5151 }
5152
5153 return bpf_skb_is_valid_access(off, size, type, prog, info);
5154}
5155
5156static bool lwt_is_valid_access(int off, int size,
5157 enum bpf_access_type type,
5158 const struct bpf_prog *prog,
5159 struct bpf_insn_access_aux *info)
5160{
5161 switch (off) {
5162 case bpf_ctx_range(struct __sk_buff, tc_classid):
5163 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
5164 case bpf_ctx_range(struct __sk_buff, data_meta):
5165 return false;
5166 }
5167
5168 if (type == BPF_WRITE) {
5169 switch (off) {
5170 case bpf_ctx_range(struct __sk_buff, mark):
5171 case bpf_ctx_range(struct __sk_buff, priority):
5172 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
5173 break;
5174 default:
5175 return false;
5176 }
5177 }
5178
5179 switch (off) {
5180 case bpf_ctx_range(struct __sk_buff, data):
5181 info->reg_type = PTR_TO_PACKET;
5182 break;
5183 case bpf_ctx_range(struct __sk_buff, data_end):
5184 info->reg_type = PTR_TO_PACKET_END;
5185 break;
5186 }
5187
5188 return bpf_skb_is_valid_access(off, size, type, prog, info);
5189}
5190
/* Restrict bpf_sock field access based on the program's expected attach type. */
5192static bool __sock_filter_check_attach_type(int off,
5193 enum bpf_access_type access_type,
5194 enum bpf_attach_type attach_type)
5195{
5196 switch (off) {
5197 case offsetof(struct bpf_sock, bound_dev_if):
5198 case offsetof(struct bpf_sock, mark):
5199 case offsetof(struct bpf_sock, priority):
5200 switch (attach_type) {
5201 case BPF_CGROUP_INET_SOCK_CREATE:
5202 goto full_access;
5203 default:
5204 return false;
5205 }
5206 case bpf_ctx_range(struct bpf_sock, src_ip4):
5207 switch (attach_type) {
5208 case BPF_CGROUP_INET4_POST_BIND:
5209 goto read_only;
5210 default:
5211 return false;
5212 }
5213 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
5214 switch (attach_type) {
5215 case BPF_CGROUP_INET6_POST_BIND:
5216 goto read_only;
5217 default:
5218 return false;
5219 }
5220 case bpf_ctx_range(struct bpf_sock, src_port):
5221 switch (attach_type) {
5222 case BPF_CGROUP_INET4_POST_BIND:
5223 case BPF_CGROUP_INET6_POST_BIND:
5224 goto read_only;
5225 default:
5226 return false;
5227 }
5228 }
5229read_only:
5230 return access_type == BPF_READ;
5231full_access:
5232 return true;
5233}
5234
5235static bool __sock_filter_check_size(int off, int size,
5236 struct bpf_insn_access_aux *info)
5237{
5238 const int size_default = sizeof(__u32);
5239
5240 switch (off) {
5241 case bpf_ctx_range(struct bpf_sock, src_ip4):
5242 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
5243 bpf_ctx_record_field_size(info, size_default);
5244 return bpf_ctx_narrow_access_ok(off, size, size_default);
5245 }
5246
5247 return size == size_default;
5248}
5249
5250static bool sock_filter_is_valid_access(int off, int size,
5251 enum bpf_access_type type,
5252 const struct bpf_prog *prog,
5253 struct bpf_insn_access_aux *info)
5254{
5255 if (off < 0 || off >= sizeof(struct bpf_sock))
5256 return false;
5257 if (off % size != 0)
5258 return false;
5259 if (!__sock_filter_check_attach_type(off, type,
5260 prog->expected_attach_type))
5261 return false;
5262 if (!__sock_filter_check_size(off, size, info))
5263 return false;
5264 return true;
5265}
5266
5267static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
5268 const struct bpf_prog *prog, int drop_verdict)
5269{
5270 struct bpf_insn *insn = insn_buf;
5271
5272 if (!direct_write)
5273 return 0;
5274
	/* if (!skb->cloned)
	 *       goto start;
	 *
	 * (Fast-path: if the skb is not cloned the program may write
	 *  directly; otherwise pull the data first via the helper.)
	 */
5281 *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
5282 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
5283 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);

	/* ret = bpf_skb_pull_data(skb, 0); */
5286 *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
5287 *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
5288 *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
5289 BPF_FUNC_skb_pull_data);
5290
	/* if (!ret)
	 *      goto restore;
	 * return drop_verdict;
	 */
5294 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
5295 *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
5296 *insn++ = BPF_EXIT_INSN();
5297
	/* restore: put the saved ctx pointer back into R1 */
5299 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
	/* start: re-emit the program's original first instruction */
5301 *insn++ = prog->insnsi[0];
5302
5303 return insn - insn_buf;
5304}
5305
5306static int bpf_gen_ld_abs(const struct bpf_insn *orig,
5307 struct bpf_insn *insn_buf)
5308{
5309 bool indirect = BPF_MODE(orig->code) == BPF_IND;
5310 struct bpf_insn *insn = insn_buf;
5311
	/* ctx is guaranteed to sit in R6 (BPF_REG_CTX) at this point */
5313 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
5314 if (!indirect) {
5315 *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
5316 } else {
5317 *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
5318 if (orig->imm)
5319 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
5320 }
5321
5322 switch (BPF_SIZE(orig->code)) {
5323 case BPF_B:
5324 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
5325 break;
5326 case BPF_H:
5327 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
5328 break;
5329 case BPF_W:
5330 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
5331 break;
5332 }
5333
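	/* A negative return from the load helper means the access was out of
	 * bounds: zero R0 and exit, matching classic LD_ABS/LD_IND semantics.
	 */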
5334 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
5335 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
5336 *insn++ = BPF_EXIT_INSN();
5337
5338 return insn - insn_buf;
5339}
5340
5341static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
5342 const struct bpf_prog *prog)
5343{
5344 return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
5345}
5346
5347static bool tc_cls_act_is_valid_access(int off, int size,
5348 enum bpf_access_type type,
5349 const struct bpf_prog *prog,
5350 struct bpf_insn_access_aux *info)
5351{
5352 if (type == BPF_WRITE) {
5353 switch (off) {
5354 case bpf_ctx_range(struct __sk_buff, mark):
5355 case bpf_ctx_range(struct __sk_buff, tc_index):
5356 case bpf_ctx_range(struct __sk_buff, priority):
5357 case bpf_ctx_range(struct __sk_buff, tc_classid):
5358 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
5359 break;
5360 default:
5361 return false;
5362 }
5363 }
5364
5365 switch (off) {
5366 case bpf_ctx_range(struct __sk_buff, data):
5367 info->reg_type = PTR_TO_PACKET;
5368 break;
5369 case bpf_ctx_range(struct __sk_buff, data_meta):
5370 info->reg_type = PTR_TO_PACKET_META;
5371 break;
5372 case bpf_ctx_range(struct __sk_buff, data_end):
5373 info->reg_type = PTR_TO_PACKET_END;
5374 break;
5375 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
5376 return false;
5377 }
5378
5379 return bpf_skb_is_valid_access(off, size, type, prog, info);
5380}
5381
5382static bool __is_valid_xdp_access(int off, int size)
5383{
5384 if (off < 0 || off >= sizeof(struct xdp_md))
5385 return false;
5386 if (off % size != 0)
5387 return false;
5388 if (size != sizeof(__u32))
5389 return false;
5390
5391 return true;
5392}
5393
5394static bool xdp_is_valid_access(int off, int size,
5395 enum bpf_access_type type,
5396 const struct bpf_prog *prog,
5397 struct bpf_insn_access_aux *info)
5398{
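	/* Writes are only allowed to rx_queue_index, and only for programs
	 * offloaded to a device.
	 */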
5399 if (type == BPF_WRITE) {
5400 if (bpf_prog_is_dev_bound(prog->aux)) {
5401 switch (off) {
5402 case offsetof(struct xdp_md, rx_queue_index):
5403 return __is_valid_xdp_access(off, size);
5404 }
5405 }
5406 return false;
5407 }
5408
5409 switch (off) {
5410 case offsetof(struct xdp_md, data):
5411 info->reg_type = PTR_TO_PACKET;
5412 break;
5413 case offsetof(struct xdp_md, data_meta):
5414 info->reg_type = PTR_TO_PACKET_META;
5415 break;
5416 case offsetof(struct xdp_md, data_end):
5417 info->reg_type = PTR_TO_PACKET_END;
5418 break;
5419 }
5420
5421 return __is_valid_xdp_access(off, size);
5422}
5423
5424void bpf_warn_invalid_xdp_action(u32 act)
5425{
5426 const u32 act_max = XDP_REDIRECT;
5427
5428 WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
5429 act > act_max ? "Illegal" : "Driver unsupported",
5430 act);
5431}
5432EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
5433
5434static bool sock_addr_is_valid_access(int off, int size,
5435 enum bpf_access_type type,
5436 const struct bpf_prog *prog,
5437 struct bpf_insn_access_aux *info)
5438{
5439 const int size_default = sizeof(__u32);
5440
5441 if (off < 0 || off >= sizeof(struct bpf_sock_addr))
5442 return false;
5443 if (off % size != 0)
5444 return false;
5445
	/* Disallow access to IPv6 fields from an IPv4 context and
	 * vice versa.
	 */
5449 switch (off) {
5450 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
5451 switch (prog->expected_attach_type) {
5452 case BPF_CGROUP_INET4_BIND:
5453 case BPF_CGROUP_INET4_CONNECT:
5454 case BPF_CGROUP_UDP4_SENDMSG:
5455 break;
5456 default:
5457 return false;
5458 }
5459 break;
5460 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
5461 switch (prog->expected_attach_type) {
5462 case BPF_CGROUP_INET6_BIND:
5463 case BPF_CGROUP_INET6_CONNECT:
5464 case BPF_CGROUP_UDP6_SENDMSG:
5465 break;
5466 default:
5467 return false;
5468 }
5469 break;
5470 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
5471 switch (prog->expected_attach_type) {
5472 case BPF_CGROUP_UDP4_SENDMSG:
5473 break;
5474 default:
5475 return false;
5476 }
5477 break;
5478 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
5479 msg_src_ip6[3]):
5480 switch (prog->expected_attach_type) {
5481 case BPF_CGROUP_UDP6_SENDMSG:
5482 break;
5483 default:
5484 return false;
5485 }
5486 break;
5487 }
5488
5489 switch (off) {
5490 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
5491 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
5492 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
5493 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
5494 msg_src_ip6[3]):
		/* Only narrow read access allowed for now. */
5496 if (type == BPF_READ) {
5497 bpf_ctx_record_field_size(info, size_default);
5498 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
5499 return false;
5500 } else {
5501 if (size != size_default)
5502 return false;
5503 }
5504 break;
5505 case bpf_ctx_range(struct bpf_sock_addr, user_port):
5506 if (size != size_default)
5507 return false;
5508 break;
5509 default:
5510 if (type == BPF_READ) {
5511 if (size != size_default)
5512 return false;
5513 } else {
5514 return false;
5515 }
5516 }
5517
5518 return true;
5519}
5520
5521static bool sock_ops_is_valid_access(int off, int size,
5522 enum bpf_access_type type,
5523 const struct bpf_prog *prog,
5524 struct bpf_insn_access_aux *info)
5525{
5526 const int size_default = sizeof(__u32);
5527
5528 if (off < 0 || off >= sizeof(struct bpf_sock_ops))
5529 return false;
5530
	/* The verifier guarantees that size > 0. */
5532 if (off % size != 0)
5533 return false;
5534
5535 if (type == BPF_WRITE) {
5536 switch (off) {
5537 case offsetof(struct bpf_sock_ops, reply):
5538 case offsetof(struct bpf_sock_ops, sk_txhash):
5539 if (size != size_default)
5540 return false;
5541 break;
5542 default:
5543 return false;
5544 }
5545 } else {
5546 switch (off) {
5547 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
5548 bytes_acked):
5549 if (size != sizeof(__u64))
5550 return false;
5551 break;
5552 default:
5553 if (size != size_default)
5554 return false;
5555 break;
5556 }
5557 }
5558
5559 return true;
5560}
5561
5562static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
5563 const struct bpf_prog *prog)
5564{
5565 return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
5566}
5567
5568static bool sk_skb_is_valid_access(int off, int size,
5569 enum bpf_access_type type,
5570 const struct bpf_prog *prog,
5571 struct bpf_insn_access_aux *info)
5572{
5573 switch (off) {
5574 case bpf_ctx_range(struct __sk_buff, tc_classid):
5575 case bpf_ctx_range(struct __sk_buff, data_meta):
5576 return false;
5577 }
5578
5579 if (type == BPF_WRITE) {
5580 switch (off) {
5581 case bpf_ctx_range(struct __sk_buff, tc_index):
5582 case bpf_ctx_range(struct __sk_buff, priority):
5583 break;
5584 default:
5585 return false;
5586 }
5587 }
5588
5589 switch (off) {
5590 case bpf_ctx_range(struct __sk_buff, mark):
5591 return false;
5592 case bpf_ctx_range(struct __sk_buff, data):
5593 info->reg_type = PTR_TO_PACKET;
5594 break;
5595 case bpf_ctx_range(struct __sk_buff, data_end):
5596 info->reg_type = PTR_TO_PACKET_END;
5597 break;
5598 }
5599
5600 return bpf_skb_is_valid_access(off, size, type, prog, info);
5601}
5602
5603static bool sk_msg_is_valid_access(int off, int size,
5604 enum bpf_access_type type,
5605 const struct bpf_prog *prog,
5606 struct bpf_insn_access_aux *info)
5607{
5608 if (type == BPF_WRITE)
5609 return false;
5610
5611 switch (off) {
5612 case offsetof(struct sk_msg_md, data):
5613 info->reg_type = PTR_TO_PACKET;
5614 if (size != sizeof(__u64))
5615 return false;
5616 break;
5617 case offsetof(struct sk_msg_md, data_end):
5618 info->reg_type = PTR_TO_PACKET_END;
5619 if (size != sizeof(__u64))
5620 return false;
5621 break;
5622 default:
5623 if (size != sizeof(__u32))
5624 return false;
5625 }
5626
5627 if (off < 0 || off >= sizeof(struct sk_msg_md))
5628 return false;
5629 if (off % size != 0)
5630 return false;
5631
5632 return true;
5633}
5634
5635static u32 bpf_convert_ctx_access(enum bpf_access_type type,
5636 const struct bpf_insn *si,
5637 struct bpf_insn *insn_buf,
5638 struct bpf_prog *prog, u32 *target_size)
5639{
5640 struct bpf_insn *insn = insn_buf;
5641 int off;
5642
5643 switch (si->off) {
5644 case offsetof(struct __sk_buff, len):
5645 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5646 bpf_target_off(struct sk_buff, len, 4,
5647 target_size));
5648 break;
5649
5650 case offsetof(struct __sk_buff, protocol):
5651 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
5652 bpf_target_off(struct sk_buff, protocol, 2,
5653 target_size));
5654 break;
5655
5656 case offsetof(struct __sk_buff, vlan_proto):
5657 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
5658 bpf_target_off(struct sk_buff, vlan_proto, 2,
5659 target_size));
5660 break;
5661
5662 case offsetof(struct __sk_buff, priority):
5663 if (type == BPF_WRITE)
5664 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
5665 bpf_target_off(struct sk_buff, priority, 4,
5666 target_size));
5667 else
5668 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5669 bpf_target_off(struct sk_buff, priority, 4,
5670 target_size));
5671 break;
5672
5673 case offsetof(struct __sk_buff, ingress_ifindex):
5674 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5675 bpf_target_off(struct sk_buff, skb_iif, 4,
5676 target_size));
5677 break;
5678
5679 case offsetof(struct __sk_buff, ifindex):
5680 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
5681 si->dst_reg, si->src_reg,
5682 offsetof(struct sk_buff, dev));
5683 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
5684 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5685 bpf_target_off(struct net_device, ifindex, 4,
5686 target_size));
5687 break;
5688
5689 case offsetof(struct __sk_buff, hash):
5690 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5691 bpf_target_off(struct sk_buff, hash, 4,
5692 target_size));
5693 break;
5694
5695 case offsetof(struct __sk_buff, mark):
5696 if (type == BPF_WRITE)
5697 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
5698 bpf_target_off(struct sk_buff, mark, 4,
5699 target_size));
5700 else
5701 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5702 bpf_target_off(struct sk_buff, mark, 4,
5703 target_size));
5704 break;
5705
5706 case offsetof(struct __sk_buff, pkt_type):
5707 *target_size = 1;
5708 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
5709 PKT_TYPE_OFFSET());
5710 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
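		/* pkt_type is a bitfield; on big-endian bitfield layouts it
		 * occupies the top bits of the byte, so shift it down to bit 0.
		 */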
5711#ifdef __BIG_ENDIAN_BITFIELD
5712 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
5713#endif
5714 break;
5715
5716 case offsetof(struct __sk_buff, queue_mapping):
5717 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
5718 bpf_target_off(struct sk_buff, queue_mapping, 2,
5719 target_size));
5720 break;
5721
5722 case offsetof(struct __sk_buff, vlan_present):
5723 case offsetof(struct __sk_buff, vlan_tci):
5724 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
5725
5726 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
5727 bpf_target_off(struct sk_buff, vlan_tci, 2,
5728 target_size));
5729 if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
5730 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
5731 ~VLAN_TAG_PRESENT);
5732 } else {
5733 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
5734 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
5735 }
5736 break;
5737
5738 case offsetof(struct __sk_buff, cb[0]) ...
5739 offsetofend(struct __sk_buff, cb[4]) - 1:
5740 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
5741 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
5742 offsetof(struct qdisc_skb_cb, data)) %
5743 sizeof(__u64));
5744
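		/* Remap __sk_buff.cb[] onto qdisc_skb_cb.data inside skb->cb
		 * and record that this program touches the cb area.
		 */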
5745 prog->cb_access = 1;
5746 off = si->off;
5747 off -= offsetof(struct __sk_buff, cb[0]);
5748 off += offsetof(struct sk_buff, cb);
5749 off += offsetof(struct qdisc_skb_cb, data);
5750 if (type == BPF_WRITE)
5751 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
5752 si->src_reg, off);
5753 else
5754 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
5755 si->src_reg, off);
5756 break;
5757
5758 case offsetof(struct __sk_buff, tc_classid):
5759 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
5760
5761 off = si->off;
5762 off -= offsetof(struct __sk_buff, tc_classid);
5763 off += offsetof(struct sk_buff, cb);
5764 off += offsetof(struct qdisc_skb_cb, tc_classid);
5765 *target_size = 2;
5766 if (type == BPF_WRITE)
5767 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
5768 si->src_reg, off);
5769 else
5770 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
5771 si->src_reg, off);
5772 break;
5773
5774 case offsetof(struct __sk_buff, data):
5775 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
5776 si->dst_reg, si->src_reg,
5777 offsetof(struct sk_buff, data));
5778 break;
5779
5780 case offsetof(struct __sk_buff, data_meta):
5781 off = si->off;
5782 off -= offsetof(struct __sk_buff, data_meta);
5783 off += offsetof(struct sk_buff, cb);
5784 off += offsetof(struct bpf_skb_data_end, data_meta);
5785 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
5786 si->src_reg, off);
5787 break;
5788
5789 case offsetof(struct __sk_buff, data_end):
5790 off = si->off;
5791 off -= offsetof(struct __sk_buff, data_end);
5792 off += offsetof(struct sk_buff, cb);
5793 off += offsetof(struct bpf_skb_data_end, data_end);
5794 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
5795 si->src_reg, off);
5796 break;
5797
5798 case offsetof(struct __sk_buff, tc_index):
5799#ifdef CONFIG_NET_SCHED
5800 if (type == BPF_WRITE)
5801 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
5802 bpf_target_off(struct sk_buff, tc_index, 2,
5803 target_size));
5804 else
5805 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
5806 bpf_target_off(struct sk_buff, tc_index, 2,
5807 target_size));
5808#else
5809 *target_size = 2;
5810 if (type == BPF_WRITE)
5811 *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
5812 else
5813 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
5814#endif
5815 break;
5816
5817 case offsetof(struct __sk_buff, napi_id):
5818#if defined(CONFIG_NET_RX_BUSY_POLL)
5819 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5820 bpf_target_off(struct sk_buff, napi_id, 4,
5821 target_size));
5822 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
5823 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
5824#else
5825 *target_size = 4;
5826 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
5827#endif
5828 break;
5829 case offsetof(struct __sk_buff, family):
5830 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
5831
5832 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
5833 si->dst_reg, si->src_reg,
5834 offsetof(struct sk_buff, sk));
5835 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
5836 bpf_target_off(struct sock_common,
5837 skc_family,
5838 2, target_size));
5839 break;
5840 case offsetof(struct __sk_buff, remote_ip4):
5841 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
5842
5843 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
5844 si->dst_reg, si->src_reg,
5845 offsetof(struct sk_buff, sk));
5846 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5847 bpf_target_off(struct sock_common,
5848 skc_daddr,
5849 4, target_size));
5850 break;
5851 case offsetof(struct __sk_buff, local_ip4):
5852 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
5853 skc_rcv_saddr) != 4);
5854
5855 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
5856 si->dst_reg, si->src_reg,
5857 offsetof(struct sk_buff, sk));
5858 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5859 bpf_target_off(struct sock_common,
5860 skc_rcv_saddr,
5861 4, target_size));
5862 break;
5863 case offsetof(struct __sk_buff, remote_ip6[0]) ...
5864 offsetof(struct __sk_buff, remote_ip6[3]):
5865#if IS_ENABLED(CONFIG_IPV6)
5866 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
5867 skc_v6_daddr.s6_addr32[0]) != 4);
5868
5869 off = si->off;
5870 off -= offsetof(struct __sk_buff, remote_ip6[0]);
5871
5872 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
5873 si->dst_reg, si->src_reg,
5874 offsetof(struct sk_buff, sk));
5875 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5876 offsetof(struct sock_common,
5877 skc_v6_daddr.s6_addr32[0]) +
5878 off);
5879#else
5880 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
5881#endif
5882 break;
5883 case offsetof(struct __sk_buff, local_ip6[0]) ...
5884 offsetof(struct __sk_buff, local_ip6[3]):
5885#if IS_ENABLED(CONFIG_IPV6)
5886 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
5887 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
5888
5889 off = si->off;
5890 off -= offsetof(struct __sk_buff, local_ip6[0]);
5891
5892 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
5893 si->dst_reg, si->src_reg,
5894 offsetof(struct sk_buff, sk));
5895 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5896 offsetof(struct sock_common,
5897 skc_v6_rcv_saddr.s6_addr32[0]) +
5898 off);
5899#else
5900 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
5901#endif
5902 break;
5903
5904 case offsetof(struct __sk_buff, remote_port):
5905 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
5906
5907 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
5908 si->dst_reg, si->src_reg,
5909 offsetof(struct sk_buff, sk));
5910 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
5911 bpf_target_off(struct sock_common,
5912 skc_dport,
5913 2, target_size));
5914#ifndef __BIG_ENDIAN_BITFIELD
5915 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
5916#endif
5917 break;
5918
5919 case offsetof(struct __sk_buff, local_port):
5920 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
5921
5922 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
5923 si->dst_reg, si->src_reg,
5924 offsetof(struct sk_buff, sk));
5925 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
5926 bpf_target_off(struct sock_common,
5927 skc_num, 2, target_size));
5928 break;
5929 }
5930
5931 return insn - insn_buf;
5932}
5933
5934static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
5935 const struct bpf_insn *si,
5936 struct bpf_insn *insn_buf,
5937 struct bpf_prog *prog, u32 *target_size)
5938{
5939 struct bpf_insn *insn = insn_buf;
5940 int off;
5941
5942 switch (si->off) {
5943 case offsetof(struct bpf_sock, bound_dev_if):
5944 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
5945
5946 if (type == BPF_WRITE)
5947 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
5948 offsetof(struct sock, sk_bound_dev_if));
5949 else
5950 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5951 offsetof(struct sock, sk_bound_dev_if));
5952 break;
5953
5954 case offsetof(struct bpf_sock, mark):
5955 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
5956
5957 if (type == BPF_WRITE)
5958 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
5959 offsetof(struct sock, sk_mark));
5960 else
5961 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5962 offsetof(struct sock, sk_mark));
5963 break;
5964
5965 case offsetof(struct bpf_sock, priority):
5966 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
5967
5968 if (type == BPF_WRITE)
5969 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
5970 offsetof(struct sock, sk_priority));
5971 else
5972 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5973 offsetof(struct sock, sk_priority));
5974 break;
5975
5976 case offsetof(struct bpf_sock, family):
5977 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
5978
5979 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
5980 offsetof(struct sock, sk_family));
5981 break;
5982
5983 case offsetof(struct bpf_sock, type):
5984 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5985 offsetof(struct sock, __sk_flags_offset));
5986 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
5987 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
5988 break;
5989
5990 case offsetof(struct bpf_sock, protocol):
5991 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5992 offsetof(struct sock, __sk_flags_offset));
5993 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
5994 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
5995 break;
5996
5997 case offsetof(struct bpf_sock, src_ip4):
5998 *insn++ = BPF_LDX_MEM(
5999 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
6000 bpf_target_off(struct sock_common, skc_rcv_saddr,
6001 FIELD_SIZEOF(struct sock_common,
6002 skc_rcv_saddr),
6003 target_size));
6004 break;
6005
6006 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
6007#if IS_ENABLED(CONFIG_IPV6)
6008 off = si->off;
6009 off -= offsetof(struct bpf_sock, src_ip6[0]);
6010 *insn++ = BPF_LDX_MEM(
6011 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
6012 bpf_target_off(
6013 struct sock_common,
6014 skc_v6_rcv_saddr.s6_addr32[0],
6015 FIELD_SIZEOF(struct sock_common,
6016 skc_v6_rcv_saddr.s6_addr32[0]),
6017 target_size) + off);
6018#else
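		/* keep 'off' referenced to avoid an unused-variable warning
		 * when IPv6 is disabled
		 */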
6019 (void)off;
6020 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
6021#endif
6022 break;
6023
6024 case offsetof(struct bpf_sock, src_port):
6025 *insn++ = BPF_LDX_MEM(
6026 BPF_FIELD_SIZEOF(struct sock_common, skc_num),
6027 si->dst_reg, si->src_reg,
6028 bpf_target_off(struct sock_common, skc_num,
6029 FIELD_SIZEOF(struct sock_common,
6030 skc_num),
6031 target_size));
6032 break;
6033 }
6034
6035 return insn - insn_buf;
6036}
6037
6038static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
6039 const struct bpf_insn *si,
6040 struct bpf_insn *insn_buf,
6041 struct bpf_prog *prog, u32 *target_size)
6042{
6043 struct bpf_insn *insn = insn_buf;
6044
6045 switch (si->off) {
6046 case offsetof(struct __sk_buff, ifindex):
6047 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6048 si->dst_reg, si->src_reg,
6049 offsetof(struct sk_buff, dev));
6050 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6051 bpf_target_off(struct net_device, ifindex, 4,
6052 target_size));
6053 break;
6054 default:
6055 return bpf_convert_ctx_access(type, si, insn_buf, prog,
6056 target_size);
6057 }
6058
6059 return insn - insn_buf;
6060}
6061
6062static u32 xdp_convert_ctx_access(enum bpf_access_type type,
6063 const struct bpf_insn *si,
6064 struct bpf_insn *insn_buf,
6065 struct bpf_prog *prog, u32 *target_size)
6066{
6067 struct bpf_insn *insn = insn_buf;
6068
6069 switch (si->off) {
6070 case offsetof(struct xdp_md, data):
6071 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
6072 si->dst_reg, si->src_reg,
6073 offsetof(struct xdp_buff, data));
6074 break;
6075 case offsetof(struct xdp_md, data_meta):
6076 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
6077 si->dst_reg, si->src_reg,
6078 offsetof(struct xdp_buff, data_meta));
6079 break;
6080 case offsetof(struct xdp_md, data_end):
6081 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
6082 si->dst_reg, si->src_reg,
6083 offsetof(struct xdp_buff, data_end));
6084 break;
6085 case offsetof(struct xdp_md, ingress_ifindex):
6086 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
6087 si->dst_reg, si->src_reg,
6088 offsetof(struct xdp_buff, rxq));
6089 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
6090 si->dst_reg, si->dst_reg,
6091 offsetof(struct xdp_rxq_info, dev));
6092 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6093 offsetof(struct net_device, ifindex));
6094 break;
6095 case offsetof(struct xdp_md, rx_queue_index):
6096 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
6097 si->dst_reg, si->src_reg,
6098 offsetof(struct xdp_buff, rxq));
6099 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6100 offsetof(struct xdp_rxq_info,
6101 queue_index));
6102 break;
6103 }
6104
6105 return insn - insn_buf;
6106}
6107
/* SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() loads the nested field S.F.NF, where
 * S is the type of the context structure, F is a field of S holding a pointer
 * to a nested structure of type NS, and NF is the field of NS to load.
 *
 * SIZE is the load size (BPF_B, BPF_H, ...); the caller must make sure it is
 * not larger than the actual size of S.F.NF.  If OFF is non-zero, the load is
 * done at offset OFF relative to NF.
 */
6118#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
6119 do { \
6120 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
6121 si->src_reg, offsetof(S, F)); \
6122 *insn++ = BPF_LDX_MEM( \
6123 SIZE, si->dst_reg, si->dst_reg, \
6124 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
6125 target_size) \
6126 + OFF); \
6127 } while (0)
6128
6129#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
6130 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
6131 BPF_FIELD_SIZEOF(NS, NF), 0)
6132
/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() is the store counterpart of
 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF().  It takes no SIZE argument since
 * narrow stores are not supported.
 *
 * A third scratch register is needed: src_reg holds the value to store and
 * dst_reg holds the context pointer, and neither may be clobbered.  The macro
 * therefore picks a temporary register (avoiding src_reg and dst_reg), spills
 * it into the temporary field TF of the context structure S, uses it to hold
 * the pointer to the nested structure, and restores it from TF at the end.
 */
6146#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
6147 do { \
6148 int tmp_reg = BPF_REG_9; \
6149 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
6150 --tmp_reg; \
6151 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
6152 --tmp_reg; \
6153 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
6154 offsetof(S, TF)); \
6155 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
6156 si->dst_reg, offsetof(S, F)); \
6157 *insn++ = BPF_STX_MEM( \
6158 BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
6159 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
6160 target_size) \
6161 + OFF); \
6162 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
6163 offsetof(S, TF)); \
6164 } while (0)
6165
6166#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
6167 TF) \
6168 do { \
6169 if (type == BPF_WRITE) { \
6170 SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
6171 TF); \
6172 } else { \
6173 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
6174 S, NS, F, NF, SIZE, OFF); \
6175 } \
6176 } while (0)
6177
6178#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
6179 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
6180 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
6181
6182static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
6183 const struct bpf_insn *si,
6184 struct bpf_insn *insn_buf,
6185 struct bpf_prog *prog, u32 *target_size)
6186{
6187 struct bpf_insn *insn = insn_buf;
6188 int off;
6189
6190 switch (si->off) {
6191 case offsetof(struct bpf_sock_addr, user_family):
6192 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
6193 struct sockaddr, uaddr, sa_family);
6194 break;
6195
6196 case offsetof(struct bpf_sock_addr, user_ip4):
6197 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
6198 struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
6199 sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
6200 break;
6201
6202 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
6203 off = si->off;
6204 off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
6205 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
6206 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
6207 sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
6208 tmp_reg);
6209 break;
6210
6211 case offsetof(struct bpf_sock_addr, user_port):
		/* Fetching the port would normally require looking at
		 * sa_family first and then treating uaddr as sockaddr_in or
		 * sockaddr_in6.  Since sin_port and sin6_port have the same
		 * offset and size in both structures (checked by the
		 * BUILD_BUG_ON()s below), the sockaddr_in6 layout is used
		 * for both.
		 */
6219 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
6220 offsetof(struct sockaddr_in6, sin6_port));
6221 BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
6222 FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
6223 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
6224 struct sockaddr_in6, uaddr,
6225 sin6_port, tmp_reg);
6226 break;
6227
6228 case offsetof(struct bpf_sock_addr, family):
6229 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
6230 struct sock, sk, sk_family);
6231 break;
6232
6233 case offsetof(struct bpf_sock_addr, type):
6234 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
6235 struct bpf_sock_addr_kern, struct sock, sk,
6236 __sk_flags_offset, BPF_W, 0);
6237 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
6238 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
6239 break;
6240
6241 case offsetof(struct bpf_sock_addr, protocol):
6242 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
6243 struct bpf_sock_addr_kern, struct sock, sk,
6244 __sk_flags_offset, BPF_W, 0);
6245 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
6246 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
6247 SK_FL_PROTO_SHIFT);
6248 break;
6249
6250 case offsetof(struct bpf_sock_addr, msg_src_ip4):
		/* t_ctx is interpreted as struct in_addr here */
6252 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
6253 struct bpf_sock_addr_kern, struct in_addr, t_ctx,
6254 s_addr, BPF_SIZE(si->code), 0, tmp_reg);
6255 break;
6256
6257 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
6258 msg_src_ip6[3]):
6259 off = si->off;
6260 off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
		/* t_ctx is interpreted as struct in6_addr here */
6262 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
6263 struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
6264 s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
6265 break;
6266 }
6267
6268 return insn - insn_buf;
6269}
6270
6271static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
6272 const struct bpf_insn *si,
6273 struct bpf_insn *insn_buf,
6274 struct bpf_prog *prog,
6275 u32 *target_size)
6276{
6277 struct bpf_insn *insn = insn_buf;
6278 int off;
6279
6280 switch (si->off) {
6281 case offsetof(struct bpf_sock_ops, op) ...
6282 offsetof(struct bpf_sock_ops, replylong[3]):
6283 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
6284 FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
6285 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
6286 FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
6287 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
6288 FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
6289 off = si->off;
6290 off -= offsetof(struct bpf_sock_ops, op);
6291 off += offsetof(struct bpf_sock_ops_kern, op);
6292 if (type == BPF_WRITE)
6293 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
6294 off);
6295 else
6296 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
6297 off);
6298 break;
6299
6300 case offsetof(struct bpf_sock_ops, family):
6301 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
6302
6303 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6304 struct bpf_sock_ops_kern, sk),
6305 si->dst_reg, si->src_reg,
6306 offsetof(struct bpf_sock_ops_kern, sk));
6307 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
6308 offsetof(struct sock_common, skc_family));
6309 break;
6310
6311 case offsetof(struct bpf_sock_ops, remote_ip4):
6312 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
6313
6314 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6315 struct bpf_sock_ops_kern, sk),
6316 si->dst_reg, si->src_reg,
6317 offsetof(struct bpf_sock_ops_kern, sk));
6318 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6319 offsetof(struct sock_common, skc_daddr));
6320 break;
6321
6322 case offsetof(struct bpf_sock_ops, local_ip4):
6323 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
6324 skc_rcv_saddr) != 4);
6325
6326 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6327 struct bpf_sock_ops_kern, sk),
6328 si->dst_reg, si->src_reg,
6329 offsetof(struct bpf_sock_ops_kern, sk));
6330 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6331 offsetof(struct sock_common,
6332 skc_rcv_saddr));
6333 break;
6334
6335 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
6336 offsetof(struct bpf_sock_ops, remote_ip6[3]):
6337#if IS_ENABLED(CONFIG_IPV6)
6338 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
6339 skc_v6_daddr.s6_addr32[0]) != 4);
6340
6341 off = si->off;
6342 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
6343 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6344 struct bpf_sock_ops_kern, sk),
6345 si->dst_reg, si->src_reg,
6346 offsetof(struct bpf_sock_ops_kern, sk));
6347 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6348 offsetof(struct sock_common,
6349 skc_v6_daddr.s6_addr32[0]) +
6350 off);
6351#else
6352 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
6353#endif
6354 break;
6355
6356 case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
6357 offsetof(struct bpf_sock_ops, local_ip6[3]):
6358#if IS_ENABLED(CONFIG_IPV6)
6359 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
6360 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
6361
6362 off = si->off;
6363 off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
6364 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6365 struct bpf_sock_ops_kern, sk),
6366 si->dst_reg, si->src_reg,
6367 offsetof(struct bpf_sock_ops_kern, sk));
6368 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6369 offsetof(struct sock_common,
6370 skc_v6_rcv_saddr.s6_addr32[0]) +
6371 off);
6372#else
6373 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
6374#endif
6375 break;
6376
6377 case offsetof(struct bpf_sock_ops, remote_port):
6378 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
6379
6380 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6381 struct bpf_sock_ops_kern, sk),
6382 si->dst_reg, si->src_reg,
6383 offsetof(struct bpf_sock_ops_kern, sk));
6384 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
6385 offsetof(struct sock_common, skc_dport));
6386#ifndef __BIG_ENDIAN_BITFIELD
6387 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
6388#endif
6389 break;
6390
6391 case offsetof(struct bpf_sock_ops, local_port):
6392 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
6393
6394 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6395 struct bpf_sock_ops_kern, sk),
6396 si->dst_reg, si->src_reg,
6397 offsetof(struct bpf_sock_ops_kern, sk));
6398 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
6399 offsetof(struct sock_common, skc_num));
6400 break;
6401
6402 case offsetof(struct bpf_sock_ops, is_fullsock):
6403 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6404 struct bpf_sock_ops_kern,
6405 is_fullsock),
6406 si->dst_reg, si->src_reg,
6407 offsetof(struct bpf_sock_ops_kern,
6408 is_fullsock));
6409 break;
6410
6411 case offsetof(struct bpf_sock_ops, state):
6412 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
6413
6414 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6415 struct bpf_sock_ops_kern, sk),
6416 si->dst_reg, si->src_reg,
6417 offsetof(struct bpf_sock_ops_kern, sk));
6418 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
6419 offsetof(struct sock_common, skc_state));
6420 break;
6421
6422 case offsetof(struct bpf_sock_ops, rtt_min):
6423 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
6424 sizeof(struct minmax));
6425 BUILD_BUG_ON(sizeof(struct minmax) <
6426 sizeof(struct minmax_sample));
6427
6428 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6429 struct bpf_sock_ops_kern, sk),
6430 si->dst_reg, si->src_reg,
6431 offsetof(struct bpf_sock_ops_kern, sk));
6432 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6433 offsetof(struct tcp_sock, rtt_min) +
6434 FIELD_SIZEOF(struct minmax_sample, t));
6435 break;
6436
/* Helper macro for reading a field of the underlying socket (OBJ.OBJ_FIELD)
 * into dst_reg.  If the socket is not a full socket, the loads are skipped
 * and dst_reg is left holding 0 (the is_fullsock value).
 */
6438#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
6439 do { \
6440 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
6441 FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
6442 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
6443 struct bpf_sock_ops_kern, \
6444 is_fullsock), \
6445 si->dst_reg, si->src_reg, \
6446 offsetof(struct bpf_sock_ops_kern, \
6447 is_fullsock)); \
6448 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
6449 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
6450 struct bpf_sock_ops_kern, sk),\
6451 si->dst_reg, si->src_reg, \
6452 offsetof(struct bpf_sock_ops_kern, sk));\
6453 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
6454 OBJ_FIELD), \
6455 si->dst_reg, si->dst_reg, \
6456 offsetof(OBJ, OBJ_FIELD)); \
6457 } while (0)
6458
/* Helper macro for writing src_reg into a field of the underlying socket
 * (OBJ.OBJ_FIELD).  dst_reg holds the context pointer and src_reg the value
 * to store, and neither may be clobbered, so a third register is "borrowed":
 * its value is saved into the temp field of bpf_sock_ops_kern, it is used to
 * hold the socket pointer, and its original value is restored at the end.
 * If the socket is not a full socket, the store is skipped.
 */
6468#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
6469 do { \
6470 int reg = BPF_REG_9; \
6471 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
6472 FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
6473 if (si->dst_reg == reg || si->src_reg == reg) \
6474 reg--; \
6475 if (si->dst_reg == reg || si->src_reg == reg) \
6476 reg--; \
6477 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
6478 offsetof(struct bpf_sock_ops_kern, \
6479 temp)); \
6480 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
6481 struct bpf_sock_ops_kern, \
6482 is_fullsock), \
6483 reg, si->dst_reg, \
6484 offsetof(struct bpf_sock_ops_kern, \
6485 is_fullsock)); \
6486 *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
6487 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
6488 struct bpf_sock_ops_kern, sk),\
6489 reg, si->dst_reg, \
6490 offsetof(struct bpf_sock_ops_kern, sk));\
6491 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
6492 reg, si->src_reg, \
6493 offsetof(OBJ, OBJ_FIELD)); \
6494 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
6495 offsetof(struct bpf_sock_ops_kern, \
6496 temp)); \
6497 } while (0)
6498
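/* Pick the read or write helper above depending on the access type. */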
6499#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
6500 do { \
6501 if (TYPE == BPF_WRITE) \
6502 SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
6503 else \
6504 SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
6505 } while (0)
6506
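	/* The cases below map bpf_sock_ops fields one-to-one onto the
	 * same-named tcp_sock (or sock) fields through the helpers above.
	 * For illustration only (not part of this file), a minimal
	 * libbpf-style BPF_PROG_TYPE_SOCK_OPS program whose field access is
	 * rewritten by this function might look like:
	 *
	 *	SEC("sockops")
	 *	int sockops_sample(struct bpf_sock_ops *skops)
	 *	{
	 *		if (skops->snd_cwnd > 10)
	 *			bpf_sock_ops_cb_flags_set(skops,
	 *						  BPF_SOCK_OPS_RTO_CB_FLAG);
	 *		return 1;
	 *	}
	 */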
6507 case offsetof(struct bpf_sock_ops, snd_cwnd):
6508 SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
6509 break;
6510
6511 case offsetof(struct bpf_sock_ops, srtt_us):
6512 SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
6513 break;
6514
6515 case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
6516 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
6517 struct tcp_sock);
6518 break;
6519
6520 case offsetof(struct bpf_sock_ops, snd_ssthresh):
6521 SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
6522 break;
6523
6524 case offsetof(struct bpf_sock_ops, rcv_nxt):
6525 SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
6526 break;
6527
6528 case offsetof(struct bpf_sock_ops, snd_nxt):
6529 SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
6530 break;
6531
6532 case offsetof(struct bpf_sock_ops, snd_una):
6533 SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
6534 break;
6535
6536 case offsetof(struct bpf_sock_ops, mss_cache):
6537 SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
6538 break;
6539
6540 case offsetof(struct bpf_sock_ops, ecn_flags):
6541 SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
6542 break;
6543
6544 case offsetof(struct bpf_sock_ops, rate_delivered):
6545 SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
6546 struct tcp_sock);
6547 break;
6548
6549 case offsetof(struct bpf_sock_ops, rate_interval_us):
6550 SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
6551 struct tcp_sock);
6552 break;
6553
6554 case offsetof(struct bpf_sock_ops, packets_out):
6555 SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
6556 break;
6557
6558 case offsetof(struct bpf_sock_ops, retrans_out):
6559 SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
6560 break;
6561
6562 case offsetof(struct bpf_sock_ops, total_retrans):
6563 SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
6564 struct tcp_sock);
6565 break;
6566
6567 case offsetof(struct bpf_sock_ops, segs_in):
6568 SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
6569 break;
6570
6571 case offsetof(struct bpf_sock_ops, data_segs_in):
6572 SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
6573 break;
6574
6575 case offsetof(struct bpf_sock_ops, segs_out):
6576 SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
6577 break;
6578
6579 case offsetof(struct bpf_sock_ops, data_segs_out):
6580 SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
6581 struct tcp_sock);
6582 break;
6583
6584 case offsetof(struct bpf_sock_ops, lost_out):
6585 SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
6586 break;
6587
6588 case offsetof(struct bpf_sock_ops, sacked_out):
6589 SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
6590 break;
6591
6592 case offsetof(struct bpf_sock_ops, sk_txhash):
6593 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
6594 struct sock, type);
6595 break;
6596
6597 case offsetof(struct bpf_sock_ops, bytes_received):
6598 SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
6599 struct tcp_sock);
6600 break;
6601
6602 case offsetof(struct bpf_sock_ops, bytes_acked):
6603 SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
6604 break;
6605
6606 }
6607 return insn - insn_buf;
6608}
6609
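/* Context access conversion for SK_SKB programs: data_end lives in the skb
 * control block (tcp_skb_cb.bpf.data_end); everything else falls back to the
 * generic __sk_buff conversion.
 */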
6610static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
6611 const struct bpf_insn *si,
6612 struct bpf_insn *insn_buf,
6613 struct bpf_prog *prog, u32 *target_size)
6614{
6615 struct bpf_insn *insn = insn_buf;
6616 int off;
6617
6618 switch (si->off) {
6619 case offsetof(struct __sk_buff, data_end):
6620 off = si->off;
6621 off -= offsetof(struct __sk_buff, data_end);
6622 off += offsetof(struct sk_buff, cb);
6623 off += offsetof(struct tcp_skb_cb, bpf.data_end);
6624 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
6625 si->src_reg, off);
6626 break;
6627 default:
6628 return bpf_convert_ctx_access(type, si, insn_buf, prog,
6629 target_size);
6630 }
6631
6632 return insn - insn_buf;
6633}
6634
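/* Context access conversion for SK_MSG programs: sk_msg_md accesses are
 * rewritten into loads from struct sk_msg_buff, with the address and port
 * fields fetched via the msg's socket (struct sock_common).
 */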
6635static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
6636 const struct bpf_insn *si,
6637 struct bpf_insn *insn_buf,
6638 struct bpf_prog *prog, u32 *target_size)
6639{
6640 struct bpf_insn *insn = insn_buf;
6641#if IS_ENABLED(CONFIG_IPV6)
6642 int off;
6643#endif
6644
6645 switch (si->off) {
6646 case offsetof(struct sk_msg_md, data):
6647 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
6648 si->dst_reg, si->src_reg,
6649 offsetof(struct sk_msg_buff, data));
6650 break;
6651 case offsetof(struct sk_msg_md, data_end):
6652 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
6653 si->dst_reg, si->src_reg,
6654 offsetof(struct sk_msg_buff, data_end));
6655 break;
6656 case offsetof(struct sk_msg_md, family):
6657 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
6658
6659 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6660 struct sk_msg_buff, sk),
6661 si->dst_reg, si->src_reg,
6662 offsetof(struct sk_msg_buff, sk));
6663 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
6664 offsetof(struct sock_common, skc_family));
6665 break;
6666
6667 case offsetof(struct sk_msg_md, remote_ip4):
6668 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
6669
6670 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6671 struct sk_msg_buff, sk),
6672 si->dst_reg, si->src_reg,
6673 offsetof(struct sk_msg_buff, sk));
6674 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6675 offsetof(struct sock_common, skc_daddr));
6676 break;
6677
6678 case offsetof(struct sk_msg_md, local_ip4):
6679 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
6680 skc_rcv_saddr) != 4);
6681
6682 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6683 struct sk_msg_buff, sk),
6684 si->dst_reg, si->src_reg,
6685 offsetof(struct sk_msg_buff, sk));
6686 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6687 offsetof(struct sock_common,
6688 skc_rcv_saddr));
6689 break;
6690
6691 case offsetof(struct sk_msg_md, remote_ip6[0]) ...
6692 offsetof(struct sk_msg_md, remote_ip6[3]):
6693#if IS_ENABLED(CONFIG_IPV6)
6694 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
6695 skc_v6_daddr.s6_addr32[0]) != 4);
6696
6697 off = si->off;
6698 off -= offsetof(struct sk_msg_md, remote_ip6[0]);
6699 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6700 struct sk_msg_buff, sk),
6701 si->dst_reg, si->src_reg,
6702 offsetof(struct sk_msg_buff, sk));
6703 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6704 offsetof(struct sock_common,
6705 skc_v6_daddr.s6_addr32[0]) +
6706 off);
6707#else
6708 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
6709#endif
6710 break;
6711
6712 case offsetof(struct sk_msg_md, local_ip6[0]) ...
6713 offsetof(struct sk_msg_md, local_ip6[3]):
6714#if IS_ENABLED(CONFIG_IPV6)
6715 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
6716 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
6717
6718 off = si->off;
6719 off -= offsetof(struct sk_msg_md, local_ip6[0]);
6720 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6721 struct sk_msg_buff, sk),
6722 si->dst_reg, si->src_reg,
6723 offsetof(struct sk_msg_buff, sk));
6724 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
6725 offsetof(struct sock_common,
6726 skc_v6_rcv_saddr.s6_addr32[0]) +
6727 off);
6728#else
6729 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
6730#endif
6731 break;
6732
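	/* As with bpf_sock_ops above, remote_port is exposed in network byte
	 * order, hence the 16-bit shift on little-endian hosts.
	 */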
6733 case offsetof(struct sk_msg_md, remote_port):
6734 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
6735
6736 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6737 struct sk_msg_buff, sk),
6738 si->dst_reg, si->src_reg,
6739 offsetof(struct sk_msg_buff, sk));
6740 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
6741 offsetof(struct sock_common, skc_dport));
6742#ifndef __BIG_ENDIAN_BITFIELD
6743 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
6744#endif
6745 break;
6746
6747 case offsetof(struct sk_msg_md, local_port):
6748 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
6749
6750 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
6751 struct sk_msg_buff, sk),
6752 si->dst_reg, si->src_reg,
6753 offsetof(struct sk_msg_buff, sk));
6754 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
6755 offsetof(struct sock_common, skc_num));
6756 break;
6757 }
6758
6759 return insn - insn_buf;
6760}
6761
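/* Per-program-type hooks: each bpf_verifier_ops table tells the verifier how
 * to resolve helper calls (get_func_proto), validate context accesses
 * (is_valid_access) and rewrite them (convert_ctx_access), plus optional
 * prologue and LD_ABS generation; the matching bpf_prog_ops optionally
 * provide a test_run handler for BPF_PROG_TEST_RUN.
 */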
6762const struct bpf_verifier_ops sk_filter_verifier_ops = {
6763 .get_func_proto = sk_filter_func_proto,
6764 .is_valid_access = sk_filter_is_valid_access,
6765 .convert_ctx_access = bpf_convert_ctx_access,
6766 .gen_ld_abs = bpf_gen_ld_abs,
6767};
6768
6769const struct bpf_prog_ops sk_filter_prog_ops = {
6770 .test_run = bpf_prog_test_run_skb,
6771};
6772
6773const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
6774 .get_func_proto = tc_cls_act_func_proto,
6775 .is_valid_access = tc_cls_act_is_valid_access,
6776 .convert_ctx_access = tc_cls_act_convert_ctx_access,
6777 .gen_prologue = tc_cls_act_prologue,
6778 .gen_ld_abs = bpf_gen_ld_abs,
6779};
6780
6781const struct bpf_prog_ops tc_cls_act_prog_ops = {
6782 .test_run = bpf_prog_test_run_skb,
6783};
6784
6785const struct bpf_verifier_ops xdp_verifier_ops = {
6786 .get_func_proto = xdp_func_proto,
6787 .is_valid_access = xdp_is_valid_access,
6788 .convert_ctx_access = xdp_convert_ctx_access,
6789};
6790
6791const struct bpf_prog_ops xdp_prog_ops = {
6792 .test_run = bpf_prog_test_run_xdp,
6793};
6794
6795const struct bpf_verifier_ops cg_skb_verifier_ops = {
6796 .get_func_proto = sk_filter_func_proto,
6797 .is_valid_access = sk_filter_is_valid_access,
6798 .convert_ctx_access = bpf_convert_ctx_access,
6799};
6800
6801const struct bpf_prog_ops cg_skb_prog_ops = {
6802 .test_run = bpf_prog_test_run_skb,
6803};
6804
6805const struct bpf_verifier_ops lwt_in_verifier_ops = {
6806 .get_func_proto = lwt_in_func_proto,
6807 .is_valid_access = lwt_is_valid_access,
6808 .convert_ctx_access = bpf_convert_ctx_access,
6809};
6810
6811const struct bpf_prog_ops lwt_in_prog_ops = {
6812 .test_run = bpf_prog_test_run_skb,
6813};
6814
6815const struct bpf_verifier_ops lwt_out_verifier_ops = {
6816 .get_func_proto = lwt_out_func_proto,
6817 .is_valid_access = lwt_is_valid_access,
6818 .convert_ctx_access = bpf_convert_ctx_access,
6819};
6820
6821const struct bpf_prog_ops lwt_out_prog_ops = {
6822 .test_run = bpf_prog_test_run_skb,
6823};
6824
6825const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
6826 .get_func_proto = lwt_xmit_func_proto,
6827 .is_valid_access = lwt_is_valid_access,
6828 .convert_ctx_access = bpf_convert_ctx_access,
6829 .gen_prologue = tc_cls_act_prologue,
6830};
6831
6832const struct bpf_prog_ops lwt_xmit_prog_ops = {
6833 .test_run = bpf_prog_test_run_skb,
6834};
6835
6836const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
6837 .get_func_proto = lwt_seg6local_func_proto,
6838 .is_valid_access = lwt_is_valid_access,
6839 .convert_ctx_access = bpf_convert_ctx_access,
6840};
6841
6842const struct bpf_prog_ops lwt_seg6local_prog_ops = {
6843 .test_run = bpf_prog_test_run_skb,
6844};
6845
6846const struct bpf_verifier_ops cg_sock_verifier_ops = {
6847 .get_func_proto = sock_filter_func_proto,
6848 .is_valid_access = sock_filter_is_valid_access,
6849 .convert_ctx_access = sock_filter_convert_ctx_access,
6850};
6851
6852const struct bpf_prog_ops cg_sock_prog_ops = {
6853};
6854
6855const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
6856 .get_func_proto = sock_addr_func_proto,
6857 .is_valid_access = sock_addr_is_valid_access,
6858 .convert_ctx_access = sock_addr_convert_ctx_access,
6859};
6860
6861const struct bpf_prog_ops cg_sock_addr_prog_ops = {
6862};
6863
6864const struct bpf_verifier_ops sock_ops_verifier_ops = {
6865 .get_func_proto = sock_ops_func_proto,
6866 .is_valid_access = sock_ops_is_valid_access,
6867 .convert_ctx_access = sock_ops_convert_ctx_access,
6868};
6869
6870const struct bpf_prog_ops sock_ops_prog_ops = {
6871};
6872
6873const struct bpf_verifier_ops sk_skb_verifier_ops = {
6874 .get_func_proto = sk_skb_func_proto,
6875 .is_valid_access = sk_skb_is_valid_access,
6876 .convert_ctx_access = sk_skb_convert_ctx_access,
6877 .gen_prologue = sk_skb_prologue,
6878};
6879
6880const struct bpf_prog_ops sk_skb_prog_ops = {
6881};
6882
6883const struct bpf_verifier_ops sk_msg_verifier_ops = {
6884 .get_func_proto = sk_msg_func_proto,
6885 .is_valid_access = sk_msg_is_valid_access,
6886 .convert_ctx_access = sk_msg_convert_ctx_access,
6887};
6888
6889const struct bpf_prog_ops sk_msg_prog_ops = {
6890};
6891
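/* Detach the filter attached to @sk, e.g. on behalf of the SO_DETACH_FILTER
 * socket option. The caller must hold the socket lock. Returns -EPERM if
 * SOCK_FILTER_LOCKED is set, -ENOENT if no filter is attached, and 0 on
 * success.
 */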
6892int sk_detach_filter(struct sock *sk)
6893{
6894 int ret = -ENOENT;
6895 struct sk_filter *filter;
6896
6897 if (sock_flag(sk, SOCK_FILTER_LOCKED))
6898 return -EPERM;
6899
6900 filter = rcu_dereference_protected(sk->sk_filter,
6901 lockdep_sock_is_held(sk));
6902 if (filter) {
6903 RCU_INIT_POINTER(sk->sk_filter, NULL);
6904 sk_filter_uncharge(sk, filter);
6905 ret = 0;
6906 }
6907
6908 return ret;
6909}
6910EXPORT_SYMBOL_GPL(sk_detach_filter);
6911
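/* Copy the classic BPF filter originally attached to @sk back to user space
 * and return its length in sock_filter blocks (not bytes); this backs the
 * SO_GET_FILTER getsockopt(). A rough sketch of the expected user-space
 * usage, querying the block count first (illustrative only):
 *
 *	socklen_t cnt = 0;
 *
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &cnt);
 *	insns = calloc(cnt, sizeof(struct sock_filter));
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &cnt);
 */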
6912int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
6913 unsigned int len)
6914{
6915 struct sock_fprog_kern *fprog;
6916 struct sk_filter *filter;
6917 int ret = 0;
6918
6919 lock_sock(sk);
6920 filter = rcu_dereference_protected(sk->sk_filter,
6921 lockdep_sock_is_held(sk));
6922 if (!filter)
6923 goto out;
6924
	/* We're copying the filter that has been originally attached,
	 * so no conversion/decode is needed anymore. eBPF programs that
	 * have no original program cannot be dumped through this.
	 */
6929 ret = -EACCES;
6930 fprog = filter->prog->orig_prog;
6931 if (!fprog)
6932 goto out;
6933
6934 ret = fprog->len;
6935 if (!len)
		/* User space only enquires about the number of filter blocks. */
6937 goto out;
6938
6939 ret = -EINVAL;
6940 if (len < fprog->len)
6941 goto out;
6942
6943 ret = -EFAULT;
6944 if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
6945 goto out;
6946
	/* Instead of bytes, the API requests to return the number
	 * of filter blocks.
	 */
6950 ret = fprog->len;
6951out:
6952 release_sock(sk);
6953 return ret;
6954}
6955