/*
 * Functions to manage eBPF programs attached to cgroups
 */
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>

DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
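
/**
 * cgroup_bpf_put() - put references of all bpf programs in a cgroup
 * @cgrp: the cgroup to modify
 */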
void cgroup_bpf_put(struct cgroup *cgrp)
{
	unsigned int type;

	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
		struct list_head *progs = &cgrp->bpf.progs[type];
		struct bpf_prog_list *pl, *tmp;

		list_for_each_entry_safe(pl, tmp, progs, node) {
			list_del(&pl->node);
			bpf_prog_put(pl->prog);
			bpf_cgroup_storage_unlink(pl->storage);
			bpf_cgroup_storage_free(pl->storage);
			kfree(pl);
			static_branch_dec(&cgroup_bpf_enabled_key);
		}
		bpf_prog_array_free(cgrp->bpf.effective[type]);
	}
}
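
/* Count the programs on a per-cgroup list. Entries whose prog pointer
 * is NULL (marked as detached) are skipped.
 */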
static u32 prog_list_length(struct list_head *head)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	list_for_each_entry(pl, head, node) {
		if (!pl->prog)
			continue;
		cnt++;
	}
	return cnt;
}
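
/* If a parent has a non-overridable program attached, disallow attaching
 * new programs to the descendant cgroup. If the parent allows overriding
 * or multiple programs, attaching is allowed.
 */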
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum bpf_attach_type type,
				    u32 new_flags)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[type];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[type]);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}
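
/* Compute the chain of effective programs for a given cgroup: start from
 * the list of programs attached to this cgroup and add all parent programs.
 * Note that a parent's BPF_F_ALLOW_OVERRIDE program yields to programs
 * attached to this cgroup.
 */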
static int compute_effective_progs(struct cgroup *cgrp,
				   enum bpf_attach_type type,
				   struct bpf_prog_array __rcu **array)
{
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int cnt = 0;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[type]);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	do {
		if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
			continue;

		list_for_each_entry(pl, &p->bpf.progs[type], node) {
			if (!pl->prog)
				continue;

			progs->items[cnt].prog = pl->prog;
			progs->items[cnt].cgroup_storage = pl->storage;
			cnt++;
		}
	} while ((p = cgroup_parent(p)));

	rcu_assign_pointer(*array, progs);
	return 0;
}
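
/* Publish a freshly computed effective array for one attach type. */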
static void activate_effective_progs(struct cgroup *cgrp,
				     enum bpf_attach_type type,
				     struct bpf_prog_array __rcu *array)
{
	struct bpf_prog_array __rcu *old_array;

	old_array = xchg(&cgrp->bpf.effective[type], array);
	/* Free the old prog array after a grace period, since
	 * __cgroup_bpf_run_*() might still be walking it.
	 */
	bpf_prog_array_free(old_array);
}
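
/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */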
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* Has to use a macro instead of a const int, since the compiler thinks
 * that the array below is variable length.
 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array __rcu *arrays[NR] = {};
	int i;

	for (i = 0; i < NR; i++)
		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);

	for (i = 0; i < NR; i++)
		if (compute_effective_progs(cgrp, i, &arrays[i]))
			goto cleanup;

	for (i = 0; i < NR; i++)
		activate_effective_progs(cgrp, i, arrays[i]);

	return 0;
cleanup:
	for (i = 0; i < NR; i++)
		bpf_prog_array_free(arrays[i]);
	return -ENOMEM;
}
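
/* Recompute and activate the effective program arrays for @cgrp and all
 * of its descendants, after the set of attached programs has changed.
 */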
static int update_effective_progs(struct cgroup *cgrp,
				  enum bpf_attach_type type)
{
	struct cgroup_subsys_state *css;
	int err;

	/* allocate and recompute effective prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
		if (err)
			goto cleanup;
	}

	/* all allocations were successful, activate all prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		activate_effective_progs(desc, type, desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return 0;

cleanup:
	/* oom while computing effective progs: free all computed arrays,
	 * since they were never activated
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		bpf_prog_array_free(desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return err;
}

/* maximum number of programs that can be attached to one cgroup for
 * a single attach type
 */
#define BPF_CGROUP_MAX_PROGS 64
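
/**
 * __cgroup_bpf_attach() - Attach the program to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to attach
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Must be called with cgroup_mutex held.
 */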
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
			enum bpf_attach_type type, u32 flags)
{
	struct list_head *progs = &cgrp->bpf.progs[type];
	struct bpf_prog *old_prog = NULL;
	struct bpf_cgroup_storage *storage, *old_storage = NULL;
	struct bpf_prog_list *pl;
	bool pl_was_allocated;
	int err;

	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
		/* invalid combination */
		return -EINVAL;

	if (!hierarchy_allows_attach(cgrp, type, flags))
		return -EPERM;

	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
		/* Disallow attaching non-overridable on top
		 * of existing overridable in this cgroup.
		 * Disallow attaching multi-prog if overridable or none.
		 */
		return -EPERM;

	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
		return -E2BIG;

	storage = bpf_cgroup_storage_alloc(prog);
	if (IS_ERR(storage))
		return -ENOMEM;

	if (flags & BPF_F_ALLOW_MULTI) {
		list_for_each_entry(pl, progs, node) {
			if (pl->prog == prog) {
				/* disallow attaching the same prog twice */
				bpf_cgroup_storage_free(storage);
				return -EINVAL;
			}
		}

		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
			bpf_cgroup_storage_free(storage);
			return -ENOMEM;
		}

		pl_was_allocated = true;
		pl->prog = prog;
		pl->storage = storage;
		list_add_tail(&pl->node, progs);
	} else {
		if (list_empty(progs)) {
			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
			if (!pl) {
				bpf_cgroup_storage_free(storage);
				return -ENOMEM;
			}
			pl_was_allocated = true;
			list_add_tail(&pl->node, progs);
		} else {
			pl = list_first_entry(progs, typeof(*pl), node);
			old_prog = pl->prog;
			old_storage = pl->storage;
			bpf_cgroup_storage_unlink(old_storage);
			pl_was_allocated = false;
		}
		pl->prog = prog;
		pl->storage = storage;
	}

	cgrp->bpf.flags[type] = flags;

	err = update_effective_progs(cgrp, type);
	if (err)
		goto cleanup;

	static_branch_inc(&cgroup_bpf_enabled_key);
	if (old_storage)
		bpf_cgroup_storage_free(old_storage);
	if (old_prog) {
		bpf_prog_put(old_prog);
		static_branch_dec(&cgroup_bpf_enabled_key);
	}
	bpf_cgroup_storage_link(storage, cgrp, type);
	return 0;

cleanup:
	/* undo the changes to the prog list */
	pl->prog = old_prog;
	bpf_cgroup_storage_free(pl->storage);
	pl->storage = old_storage;
	bpf_cgroup_storage_link(old_storage, cgrp, type);
	if (pl_was_allocated) {
		list_del(&pl->node);
		kfree(pl);
	}
	return err;
}
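
/**
 * __cgroup_bpf_detach() - Detach the program from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @type: Type of detach operation
 *
 * Must be called with cgroup_mutex held.
 */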
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			enum bpf_attach_type type, u32 unused_flags)
{
	struct list_head *progs = &cgrp->bpf.progs[type];
	u32 flags = cgrp->bpf.flags[type];
	struct bpf_prog *old_prog = NULL;
	struct bpf_prog_list *pl;
	int err;

	if (flags & BPF_F_ALLOW_MULTI) {
		if (!prog)
			/* to detach a MULTI prog the user has to specify a
			 * valid FD of the program to be detached
			 */
			return -EINVAL;
	} else {
		if (list_empty(progs))
			/* report error when trying to detach and nothing is attached */
			return -ENOENT;
	}

	if (flags & BPF_F_ALLOW_MULTI) {
		/* find the prog and detach it */
		list_for_each_entry(pl, progs, node) {
			if (pl->prog != prog)
				continue;
			old_prog = prog;
			/* mark it deleted, so it's ignored while
			 * recomputing effective
			 */
			pl->prog = NULL;
			break;
		}
		if (!old_prog)
			return -ENOENT;
	} else {
		/* to maintain backward compatibility NONE and OVERRIDE cgroups
		 * allow detaching with invalid FD (prog == NULL)
		 */
		pl = list_first_entry(progs, typeof(*pl), node);
		old_prog = pl->prog;
		pl->prog = NULL;
	}

	err = update_effective_progs(cgrp, type);
	if (err)
		goto cleanup;

	/* now we can actually delete it from this cgroup's list */
	list_del(&pl->node);
	bpf_cgroup_storage_unlink(pl->storage);
	bpf_cgroup_storage_free(pl->storage);
	kfree(pl);
	if (list_empty(progs))
		/* last program was detached, reset flags to zero */
		cgrp->bpf.flags[type] = 0;

	bpf_prog_put(old_prog);
	static_branch_dec(&cgroup_bpf_enabled_key);
	return 0;

cleanup:
	/* restore old_prog so the prog list stays consistent */
	pl->prog = old_prog;
	return err;
}
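
/* Must be called with cgroup_mutex held to avoid races. */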
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		       union bpf_attr __user *uattr)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	enum bpf_attach_type type = attr->query.attach_type;
	struct list_head *progs = &cgrp->bpf.progs[type];
	u32 flags = cgrp->bpf.flags[type];
	int cnt, ret = 0, i;

	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
	else
		cnt = prog_list_length(progs);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
		return -EFAULT;
	if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
		/* return early if user requested only program count + flags */
		return 0;
	if (attr->query.prog_cnt < cnt) {
		cnt = attr->query.prog_cnt;
		ret = -ENOSPC;
	}

	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
						   prog_ids, cnt);
	} else {
		struct bpf_prog_list *pl;
		u32 id;

		i = 0;
		list_for_each_entry(pl, progs, node) {
			id = pl->prog->aux->id;
			if (copy_to_user(prog_ids + i, &id, sizeof(id)))
				return -EFAULT;
			if (++i == cnt)
				break;
		}
	}
	return ret;
}
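
/* Resolve the target cgroup from the user-supplied FD and attach @prog. */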
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	cgroup_put(cgrp);
	return ret;
}
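
/* Resolve the target cgroup and detach the program; a bad program FD is
 * tolerated (prog stays NULL) so legacy non-MULTI detach keeps working.
 */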
int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);

	cgroup_put(cgrp);
	return ret;
}
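
/* Resolve the target cgroup and report the attached/effective programs. */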
int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_query(cgrp, attr, uattr);

	cgroup_put(cgrp);
	return ret;
}
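
/**
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
 * @type: The type of program to be executed
 *
 * If no socket is passed, or the socket is not of type INET or INET6,
 * this function does nothing and returns 0.
 *
 * The program type passed in via @type must be suitable for network
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found and
 * it returned != 1 during execution. In all other cases, 0 is returned.
 */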
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum bpf_attach_type type)
{
	unsigned int offset = skb->data - skb_network_header(skb);
	struct sock *save_sk;
	struct cgroup *cgrp;
	int ret;

	if (!sk || !sk_fullsock(sk))
		return 0;

	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	save_sk = skb->sk;
	skb->sk = sk;
	__skb_push(skb, offset);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
				 bpf_prog_run_save_cb);
	__skb_pull(skb, offset);
	skb->sk = save_sk;
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
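
/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
 * @type: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found and
 * it returned != 1 during execution. In all other cases, 0 is returned.
 */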
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	int ret;

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
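
/**
 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
 *                                       a sockaddr provided by user space
 * @sk: sock struct that will use the sockaddr
 * @uaddr: sockaddr struct provided by user space
 * @type: The type of program to be executed
 * @t_ctx: Pointer to attach type specific context
 *
 * The socket is expected to be of type INET or INET6.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned a value != 1 during execution. In all other cases, 0 is returned.
 */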
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      enum bpf_attach_type type,
				      void *t_ctx)
{
	struct bpf_sock_addr_kern ctx = {
		.sk = sk,
		.uaddr = uaddr,
		.t_ctx = t_ctx,
	};
	struct sockaddr_storage unspec;
	struct cgroup *cgrp;
	int ret;

	/* Check socket family since not all sockets represent network
	 * endpoints (e.g. AF_UNIX).
	 */
	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	if (!ctx.uaddr) {
		memset(&unspec, 0, sizeof(unspec));
		ctx.uaddr = (struct sockaddr *)&unspec;
	}

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);

	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
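
/**
 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 * @sk: socket to get cgroup from
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 * sk with connection information (IP addresses, etc.) May not contain
 * cgroup info if it is a req sock.
 * @type: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock_ops
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found and
 * it returned != 1 during execution. In all other cases, 0 is returned.
 */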
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	int ret;

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
				 BPF_PROG_RUN);
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
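
/* Run the BPF_CGROUP_DEVICE programs of the current task's cgroup to decide
 * whether access to the given device node is permitted. Returns 0 if access
 * is allowed and a non-zero value otherwise.
 */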
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type)
{
	struct cgroup *cgrp;
	struct bpf_cgroup_dev_ctx ctx = {
		.access_type = (access << 16) | dev_type,
		.major = major,
		.minor = minor,
	};
	int allow = 1;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				   BPF_PROG_RUN);
	rcu_read_unlock();

	return !allow;
}
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
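
/* Helpers available to BPF_PROG_TYPE_CGROUP_DEVICE programs. */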
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_trace_printk:
		if (capable(CAP_SYS_ADMIN))
			return bpf_get_trace_printk_proto();
		/* fall through */
	default:
		return NULL;
	}
}
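
/* Context access rules for BPF_PROG_TYPE_CGROUP_DEVICE programs: the
 * context is read-only and only aligned, correctly sized loads are allowed
 * (narrow loads are permitted for access_type).
 */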
static bool cgroup_dev_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (type == BPF_WRITE)
		return false;

	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
		return false;

	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
		bpf_ctx_record_field_size(info, size_default);
		if (!bpf_ctx_narrow_access_ok(off, size, size_default))
			return false;
		break;
	default:
		if (size != size_default)
			return false;
	}

	return true;
}
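
/* prog_ops and verifier_ops for BPF_PROG_TYPE_CGROUP_DEVICE */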
const struct bpf_prog_ops cg_dev_prog_ops = {
};

const struct bpf_verifier_ops cg_dev_verifier_ops = {
	.get_func_proto		= cgroup_dev_func_proto,
	.is_valid_access	= cgroup_dev_is_valid_access,
};