LXR linux/kernel/bpf/cgroup.c

   1/*
   2 * Functions to manage eBPF programs attached to cgroups
   3 *
   4 * Copyright (c) 2016 Daniel Mack
   5 *
   6 * This file is subject to the terms and conditions of version 2 of the GNU
   7 * General Public License.  See the file COPYING in the main directory of the
   8 * Linux distribution for more details.
   9 */
  10
  11#include <linux/kernel.h>
  12#include <linux/atomic.h>
  13#include <linux/cgroup.h>
  14#include <linux/slab.h>
  15#include <linux/bpf.h>
  16#include <linux/bpf-cgroup.h>
  17#include <net/sock.h>
  18
/*
 * Static key, defined initially false. Within this file it is incremented
 * once for every program attached to a cgroup (__cgroup_bpf_update()) and
 * decremented once for every attached program that is released
 * (__cgroup_bpf_update(), cgroup_bpf_put()). The symbol is exported.
 * NOTE(review): presumably this key gates the callers of the
 * __cgroup_bpf_run_filter_*() hooks; those callers are not visible here.
 */
  19DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
  20EXPORT_SYMBOL(cgroup_bpf_enabled_key);
  21
  22/**
  23 * cgroup_bpf_put() - put references of all bpf programs
  24 * @cgrp: the cgroup to modify
  25 */
  26void cgroup_bpf_put(struct cgroup *cgrp)
  27{
  28        unsigned int type;
  29
  30        for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
  31                struct bpf_prog *prog = cgrp->bpf.prog[type];
  32
  33                if (prog) {
  34                        bpf_prog_put(prog);
  35                        static_branch_dec(&cgroup_bpf_enabled_key);
  36                }
  37        }
  38}
  39
  40/**
  41 * cgroup_bpf_inherit() - inherit effective programs from parent
  42 * @cgrp: the cgroup to modify
  43 * @parent: the parent to inherit from
  44 */
  45void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
  46{
  47        unsigned int type;
  48
  49        for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
  50                struct bpf_prog *e;
  51
  52                e = rcu_dereference_protected(parent->bpf.effective[type],
  53                                              lockdep_is_held(&cgroup_mutex));
  54                rcu_assign_pointer(cgrp->bpf.effective[type], e);
  55                cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
  56        }
  57}
  58
  59/**
  60 * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
  61 *                         propagate the change to descendants
  62 * @cgrp: The cgroup which descendants to traverse
  63 * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
  64 * @prog: A new program to pin
  65 * @type: Type of pinning operation (ingress/egress)
  66 *
  67 * Each cgroup has a set of two pointers for bpf programs; one for eBPF
  68 * programs it owns, and which is effective for execution.
  69 *
  70 * If @prog is not %NULL, this function attaches a new program to the cgroup
  71 * and releases the one that is currently attached, if any. @prog is then made
  72 * the effective program of type @type in that cgroup.
  73 *
  74 * If @prog is %NULL, the currently attached program of type @type is released,
  75 * and the effective program of the parent cgroup (if any) is inherited to
  76 * @cgrp.
  77 *
  78 * Then, the descendants of @cgrp are walked and the effective program for
  79 * each of them is set to the effective program of @cgrp unless the
  80 * descendant has its own program attached, in which case the subbranch is
  81 * skipped. This ensures that delegated subcgroups with own programs are left
  82 * untouched.
  83 *
  84 * Must be called with cgroup_mutex held.
  85 */
  86int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
  87                        struct bpf_prog *prog, enum bpf_attach_type type,
  88                        bool new_overridable)
  89{
  90        struct bpf_prog *old_prog, *effective = NULL;
  91        struct cgroup_subsys_state *pos;
  92        bool overridable = true;
  93
  94        if (parent) {
  95                overridable = !parent->bpf.disallow_override[type];
  96                effective = rcu_dereference_protected(parent->bpf.effective[type],
  97                                                      lockdep_is_held(&cgroup_mutex));
  98        }
  99
 100        if (prog && effective && !overridable)
 101                /* if parent has non-overridable prog attached, disallow
 102                 * attaching new programs to descendent cgroup
 103                 */
 104                return -EPERM;
 105
 106        if (prog && effective && overridable != new_overridable)
 107                /* if parent has overridable prog attached, only
 108                 * allow overridable programs in descendent cgroup
 109                 */
 110                return -EPERM;
 111
 112        old_prog = cgrp->bpf.prog[type];
 113
 114        if (prog) {
 115                overridable = new_overridable;
 116                effective = prog;
 117                if (old_prog &&
 118                    cgrp->bpf.disallow_override[type] == new_overridable)
 119                        /* disallow attaching non-overridable on top
 120                         * of existing overridable in this cgroup
 121                         * and vice versa
 122                         */
 123                        return -EPERM;
 124        }
 125
 126        if (!prog && !old_prog)
 127                /* report error when trying to detach and nothing is attached */
 128                return -ENOENT;
 129
 130        cgrp->bpf.prog[type] = prog;
 131
 132        css_for_each_descendant_pre(pos, &cgrp->self) {
 133                struct cgroup *desc = container_of(pos, struct cgroup, self);
 134
 135                /* skip the subtree if the descendant has its own program */
 136                if (desc->bpf.prog[type] && desc != cgrp) {
 137                        pos = css_rightmost_descendant(pos);
 138                } else {
 139                        rcu_assign_pointer(desc->bpf.effective[type],
 140                                           effective);
 141                        desc->bpf.disallow_override[type] = !overridable;
 142                }
 143        }
 144
 145        if (prog)
 146                static_branch_inc(&cgroup_bpf_enabled_key);
 147
 148        if (old_prog) {
 149                bpf_prog_put(old_prog);
 150                static_branch_dec(&cgroup_bpf_enabled_key);
 151        }
 152        return 0;
 153}
 154
 155/**
 156 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 157 * @sk: The socken sending or receiving traffic
 158 * @skb: The skb that is being sent or received
 159 * @type: The type of program to be exectuted
 160 *
 161 * If no socket is passed, or the socket is not of type INET or INET6,
 162 * this function does nothing and returns 0.
 163 *
 164 * The program type passed in via @type must be suitable for network
 165 * filtering. No further check is performed to assert that.
 166 *
 167 * This function will return %-EPERM if any if an attached program was found
 168 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 169 */
 170int __cgroup_bpf_run_filter_skb(struct sock *sk,
 171                                struct sk_buff *skb,
 172                                enum bpf_attach_type type)
 173{
 174        struct bpf_prog *prog;
 175        struct cgroup *cgrp;
 176        int ret = 0;
 177
 178        if (!sk || !sk_fullsock(sk))
 179                return 0;
 180
 181        if (sk->sk_family != AF_INET &&
 182            sk->sk_family != AF_INET6)
 183                return 0;
 184
 185        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 186
 187        rcu_read_lock();
 188
 189        prog = rcu_dereference(cgrp->bpf.effective[type]);
 190        if (prog) {
 191                unsigned int offset = skb->data - skb_network_header(skb);
 192
 193                __skb_push(skb, offset);
 194                ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
 195                __skb_pull(skb, offset);
 196        }
 197
 198        rcu_read_unlock();
 199
 200        return ret;
 201}
 202EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 203
 204/**
 205 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 206 * @sk: sock structure to manipulate
 207 * @type: The type of program to be exectuted
 208 *
 209 * socket is passed is expected to be of type INET or INET6.
 210 *
 211 * The program type passed in via @type must be suitable for sock
 212 * filtering. No further check is performed to assert that.
 213 *
 214 * This function will return %-EPERM if any if an attached program was found
 215 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 216 */
 217int __cgroup_bpf_run_filter_sk(struct sock *sk,
 218                               enum bpf_attach_type type)
 219{
 220        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 221        struct bpf_prog *prog;
 222        int ret = 0;
 223
 224
 225        rcu_read_lock();
 226
 227        prog = rcu_dereference(cgrp->bpf.effective[type]);
 228        if (prog)
 229                ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
 230
 231        rcu_read_unlock();
 232
 233        return ret;
 234}
 235EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 236