linux/kernel/bpf/net_namespace.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2
   3#include <linux/bpf.h>
   4#include <linux/bpf-netns.h>
   5#include <linux/filter.h>
   6#include <net/net_namespace.h>
   7
   8/*
   9 * Functions to manage BPF programs attached to netns
  10 */
  11
/* A bpf_link that attaches a BPF program to a network namespace. */
struct bpf_netns_link {
        struct bpf_link link;
        /* Attach type as requested at link creation; reported by fill_link_info */
        enum bpf_attach_type type;
        /* Index into the per-netns links[] and run_array[] tables */
        enum netns_bpf_attach_type netns_type;

        /* We don't hold a ref to net in order to auto-detach the link
         * when netns is going away. Instead we rely on pernet
         * pre_exit callback to clear this pointer. Must be accessed
         * with netns_bpf_mutex held.
         */
        struct net *net;
        struct list_head node; /* node in list of links attached to net */
};
  25
/* Protects updates to netns_bpf (the per-netns links, progs and run_array
 * tables) as well as the net pointer of every bpf_netns_link.
 */
DEFINE_MUTEX(netns_bpf_mutex);
  28
  29static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
  30{
  31        switch (type) {
  32#ifdef CONFIG_INET
  33        case NETNS_BPF_SK_LOOKUP:
  34                static_branch_dec(&bpf_sk_lookup_enabled);
  35                break;
  36#endif
  37        default:
  38                break;
  39        }
  40}
  41
  42static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
  43{
  44        switch (type) {
  45#ifdef CONFIG_INET
  46        case NETNS_BPF_SK_LOOKUP:
  47                static_branch_inc(&bpf_sk_lookup_enabled);
  48                break;
  49#endif
  50        default:
  51                break;
  52        }
  53}
  54
  55/* Must be called with netns_bpf_mutex held. */
  56static void netns_bpf_run_array_detach(struct net *net,
  57                                       enum netns_bpf_attach_type type)
  58{
  59        struct bpf_prog_array *run_array;
  60
  61        run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
  62                                        lockdep_is_held(&netns_bpf_mutex));
  63        bpf_prog_array_free(run_array);
  64}
  65
  66static int link_index(struct net *net, enum netns_bpf_attach_type type,
  67                      struct bpf_netns_link *link)
  68{
  69        struct bpf_netns_link *pos;
  70        int i = 0;
  71
  72        list_for_each_entry(pos, &net->bpf.links[type], node) {
  73                if (pos == link)
  74                        return i;
  75                i++;
  76        }
  77        return -ENOENT;
  78}
  79
  80static int link_count(struct net *net, enum netns_bpf_attach_type type)
  81{
  82        struct list_head *pos;
  83        int i = 0;
  84
  85        list_for_each(pos, &net->bpf.links[type])
  86                i++;
  87        return i;
  88}
  89
  90static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
  91                            struct bpf_prog_array *prog_array)
  92{
  93        struct bpf_netns_link *pos;
  94        unsigned int i = 0;
  95
  96        list_for_each_entry(pos, &net->bpf.links[type], node) {
  97                prog_array->items[i].prog = pos->link.prog;
  98                i++;
  99        }
 100}
 101
 102static void bpf_netns_link_release(struct bpf_link *link)
 103{
 104        struct bpf_netns_link *net_link =
 105                container_of(link, struct bpf_netns_link, link);
 106        enum netns_bpf_attach_type type = net_link->netns_type;
 107        struct bpf_prog_array *old_array, *new_array;
 108        struct net *net;
 109        int cnt, idx;
 110
 111        mutex_lock(&netns_bpf_mutex);
 112
 113        /* We can race with cleanup_net, but if we see a non-NULL
 114         * struct net pointer, pre_exit has not run yet and wait for
 115         * netns_bpf_mutex.
 116         */
 117        net = net_link->net;
 118        if (!net)
 119                goto out_unlock;
 120
 121        /* Mark attach point as unused */
 122        netns_bpf_attach_type_unneed(type);
 123
 124        /* Remember link position in case of safe delete */
 125        idx = link_index(net, type, net_link);
 126        list_del(&net_link->node);
 127
 128        cnt = link_count(net, type);
 129        if (!cnt) {
 130                netns_bpf_run_array_detach(net, type);
 131                goto out_unlock;
 132        }
 133
 134        old_array = rcu_dereference_protected(net->bpf.run_array[type],
 135                                              lockdep_is_held(&netns_bpf_mutex));
 136        new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
 137        if (!new_array) {
 138                WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
 139                goto out_unlock;
 140        }
 141        fill_prog_array(net, type, new_array);
 142        rcu_assign_pointer(net->bpf.run_array[type], new_array);
 143        bpf_prog_array_free(old_array);
 144
 145out_unlock:
 146        net_link->net = NULL;
 147        mutex_unlock(&netns_bpf_mutex);
 148}
 149
/* bpf_link_ops.detach: detaching is the same operation as releasing the
 * link; always reports success.
 */
static int bpf_netns_link_detach(struct bpf_link *link)
{
        bpf_netns_link_release(link);
        return 0;
}
 155
 156static void bpf_netns_link_dealloc(struct bpf_link *link)
 157{
 158        struct bpf_netns_link *net_link =
 159                container_of(link, struct bpf_netns_link, link);
 160
 161        kfree(net_link);
 162}
 163
 164static int bpf_netns_link_update_prog(struct bpf_link *link,
 165                                      struct bpf_prog *new_prog,
 166                                      struct bpf_prog *old_prog)
 167{
 168        struct bpf_netns_link *net_link =
 169                container_of(link, struct bpf_netns_link, link);
 170        enum netns_bpf_attach_type type = net_link->netns_type;
 171        struct bpf_prog_array *run_array;
 172        struct net *net;
 173        int idx, ret;
 174
 175        if (old_prog && old_prog != link->prog)
 176                return -EPERM;
 177        if (new_prog->type != link->prog->type)
 178                return -EINVAL;
 179
 180        mutex_lock(&netns_bpf_mutex);
 181
 182        net = net_link->net;
 183        if (!net || !check_net(net)) {
 184                /* Link auto-detached or netns dying */
 185                ret = -ENOLINK;
 186                goto out_unlock;
 187        }
 188
 189        run_array = rcu_dereference_protected(net->bpf.run_array[type],
 190                                              lockdep_is_held(&netns_bpf_mutex));
 191        idx = link_index(net, type, net_link);
 192        ret = bpf_prog_array_update_at(run_array, idx, new_prog);
 193        if (ret)
 194                goto out_unlock;
 195
 196        old_prog = xchg(&link->prog, new_prog);
 197        bpf_prog_put(old_prog);
 198
 199out_unlock:
 200        mutex_unlock(&netns_bpf_mutex);
 201        return ret;
 202}
 203
 204static int bpf_netns_link_fill_info(const struct bpf_link *link,
 205                                    struct bpf_link_info *info)
 206{
 207        const struct bpf_netns_link *net_link =
 208                container_of(link, struct bpf_netns_link, link);
 209        unsigned int inum = 0;
 210        struct net *net;
 211
 212        mutex_lock(&netns_bpf_mutex);
 213        net = net_link->net;
 214        if (net && check_net(net))
 215                inum = net->ns.inum;
 216        mutex_unlock(&netns_bpf_mutex);
 217
 218        info->netns.netns_ino = inum;
 219        info->netns.attach_type = net_link->type;
 220        return 0;
 221}
 222
 223static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
 224                                       struct seq_file *seq)
 225{
 226        struct bpf_link_info info = {};
 227
 228        bpf_netns_link_fill_info(link, &info);
 229        seq_printf(seq,
 230                   "netns_ino:\t%u\n"
 231                   "attach_type:\t%u\n",
 232                   info.netns.netns_ino,
 233                   info.netns.attach_type);
 234}
 235
 236static const struct bpf_link_ops bpf_netns_link_ops = {
 237        .release = bpf_netns_link_release,
 238        .dealloc = bpf_netns_link_dealloc,
 239        .detach = bpf_netns_link_detach,
 240        .update_prog = bpf_netns_link_update_prog,
 241        .fill_link_info = bpf_netns_link_fill_info,
 242        .show_fdinfo = bpf_netns_link_show_fdinfo,
 243};
 244
 245/* Must be called with netns_bpf_mutex held. */
 246static int __netns_bpf_prog_query(const union bpf_attr *attr,
 247                                  union bpf_attr __user *uattr,
 248                                  struct net *net,
 249                                  enum netns_bpf_attach_type type)
 250{
 251        __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
 252        struct bpf_prog_array *run_array;
 253        u32 prog_cnt = 0, flags = 0;
 254
 255        run_array = rcu_dereference_protected(net->bpf.run_array[type],
 256                                              lockdep_is_held(&netns_bpf_mutex));
 257        if (run_array)
 258                prog_cnt = bpf_prog_array_length(run_array);
 259
 260        if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
 261                return -EFAULT;
 262        if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
 263                return -EFAULT;
 264        if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
 265                return 0;
 266
 267        return bpf_prog_array_copy_to_user(run_array, prog_ids,
 268                                           attr->query.prog_cnt);
 269}
 270
 271int netns_bpf_prog_query(const union bpf_attr *attr,
 272                         union bpf_attr __user *uattr)
 273{
 274        enum netns_bpf_attach_type type;
 275        struct net *net;
 276        int ret;
 277
 278        if (attr->query.query_flags)
 279                return -EINVAL;
 280
 281        type = to_netns_bpf_attach_type(attr->query.attach_type);
 282        if (type < 0)
 283                return -EINVAL;
 284
 285        net = get_net_ns_by_fd(attr->query.target_fd);
 286        if (IS_ERR(net))
 287                return PTR_ERR(net);
 288
 289        mutex_lock(&netns_bpf_mutex);
 290        ret = __netns_bpf_prog_query(attr, uattr, net, type);
 291        mutex_unlock(&netns_bpf_mutex);
 292
 293        put_net(net);
 294        return ret;
 295}
 296
 297int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 298{
 299        struct bpf_prog_array *run_array;
 300        enum netns_bpf_attach_type type;
 301        struct bpf_prog *attached;
 302        struct net *net;
 303        int ret;
 304
 305        if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
 306                return -EINVAL;
 307
 308        type = to_netns_bpf_attach_type(attr->attach_type);
 309        if (type < 0)
 310                return -EINVAL;
 311
 312        net = current->nsproxy->net_ns;
 313        mutex_lock(&netns_bpf_mutex);
 314
 315        /* Attaching prog directly is not compatible with links */
 316        if (!list_empty(&net->bpf.links[type])) {
 317                ret = -EEXIST;
 318                goto out_unlock;
 319        }
 320
 321        switch (type) {
 322        case NETNS_BPF_FLOW_DISSECTOR:
 323                ret = flow_dissector_bpf_prog_attach_check(net, prog);
 324                break;
 325        default:
 326                ret = -EINVAL;
 327                break;
 328        }
 329        if (ret)
 330                goto out_unlock;
 331
 332        attached = net->bpf.progs[type];
 333        if (attached == prog) {
 334                /* The same program cannot be attached twice */
 335                ret = -EINVAL;
 336                goto out_unlock;
 337        }
 338
 339        run_array = rcu_dereference_protected(net->bpf.run_array[type],
 340                                              lockdep_is_held(&netns_bpf_mutex));
 341        if (run_array) {
 342                WRITE_ONCE(run_array->items[0].prog, prog);
 343        } else {
 344                run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
 345                if (!run_array) {
 346                        ret = -ENOMEM;
 347                        goto out_unlock;
 348                }
 349                run_array->items[0].prog = prog;
 350                rcu_assign_pointer(net->bpf.run_array[type], run_array);
 351        }
 352
 353        net->bpf.progs[type] = prog;
 354        if (attached)
 355                bpf_prog_put(attached);
 356
 357out_unlock:
 358        mutex_unlock(&netns_bpf_mutex);
 359
 360        return ret;
 361}
 362
 363/* Must be called with netns_bpf_mutex held. */
 364static int __netns_bpf_prog_detach(struct net *net,
 365                                   enum netns_bpf_attach_type type,
 366                                   struct bpf_prog *old)
 367{
 368        struct bpf_prog *attached;
 369
 370        /* Progs attached via links cannot be detached */
 371        if (!list_empty(&net->bpf.links[type]))
 372                return -EINVAL;
 373
 374        attached = net->bpf.progs[type];
 375        if (!attached || attached != old)
 376                return -ENOENT;
 377        netns_bpf_run_array_detach(net, type);
 378        net->bpf.progs[type] = NULL;
 379        bpf_prog_put(attached);
 380        return 0;
 381}
 382
 383int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
 384{
 385        enum netns_bpf_attach_type type;
 386        struct bpf_prog *prog;
 387        int ret;
 388
 389        if (attr->target_fd)
 390                return -EINVAL;
 391
 392        type = to_netns_bpf_attach_type(attr->attach_type);
 393        if (type < 0)
 394                return -EINVAL;
 395
 396        prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
 397        if (IS_ERR(prog))
 398                return PTR_ERR(prog);
 399
 400        mutex_lock(&netns_bpf_mutex);
 401        ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
 402        mutex_unlock(&netns_bpf_mutex);
 403
 404        bpf_prog_put(prog);
 405
 406        return ret;
 407}
 408
 409static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
 410{
 411        switch (type) {
 412        case NETNS_BPF_FLOW_DISSECTOR:
 413                return 1;
 414        case NETNS_BPF_SK_LOOKUP:
 415                return 64;
 416        default:
 417                return 0;
 418        }
 419}
 420
 421static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
 422                                 enum netns_bpf_attach_type type)
 423{
 424        struct bpf_netns_link *net_link =
 425                container_of(link, struct bpf_netns_link, link);
 426        struct bpf_prog_array *run_array;
 427        int cnt, err;
 428
 429        mutex_lock(&netns_bpf_mutex);
 430
 431        cnt = link_count(net, type);
 432        if (cnt >= netns_bpf_max_progs(type)) {
 433                err = -E2BIG;
 434                goto out_unlock;
 435        }
 436        /* Links are not compatible with attaching prog directly */
 437        if (net->bpf.progs[type]) {
 438                err = -EEXIST;
 439                goto out_unlock;
 440        }
 441
 442        switch (type) {
 443        case NETNS_BPF_FLOW_DISSECTOR:
 444                err = flow_dissector_bpf_prog_attach_check(net, link->prog);
 445                break;
 446        case NETNS_BPF_SK_LOOKUP:
 447                err = 0; /* nothing to check */
 448                break;
 449        default:
 450                err = -EINVAL;
 451                break;
 452        }
 453        if (err)
 454                goto out_unlock;
 455
 456        run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
 457        if (!run_array) {
 458                err = -ENOMEM;
 459                goto out_unlock;
 460        }
 461
 462        list_add_tail(&net_link->node, &net->bpf.links[type]);
 463
 464        fill_prog_array(net, type, run_array);
 465        run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
 466                                        lockdep_is_held(&netns_bpf_mutex));
 467        bpf_prog_array_free(run_array);
 468
 469        /* Mark attach point as used */
 470        netns_bpf_attach_type_need(type);
 471
 472out_unlock:
 473        mutex_unlock(&netns_bpf_mutex);
 474        return err;
 475}
 476
 477int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
 478{
 479        enum netns_bpf_attach_type netns_type;
 480        struct bpf_link_primer link_primer;
 481        struct bpf_netns_link *net_link;
 482        enum bpf_attach_type type;
 483        struct net *net;
 484        int err;
 485
 486        if (attr->link_create.flags)
 487                return -EINVAL;
 488
 489        type = attr->link_create.attach_type;
 490        netns_type = to_netns_bpf_attach_type(type);
 491        if (netns_type < 0)
 492                return -EINVAL;
 493
 494        net = get_net_ns_by_fd(attr->link_create.target_fd);
 495        if (IS_ERR(net))
 496                return PTR_ERR(net);
 497
 498        net_link = kzalloc(sizeof(*net_link), GFP_USER);
 499        if (!net_link) {
 500                err = -ENOMEM;
 501                goto out_put_net;
 502        }
 503        bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
 504                      &bpf_netns_link_ops, prog);
 505        net_link->net = net;
 506        net_link->type = type;
 507        net_link->netns_type = netns_type;
 508
 509        err = bpf_link_prime(&net_link->link, &link_primer);
 510        if (err) {
 511                kfree(net_link);
 512                goto out_put_net;
 513        }
 514
 515        err = netns_bpf_link_attach(net, &net_link->link, netns_type);
 516        if (err) {
 517                bpf_link_cleanup(&link_primer);
 518                goto out_put_net;
 519        }
 520
 521        put_net(net);
 522        return bpf_link_settle(&link_primer);
 523
 524out_put_net:
 525        put_net(net);
 526        return err;
 527}
 528
 529static int __net_init netns_bpf_pernet_init(struct net *net)
 530{
 531        int type;
 532
 533        for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
 534                INIT_LIST_HEAD(&net->bpf.links[type]);
 535
 536        return 0;
 537}
 538
 539static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
 540{
 541        enum netns_bpf_attach_type type;
 542        struct bpf_netns_link *net_link;
 543
 544        mutex_lock(&netns_bpf_mutex);
 545        for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
 546                netns_bpf_run_array_detach(net, type);
 547                list_for_each_entry(net_link, &net->bpf.links[type], node) {
 548                        net_link->net = NULL; /* auto-detach link */
 549                        netns_bpf_attach_type_unneed(type);
 550                }
 551                if (net->bpf.progs[type])
 552                        bpf_prog_put(net->bpf.progs[type]);
 553        }
 554        mutex_unlock(&netns_bpf_mutex);
 555}
 556
 557static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
 558        .init = netns_bpf_pernet_init,
 559        .pre_exit = netns_bpf_pernet_pre_exit,
 560};
 561
/* Register the per-netns init/pre_exit callbacks; run as a subsys initcall.
 * Returns the result of register_pernet_subsys().
 */
static int __init netns_bpf_init(void)
{
        return register_pernet_subsys(&netns_bpf_pernet_ops);
}

subsys_initcall(netns_bpf_init);
 568