linux/net/sched/cls_bpf.c
/*
 * Berkeley Packet Filter based traffic classifier
 *
 * Might be used to classify traffic through flexible, user-defined and
 * possibly JIT-ed BPF filters for traffic control as an alternative to
 * ematches.
 *
 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>

#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");

#define CLS_BPF_NAME_LEN        256
#define CLS_BPF_SUPPORTED_GEN_FLAGS             \
        (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)

struct cls_bpf_head {
        struct list_head plist;
        u32 hgen;
        struct rcu_head rcu;
};

struct cls_bpf_prog {
        struct bpf_prog *filter;
        struct list_head link;
        struct tcf_result res;
        bool exts_integrated;
        bool offloaded;
        u32 gen_flags;
        struct tcf_exts exts;
        u32 handle;
        u16 bpf_num_ops;
        struct sock_filter *bpf_ops;
        const char *bpf_name;
        struct tcf_proto *tp;
        struct rcu_head rcu;
};

static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
        [TCA_BPF_CLASSID]       = { .type = NLA_U32 },
        [TCA_BPF_FLAGS]         = { .type = NLA_U32 },
        [TCA_BPF_FLAGS_GEN]     = { .type = NLA_U32 },
        [TCA_BPF_FD]            = { .type = NLA_U32 },
        [TCA_BPF_NAME]          = { .type = NLA_NUL_STRING,
                                    .len = CLS_BPF_NAME_LEN },
        [TCA_BPF_OPS_LEN]       = { .type = NLA_U16 },
        [TCA_BPF_OPS]           = { .type = NLA_BINARY,
                                    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};

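/* Sanitize the verdict returned by a direct-action program: known
 * opcodes are passed through as-is, anything else is mapped to
 * TC_ACT_UNSPEC, i.e. continue with the next classifier.
 */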
static int cls_bpf_exec_opcode(int code)
{
        switch (code) {
        case TC_ACT_OK:
        case TC_ACT_SHOT:
        case TC_ACT_STOLEN:
        case TC_ACT_REDIRECT:
        case TC_ACT_UNSPEC:
                return code;
        default:
                return TC_ACT_UNSPEC;
        }
}

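/* Fast-path entry point: walk the filter list under RCU and run each
 * BPF program on the skb. At ingress the MAC header is pushed back
 * first so the program sees the packet from the link layer, and pulled
 * again afterwards; skip-sw filters are not executed in software at
 * all, a neutral verdict is synthesized for them instead.
 */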
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                            struct tcf_result *res)
{
        struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
        bool at_ingress = skb_at_tc_ingress(skb);
        struct cls_bpf_prog *prog;
        int ret = -1;

        /* Needed here for accessing maps. */
        rcu_read_lock();
        list_for_each_entry_rcu(prog, &head->plist, link) {
                int filter_res;

                qdisc_skb_cb(skb)->tc_classid = prog->res.classid;

                if (tc_skip_sw(prog->gen_flags)) {
                        filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
                } else if (at_ingress) {
                        /* It is safe to push/pull even if skb_shared() */
                        __skb_push(skb, skb->mac_len);
                        bpf_compute_data_end(skb);
                        filter_res = BPF_PROG_RUN(prog->filter, skb);
                        __skb_pull(skb, skb->mac_len);
                } else {
                        bpf_compute_data_end(skb);
                        filter_res = BPF_PROG_RUN(prog->filter, skb);
                }

                if (prog->exts_integrated) {
                        res->class   = 0;
                        res->classid = TC_H_MAJ(prog->res.classid) |
                                       qdisc_skb_cb(skb)->tc_classid;

                        ret = cls_bpf_exec_opcode(filter_res);
                        if (ret == TC_ACT_UNSPEC)
                                continue;
                        break;
                }

                if (filter_res == 0)
                        continue;
                if (filter_res != -1) {
                        res->class   = 0;
                        res->classid = filter_res;
                } else {
                        *res = prog->res;
                }

                ret = tcf_exts_exec(skb, &prog->exts, res);
                if (ret < 0)
                        continue;

                break;
        }
        rcu_read_unlock();

        return ret;
}

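/* Classic BPF filters keep a copy of their sock_filter ops around for
 * dumping; eBPF filters do not, so a NULL bpf_ops identifies eBPF.
 */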
static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
{
        return !prog->bpf_ops;
}

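/* Build a tc_cls_bpf_offload descriptor for @prog and hand it to the
 * device's ndo_setup_tc() to add, replace or destroy the filter in
 * hardware, or to fetch its hardware stats.
 */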
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
                               enum tc_clsbpf_command cmd)
{
        struct net_device *dev = tp->q->dev_queue->dev;
        struct tc_cls_bpf_offload bpf_offload = {};
        struct tc_to_netdev offload;

        offload.type = TC_SETUP_CLSBPF;
        offload.cls_bpf = &bpf_offload;

        bpf_offload.command = cmd;
        bpf_offload.exts = &prog->exts;
        bpf_offload.prog = prog->filter;
        bpf_offload.name = prog->bpf_name;
        bpf_offload.exts_integrated = prog->exts_integrated;
        bpf_offload.gen_flags = prog->gen_flags;

        return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
                                             tp->protocol, &offload);
}

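/* Decide how to (re)program the hardware for a new or replaced filter:
 * replace an already offloaded entry, destroy the old entry when the
 * new filter cannot be offloaded, or add a fresh one. If skip-sw was
 * requested, a failure to offload is fatal; otherwise offload errors
 * are ignored and the software path is used.
 */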
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
                           struct cls_bpf_prog *oldprog)
{
        struct net_device *dev = tp->q->dev_queue->dev;
        struct cls_bpf_prog *obj = prog;
        enum tc_clsbpf_command cmd;
        bool skip_sw;
        int ret;

        skip_sw = tc_skip_sw(prog->gen_flags) ||
                (oldprog && tc_skip_sw(oldprog->gen_flags));

        if (oldprog && oldprog->offloaded) {
                if (tc_should_offload(dev, tp, prog->gen_flags)) {
                        cmd = TC_CLSBPF_REPLACE;
                } else if (!tc_skip_sw(prog->gen_flags)) {
                        obj = oldprog;
                        cmd = TC_CLSBPF_DESTROY;
                } else {
                        return -EINVAL;
                }
        } else {
                if (!tc_should_offload(dev, tp, prog->gen_flags))
                        return skip_sw ? -EINVAL : 0;
                cmd = TC_CLSBPF_ADD;
        }

        ret = cls_bpf_offload_cmd(tp, obj, cmd);
        if (ret)
                return skip_sw ? ret : 0;

        obj->offloaded = true;
        if (oldprog)
                oldprog->offloaded = false;

        return 0;
}

static void cls_bpf_stop_offload(struct tcf_proto *tp,
                                 struct cls_bpf_prog *prog)
{
        int err;

        if (!prog->offloaded)
                return;

        err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
        if (err) {
                pr_err("Stopping hardware offload failed: %d\n", err);
                return;
        }

        prog->offloaded = false;
}

static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
                                         struct cls_bpf_prog *prog)
{
        if (!prog->offloaded)
                return;

        cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
}

static int cls_bpf_init(struct tcf_proto *tp)
{
        struct cls_bpf_head *head;

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (head == NULL)
                return -ENOBUFS;

        INIT_LIST_HEAD_RCU(&head->plist);
        rcu_assign_pointer(tp->root, head);

        return 0;
}

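/* Final teardown of a filter, after the last RCU reader is done:
 * release its actions, drop the BPF program with the matching
 * put/destroy variant and free the auxiliary data.
 */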
static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
{
        tcf_exts_destroy(&prog->exts);

        if (cls_bpf_is_ebpf(prog))
                bpf_prog_put(prog->filter);
        else
                bpf_prog_destroy(prog->filter);

        kfree(prog->bpf_name);
        kfree(prog->bpf_ops);
        kfree(prog);
}

static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
{
        __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
}

static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
        cls_bpf_stop_offload(tp, prog);
        list_del_rcu(&prog->link);
        tcf_unbind_filter(tp, &prog->res);
        call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
}

static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
        __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
        return 0;
}

static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog, *tmp;

        if (!force && !list_empty(&head->plist))
                return false;

        list_for_each_entry_safe(prog, tmp, &head->plist, link)
                __cls_bpf_delete(tp, prog);

        kfree_rcu(head, rcu);
        return true;
}

static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog;
        unsigned long ret = 0UL;

        list_for_each_entry(prog, &head->plist, link) {
                if (prog->handle == handle) {
                        ret = (unsigned long) prog;
                        break;
                }
        }

        return ret;
}

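/* Attach a classic BPF filter: copy the sock_filter array from the
 * netlink attributes, validate its length against TCA_BPF_OPS_LEN and
 * let bpf_prog_create() turn it into an internal (possibly JITed)
 * program.
 */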
static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
{
        struct sock_filter *bpf_ops;
        struct sock_fprog_kern fprog_tmp;
        struct bpf_prog *fp;
        u16 bpf_size, bpf_num_ops;
        int ret;

        bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
        if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
                return -EINVAL;

        bpf_size = bpf_num_ops * sizeof(*bpf_ops);
        if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
                return -EINVAL;

        bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
        if (bpf_ops == NULL)
                return -ENOMEM;

        memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);

        fprog_tmp.len = bpf_num_ops;
        fprog_tmp.filter = bpf_ops;

        ret = bpf_prog_create(&fp, &fprog_tmp);
        if (ret < 0) {
                kfree(bpf_ops);
                return ret;
        }

        prog->bpf_ops = bpf_ops;
        prog->bpf_num_ops = bpf_num_ops;
        prog->bpf_name = NULL;
        prog->filter = fp;

        return 0;
}

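/* Attach an eBPF filter: look the program up by file descriptor and
 * require it to be of type BPF_PROG_TYPE_SCHED_CLS; an optional name
 * is duplicated only for later dumping. Programs that may need a dst
 * entry force the device to keep dsts unless we are at ingress.
 */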
static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
                                 const struct tcf_proto *tp)
{
        struct bpf_prog *fp;
        char *name = NULL;
        u32 bpf_fd;

        bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);

        fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
        if (IS_ERR(fp))
                return PTR_ERR(fp);

        if (tb[TCA_BPF_NAME]) {
                name = nla_memdup(tb[TCA_BPF_NAME], GFP_KERNEL);
                if (!name) {
                        bpf_prog_put(fp);
                        return -ENOMEM;
                }
        }

        prog->bpf_ops = NULL;
        prog->bpf_name = name;
        prog->filter = fp;

        if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS))
                netif_keep_dst(qdisc_dev(tp->q));

        return 0;
}

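/* Common configuration step for new and replaced filters: exactly one
 * of classic BPF ops or an eBPF fd must be supplied; actions and flags
 * are validated before the program itself is attached and, if a
 * classid was given, bound to its class.
 */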
static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
                                   struct cls_bpf_prog *prog,
                                   unsigned long base, struct nlattr **tb,
                                   struct nlattr *est, bool ovr)
{
        bool is_bpf, is_ebpf, have_exts = false;
        struct tcf_exts exts;
        u32 gen_flags = 0;
        int ret;

        is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
        is_ebpf = tb[TCA_BPF_FD];
        if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
                return -EINVAL;

        ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
        if (ret < 0)
                return ret;
        ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
        if (ret < 0)
                goto errout;

        if (tb[TCA_BPF_FLAGS]) {
                u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);

                if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
                        ret = -EINVAL;
                        goto errout;
                }

                have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
        }
        if (tb[TCA_BPF_FLAGS_GEN]) {
                gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
                if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
                    !tc_flags_valid(gen_flags)) {
                        ret = -EINVAL;
                        goto errout;
                }
        }

        prog->exts_integrated = have_exts;
        prog->gen_flags = gen_flags;

        ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
                       cls_bpf_prog_from_efd(tb, prog, tp);
        if (ret < 0)
                goto errout;

        if (tb[TCA_BPF_CLASSID]) {
                prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
                tcf_bind_filter(tp, &prog->res, base);
        }

        tcf_exts_change(tp, &prog->exts, &exts);
        return 0;

errout:
        tcf_exts_destroy(&exts);
        return ret;
}

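/* Pick an unused handle when the user did not supply one: linearly
 * probe from the last generated value, wrapping within 1..0x7FFFFFFE,
 * and give up (return 0) after 0x80000000 attempts.
 */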
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
                                   struct cls_bpf_head *head)
{
        unsigned int i = 0x80000000;
        u32 handle;

        do {
                if (++head->hgen == 0x7FFFFFFF)
                        head->hgen = 1;
        } while (--i > 0 && cls_bpf_get(tp, head->hgen));

        if (unlikely(i == 0)) {
                pr_err("Insufficient number of handles\n");
                handle = 0;
        } else {
                handle = head->hgen;
        }

        return handle;
}

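/* Netlink change request: parse TCA_OPTIONS, allocate and configure a
 * new filter, try to offload it, and only then publish it via RCU,
 * either replacing @oldprog in place or adding it to the list.
 */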
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
                          unsigned long *arg, bool ovr)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
        struct nlattr *tb[TCA_BPF_MAX + 1];
        struct cls_bpf_prog *prog;
        int ret;

        if (tca[TCA_OPTIONS] == NULL)
                return -EINVAL;

        ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
        if (ret < 0)
                return ret;

        prog = kzalloc(sizeof(*prog), GFP_KERNEL);
        if (!prog)
                return -ENOBUFS;

        ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
        if (ret < 0)
                goto errout;

        if (oldprog) {
                if (handle && oldprog->handle != handle) {
                        ret = -EINVAL;
                        goto errout;
                }
        }

        if (handle == 0)
                prog->handle = cls_bpf_grab_new_handle(tp, head);
        else
                prog->handle = handle;
        if (prog->handle == 0) {
                ret = -EINVAL;
                goto errout;
        }

        ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE],
                                      ovr);
        if (ret < 0)
                goto errout;

        ret = cls_bpf_offload(tp, prog, oldprog);
        if (ret) {
                __cls_bpf_delete_prog(prog);
                return ret;
        }

        if (oldprog) {
                list_replace_rcu(&oldprog->link, &prog->link);
                tcf_unbind_filter(tp, &oldprog->res);
                call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
        } else {
                list_add_rcu(&prog->link, &head->plist);
        }

        *arg = (unsigned long) prog;
        return 0;

errout:
        tcf_exts_destroy(&prog->exts);
        kfree(prog);
        return ret;
}

static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
                                 struct sk_buff *skb)
{
        struct nlattr *nla;

        if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
                return -EMSGSIZE;

        nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
                          sizeof(struct sock_filter));
        if (nla == NULL)
                return -EMSGSIZE;

        memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));

        return 0;
}

static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
                                  struct sk_buff *skb)
{
        struct nlattr *nla;

        if (prog->bpf_name &&
            nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
                return -EMSGSIZE;

        nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag));
        if (nla == NULL)
                return -EMSGSIZE;

        memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));

        return 0;
}

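/* Dump one filter back to user space: its handle, classid, the program
 * in classic or eBPF representation, flags, actions and stats.
 */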
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                        struct sk_buff *skb, struct tcmsg *tm)
{
        struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
        struct nlattr *nest;
        u32 bpf_flags = 0;
        int ret;

        if (prog == NULL)
                return skb->len;

        tm->tcm_handle = prog->handle;

        cls_bpf_offload_update_stats(tp, prog);

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        if (prog->res.classid &&
            nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
                goto nla_put_failure;

        if (cls_bpf_is_ebpf(prog))
                ret = cls_bpf_dump_ebpf_info(prog, skb);
        else
                ret = cls_bpf_dump_bpf_info(prog, skb);
        if (ret)
                goto nla_put_failure;

        if (tcf_exts_dump(skb, &prog->exts) < 0)
                goto nla_put_failure;

        if (prog->exts_integrated)
                bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
        if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
                goto nla_put_failure;
        if (prog->gen_flags &&
            nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
                goto nla_put_failure;

        nla_nest_end(skb, nest);

        if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog;

        list_for_each_entry(prog, &head->plist, link) {
                if (arg->count < arg->skip)
                        goto skip;
                if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
skip:
                arg->count++;
        }
}

static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
        .kind           =       "bpf",
        .owner          =       THIS_MODULE,
        .classify       =       cls_bpf_classify,
        .init           =       cls_bpf_init,
        .destroy        =       cls_bpf_destroy,
        .get            =       cls_bpf_get,
        .change         =       cls_bpf_change,
        .delete         =       cls_bpf_delete,
        .walk           =       cls_bpf_walk,
        .dump           =       cls_bpf_dump,
};

static int __init cls_bpf_init_mod(void)
{
        return register_tcf_proto_ops(&cls_bpf_ops);
}

static void __exit cls_bpf_exit_mod(void)
{
        unregister_tcf_proto_ops(&cls_bpf_ops);
}

module_init(cls_bpf_init_mod);
module_exit(cls_bpf_exit_mod);