linux/net/sched/cls_bpf.c
/*
 * Berkeley Packet Filter based traffic classifier
 *
 * Might be used to classify traffic through flexible, user-defined and
 * possibly JIT-ed BPF filters for traffic control as an alternative to
 * ematches.
 *
 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>

#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");

#define CLS_BPF_NAME_LEN        256

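/* Per-classifier instance state: the RCU-protected list of attached
 * programs, a cursor for auto-generated handles, and an RCU head for
 * deferred freeing of this structure itself.
 */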
struct cls_bpf_head {
        struct list_head plist;
        u32 hgen;
        struct rcu_head rcu;
};

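/* One attached filter. Depending on how it was loaded, either bpf_ops
 * (a classic BPF instruction array, bpf_num_ops entries long) or bpf_fd
 * plus an optional bpf_name (an eBPF program obtained via file
 * descriptor) is populated; the union reflects that the two identities
 * are mutually exclusive.
 */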
struct cls_bpf_prog {
        struct bpf_prog *filter;
        struct list_head link;
        struct tcf_result res;
        bool exts_integrated;
        struct tcf_exts exts;
        u32 handle;
        union {
                u32 bpf_fd;
                u16 bpf_num_ops;
        };
        struct sock_filter *bpf_ops;
        const char *bpf_name;
        struct tcf_proto *tp;
        struct rcu_head rcu;
};

static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
        [TCA_BPF_CLASSID]       = { .type = NLA_U32 },
        [TCA_BPF_FLAGS]         = { .type = NLA_U32 },
        [TCA_BPF_FD]            = { .type = NLA_U32 },
        [TCA_BPF_NAME]          = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
        [TCA_BPF_OPS_LEN]       = { .type = NLA_U16 },
        [TCA_BPF_OPS]           = { .type = NLA_BINARY,
                                    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};

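/* In direct-action mode the program's return value is a TC action verdict;
 * clamp anything outside the known set to TC_ACT_UNSPEC so that unknown
 * codes fall through to the next filter instead of leaking upwards.
 */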
static int cls_bpf_exec_opcode(int code)
{
        switch (code) {
        case TC_ACT_OK:
        case TC_ACT_SHOT:
        case TC_ACT_STOLEN:
        case TC_ACT_REDIRECT:
        case TC_ACT_UNSPEC:
                return code;
        default:
                return TC_ACT_UNSPEC;
        }
}

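/* Fast path: run each attached program over the skb in list order. On
 * ingress the MAC header has already been pulled, so it is pushed back
 * temporarily to give the program the same view of the packet it would
 * get on egress. Outside direct-action mode, a return of 0 means
 * mismatch (try the next program), -1 selects the classid configured
 * from userspace, and any other value is used as the classid directly.
 */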
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                            struct tcf_result *res)
{
        struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
        bool at_ingress = skb_at_tc_ingress(skb);
        struct cls_bpf_prog *prog;
        int ret = -1;

        if (unlikely(!skb_mac_header_was_set(skb)))
                return -1;

        /* Needed here for accessing maps. */
        rcu_read_lock();
        list_for_each_entry_rcu(prog, &head->plist, link) {
                int filter_res;

                qdisc_skb_cb(skb)->tc_classid = prog->res.classid;

                if (at_ingress) {
                        /* It is safe to push/pull even if skb_shared() */
                        __skb_push(skb, skb->mac_len);
                        filter_res = BPF_PROG_RUN(prog->filter, skb);
                        __skb_pull(skb, skb->mac_len);
                } else {
                        filter_res = BPF_PROG_RUN(prog->filter, skb);
                }

                if (prog->exts_integrated) {
                        res->class   = 0;
                        res->classid = TC_H_MAJ(prog->res.classid) |
                                       qdisc_skb_cb(skb)->tc_classid;

                        ret = cls_bpf_exec_opcode(filter_res);
                        if (ret == TC_ACT_UNSPEC)
                                continue;
                        break;
                }

                if (filter_res == 0)
                        continue;
                if (filter_res != -1) {
                        res->class   = 0;
                        res->classid = filter_res;
                } else {
                        *res = prog->res;
                }

                ret = tcf_exts_exec(skb, &prog->exts, res);
                if (ret < 0)
                        continue;

                break;
        }
        rcu_read_unlock();

        return ret;
}

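/* The classic BPF path keeps a copy of the original ops around for
 * dumping; the eBPF path does not, so a NULL bpf_ops identifies eBPF.
 */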
static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
{
        return !prog->bpf_ops;
}

static int cls_bpf_init(struct tcf_proto *tp)
{
        struct cls_bpf_head *head;

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (head == NULL)
                return -ENOBUFS;

        INIT_LIST_HEAD_RCU(&head->plist);
        rcu_assign_pointer(tp->root, head);

        return 0;
}

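/* Filter teardown. The program is released with the API matching how it
 * was acquired: bpf_prog_put() for eBPF programs taken via an fd,
 * bpf_prog_destroy() for classic BPF built by bpf_prog_create(). Callers
 * defer this through call_rcu() so in-flight classify calls can finish.
 */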
static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
        tcf_exts_destroy(&prog->exts);

        if (cls_bpf_is_ebpf(prog))
                bpf_prog_put(prog->filter);
        else
                bpf_prog_destroy(prog->filter);

        kfree(prog->bpf_name);
        kfree(prog->bpf_ops);
        kfree(prog);
}

static void __cls_bpf_delete_prog(struct rcu_head *rcu)
{
        struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);

        cls_bpf_delete_prog(prog->tp, prog);
}

static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;

        list_del_rcu(&prog->link);
        tcf_unbind_filter(tp, &prog->res);
        call_rcu(&prog->rcu, __cls_bpf_delete_prog);

        return 0;
}

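/* Destroy the whole classifier instance. Unless forced, refuse while
 * filters are still attached; the caller then keeps the tcf_proto alive.
 */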
static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog, *tmp;

        if (!force && !list_empty(&head->plist))
                return false;

        list_for_each_entry_safe(prog, tmp, &head->plist, link) {
                list_del_rcu(&prog->link);
                tcf_unbind_filter(tp, &prog->res);
                call_rcu(&prog->rcu, __cls_bpf_delete_prog);
        }

        RCU_INIT_POINTER(tp->root, NULL);
        kfree_rcu(head, rcu);
        return true;
}

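/* Look up a filter by handle; runs under RTNL, hence the plain list walk. */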
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog;
        unsigned long ret = 0UL;

        if (head == NULL)
                return 0UL;

        list_for_each_entry(prog, &head->plist, link) {
                if (prog->handle == handle) {
                        ret = (unsigned long) prog;
                        break;
                }
        }

        return ret;
}

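/* Build a classic BPF program from the TCA_BPF_OPS/TCA_BPF_OPS_LEN
 * attributes: validate the instruction count against the attribute
 * length, copy the ops, and hand them to bpf_prog_create(), which
 * checks the filter and converts it to the internal representation.
 */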
static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
{
        struct sock_filter *bpf_ops;
        struct sock_fprog_kern fprog_tmp;
        struct bpf_prog *fp;
        u16 bpf_size, bpf_num_ops;
        int ret;

        bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
        if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
                return -EINVAL;

        bpf_size = bpf_num_ops * sizeof(*bpf_ops);
        if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
                return -EINVAL;

        bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
        if (bpf_ops == NULL)
                return -ENOMEM;

        memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);

        fprog_tmp.len = bpf_num_ops;
        fprog_tmp.filter = bpf_ops;

        ret = bpf_prog_create(&fp, &fprog_tmp);
        if (ret < 0) {
                kfree(bpf_ops);
                return ret;
        }

        prog->bpf_ops = bpf_ops;
        prog->bpf_num_ops = bpf_num_ops;
        prog->bpf_name = NULL;
        prog->filter = fp;

        return 0;
}

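/* Take a reference on an already-loaded eBPF program via the fd in
 * TCA_BPF_FD. Only BPF_PROG_TYPE_SCHED_CLS programs are accepted. If the
 * program needs a valid skb dst (dst_needed) and we are not attached on
 * the ingress side, ask the device to keep dsts around for it.
 */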
static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
                                 const struct tcf_proto *tp)
{
        struct bpf_prog *fp;
        char *name = NULL;
        u32 bpf_fd;

        bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);

        fp = bpf_prog_get(bpf_fd);
        if (IS_ERR(fp))
                return PTR_ERR(fp);

        if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
                bpf_prog_put(fp);
                return -EINVAL;
        }

        if (tb[TCA_BPF_NAME]) {
                name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
                               nla_len(tb[TCA_BPF_NAME]),
                               GFP_KERNEL);
                if (!name) {
                        bpf_prog_put(fp);
                        return -ENOMEM;
                }
        }

        prog->bpf_ops = NULL;
        prog->bpf_fd = bpf_fd;
        prog->bpf_name = name;
        prog->filter = fp;

        if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS))
                netif_keep_dst(qdisc_dev(tp->q));

        return 0;
}

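/* Common configuration path for new and replaced filters. Exactly one of
 * the classic (OPS/OPS_LEN) and extended (FD) attribute sets must be
 * present. TCA_BPF_FLAG_ACT_DIRECT switches the filter into direct-action
 * mode, where cls_bpf_classify() interprets the program's return code as
 * a TC verdict instead of running separate actions.
 */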
static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
                                   struct cls_bpf_prog *prog,
                                   unsigned long base, struct nlattr **tb,
                                   struct nlattr *est, bool ovr)
{
        bool is_bpf, is_ebpf, have_exts = false;
        struct tcf_exts exts;
        int ret;

        is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
        is_ebpf = tb[TCA_BPF_FD];
        if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
                return -EINVAL;

        tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
        ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
        if (ret < 0)
                return ret;

        if (tb[TCA_BPF_FLAGS]) {
                u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);

                if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
                        tcf_exts_destroy(&exts);
                        return -EINVAL;
                }

                have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
        }

        prog->exts_integrated = have_exts;

        ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
                       cls_bpf_prog_from_efd(tb, prog, tp);
        if (ret < 0) {
                tcf_exts_destroy(&exts);
                return ret;
        }

        if (tb[TCA_BPF_CLASSID]) {
                prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
                tcf_bind_filter(tp, &prog->res, base);
        }

        tcf_exts_change(tp, &prog->exts, &exts);
        return 0;
}

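/* Auto-assign a handle when userspace did not supply one: advance the
 * per-head cursor, wrapping back to 1 before reaching 0x7FFFFFFF, until
 * an unused value is found or 2^31 attempts are exhausted (returns 0).
 */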
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
                                   struct cls_bpf_head *head)
{
        unsigned int i = 0x80000000;
        u32 handle;

        do {
                if (++head->hgen == 0x7FFFFFFF)
                        head->hgen = 1;
        } while (--i > 0 && cls_bpf_get(tp, head->hgen));

        if (unlikely(i == 0)) {
                pr_err("Insufficient number of handles\n");
                handle = 0;
        } else {
                handle = head->hgen;
        }

        return handle;
}

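/* Create or replace a filter. A replacement is built fully before being
 * swapped in with list_replace_rcu(), so concurrent readers always see
 * either the complete old or the complete new program; the old one is
 * freed after a grace period.
 *
 * From userspace this is typically reached via iproute2, roughly (exact
 * options depend on the iproute2 version):
 *
 *   tc filter add dev em1 parent ffff: bpf obj prog.o sec classifier \
 *      flowid 1:1                      # eBPF object, TCA_BPF_FD path
 *   tc filter add dev em1 parent ffff: bpf bytecode '...' flowid 1:1
 *                                      # classic BPF, TCA_BPF_OPS path
 *   ... bpf obj prog.o da              # direct-action mode (ACT_DIRECT)
 */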
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
                          unsigned long *arg, bool ovr)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
        struct nlattr *tb[TCA_BPF_MAX + 1];
        struct cls_bpf_prog *prog;
        int ret;

        if (tca[TCA_OPTIONS] == NULL)
                return -EINVAL;

        ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
        if (ret < 0)
                return ret;

        prog = kzalloc(sizeof(*prog), GFP_KERNEL);
        if (!prog)
                return -ENOBUFS;

        tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);

        if (oldprog) {
                if (handle && oldprog->handle != handle) {
                        ret = -EINVAL;
                        goto errout;
                }
        }

        if (handle == 0)
                prog->handle = cls_bpf_grab_new_handle(tp, head);
        else
                prog->handle = handle;
        if (prog->handle == 0) {
                ret = -EINVAL;
                goto errout;
        }

        ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
        if (ret < 0)
                goto errout;

        if (oldprog) {
                list_replace_rcu(&oldprog->link, &prog->link);
                tcf_unbind_filter(tp, &oldprog->res);
                call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
        } else {
                list_add_rcu(&prog->link, &head->plist);
        }

        *arg = (unsigned long) prog;
        return 0;
errout:
        kfree(prog);

        return ret;
}

static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
                                 struct sk_buff *skb)
{
        struct nlattr *nla;

        if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
                return -EMSGSIZE;

        nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
                          sizeof(struct sock_filter));
        if (nla == NULL)
                return -EMSGSIZE;

        memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));

        return 0;
}

static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
                                  struct sk_buff *skb)
{
        if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
                return -EMSGSIZE;

        if (prog->bpf_name &&
            nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
                return -EMSGSIZE;

        return 0;
}

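/* Netlink dump: emit the filter's handle, classid, flags and extensions,
 * plus the variant-specific attributes via the helpers above, mirroring
 * what cls_bpf_change() accepts.
 */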
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                        struct sk_buff *skb, struct tcmsg *tm)
{
        struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
        struct nlattr *nest;
        u32 bpf_flags = 0;
        int ret;

        if (prog == NULL)
                return skb->len;

        tm->tcm_handle = prog->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        if (prog->res.classid &&
            nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
                goto nla_put_failure;

        if (cls_bpf_is_ebpf(prog))
                ret = cls_bpf_dump_ebpf_info(prog, skb);
        else
                ret = cls_bpf_dump_bpf_info(prog, skb);
        if (ret)
                goto nla_put_failure;

        if (tcf_exts_dump(skb, &prog->exts) < 0)
                goto nla_put_failure;

        if (prog->exts_integrated)
                bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
        if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
                goto nla_put_failure;

        nla_nest_end(skb, nest);

        if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

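/* Iterate over all filters for a dump, honouring the walker's skip/count
 * bookkeeping so interrupted dumps can resume where they left off.
 */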
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog;

        list_for_each_entry(prog, &head->plist, link) {
                if (arg->count < arg->skip)
                        goto skip;
                if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
skip:
                arg->count++;
        }
}

static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
        .kind           =       "bpf",
        .owner          =       THIS_MODULE,
        .classify       =       cls_bpf_classify,
        .init           =       cls_bpf_init,
        .destroy        =       cls_bpf_destroy,
        .get            =       cls_bpf_get,
        .change         =       cls_bpf_change,
        .delete         =       cls_bpf_delete,
        .walk           =       cls_bpf_walk,
        .dump           =       cls_bpf_dump,
};

static int __init cls_bpf_init_mod(void)
{
        return register_tcf_proto_ops(&cls_bpf_ops);
}

static void __exit cls_bpf_exit_mod(void)
{
        unregister_tcf_proto_ops(&cls_bpf_ops);
}

module_init(cls_bpf_init_mod);
module_exit(cls_bpf_exit_mod);