linux/net/sched/cls_bpf.c
/*
 * Berkeley Packet Filter based traffic classifier
 *
 * Might be used to classify traffic through flexible, user-defined and
 * possibly JIT-ed BPF filters for traffic control as an alternative to
 * ematches.
 *
 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
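
/* Illustrative usage from user space via iproute2's tc (a sketch only;
 * exact command syntax depends on the iproute2 version, and the object
 * file and section names below are made up):
 *
 *   tc qdisc add dev eth0 ingress
 *   # classic BPF, bytecode passed inline:
 *   tc filter add dev eth0 parent ffff: bpf bytecode '...' classid 1:1
 *   # eBPF, program loaded from an object file, direct-action mode:
 *   tc filter add dev eth0 parent ffff: bpf da obj cls.o sec classifier
 */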

#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>

#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");

#define CLS_BPF_NAME_LEN        256

struct cls_bpf_head {
        struct list_head plist;         /* list of attached cls_bpf_prog entries */
        u32 hgen;                       /* last autogenerated filter handle */
        struct rcu_head rcu;            /* defers freeing the head past RCU readers */
};

struct cls_bpf_prog {
        struct bpf_prog *filter;        /* the (e)BPF program to run */
        struct list_head link;          /* membership in cls_bpf_head::plist */
        struct tcf_result res;          /* bound class/classid template */
        bool exts_integrated;           /* direct-action (TCA_BPF_FLAG_ACT_DIRECT) */
        struct tcf_exts exts;
        u32 handle;
        union {
                u32 bpf_fd;             /* eBPF: fd the program came from */
                u16 bpf_num_ops;        /* classic: number of sock_filter ops */
        };
        struct sock_filter *bpf_ops;    /* classic BPF insns, NULL for eBPF */
        const char *bpf_name;           /* optional eBPF program name */
        struct tcf_proto *tp;           /* back-pointer for the RCU callback */
        struct rcu_head rcu;
};

static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
        [TCA_BPF_CLASSID]       = { .type = NLA_U32 },
        [TCA_BPF_FLAGS]         = { .type = NLA_U32 },
        [TCA_BPF_FD]            = { .type = NLA_U32 },
        [TCA_BPF_NAME]          = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
        [TCA_BPF_OPS_LEN]       = { .type = NLA_U16 },
        [TCA_BPF_OPS]           = { .type = NLA_BINARY,
                                    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};

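/* Map the return code of a direct-action program to a valid TC action
 * verdict; any unknown value falls back to TC_ACT_UNSPEC, which lets
 * classification continue with the next filter in the list.
 */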
static int cls_bpf_exec_opcode(int code)
{
        switch (code) {
        case TC_ACT_OK:
        case TC_ACT_SHOT:
        case TC_ACT_STOLEN:
        case TC_ACT_REDIRECT:
        case TC_ACT_UNSPEC:
                return code;
        default:
                return TC_ACT_UNSPEC;
        }
}

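/* Main classification path: run every attached BPF program over the skb
 * under the RCU read lock. At ingress the mac header is pushed first so
 * the program sees the packet from the link-layer start, then pulled
 * again afterwards.
 */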
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                            struct tcf_result *res)
{
        struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
        struct cls_bpf_prog *prog;
#ifdef CONFIG_NET_CLS_ACT
        bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
#else
        bool at_ingress = false;
#endif
        int ret = -1;

        if (unlikely(!skb_mac_header_was_set(skb)))
                return -1;

        /* Needed here for accessing maps. */
        rcu_read_lock();
        list_for_each_entry_rcu(prog, &head->plist, link) {
                int filter_res;

                qdisc_skb_cb(skb)->tc_classid = prog->res.classid;

                if (at_ingress) {
                        /* It is safe to push/pull even if skb_shared() */
                        __skb_push(skb, skb->mac_len);
                        filter_res = BPF_PROG_RUN(prog->filter, skb);
                        __skb_pull(skb, skb->mac_len);
                } else {
                        filter_res = BPF_PROG_RUN(prog->filter, skb);
                }

                if (prog->exts_integrated) {
                        res->class = prog->res.class;
                        res->classid = qdisc_skb_cb(skb)->tc_classid;

                        ret = cls_bpf_exec_opcode(filter_res);
                        if (ret == TC_ACT_UNSPEC)
                                continue;
                        break;
                }

                if (filter_res == 0)
                        continue;

                *res = prog->res;
                if (filter_res != -1)
                        res->classid = filter_res;

                ret = tcf_exts_exec(skb, &prog->exts, res);
                if (ret < 0)
                        continue;

                break;
        }
        rcu_read_unlock();

        return ret;
}

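/* Classic BPF programs keep a copy of their sock_filter ops around for
 * dumping; eBPF programs are referenced by fd only, so a NULL bpf_ops
 * identifies an eBPF filter.
 */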
static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
{
        return !prog->bpf_ops;
}

static int cls_bpf_init(struct tcf_proto *tp)
{
        struct cls_bpf_head *head;

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (head == NULL)
                return -ENOBUFS;

        INIT_LIST_HEAD_RCU(&head->plist);
        rcu_assign_pointer(tp->root, head);

        return 0;
}

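/* Release everything a program owns: its actions, the (e)BPF filter
 * itself, the classic ops copy and the optional name, and finally the
 * program struct.
 */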
static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
        tcf_exts_destroy(&prog->exts);

        if (cls_bpf_is_ebpf(prog))
                bpf_prog_put(prog->filter);
        else
                bpf_prog_destroy(prog->filter);

        kfree(prog->bpf_name);
        kfree(prog->bpf_ops);
        kfree(prog);
}

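/* RCU callback: the actual freeing happens only after a grace period, once
 * no reader can still be walking this program on the classification path.
 */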
static void __cls_bpf_delete_prog(struct rcu_head *rcu)
{
        struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);

        cls_bpf_delete_prog(prog->tp, prog);
}

static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;

        list_del_rcu(&prog->link);
        tcf_unbind_filter(tp, &prog->res);
        call_rcu(&prog->rcu, __cls_bpf_delete_prog);

        return 0;
}

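/* Tear down the whole classifier instance. Without @force this fails as
 * long as filters are still attached; with @force all of them are removed
 * first.
 */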
static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog, *tmp;

        if (!force && !list_empty(&head->plist))
                return false;

        list_for_each_entry_safe(prog, tmp, &head->plist, link) {
                list_del_rcu(&prog->link);
                tcf_unbind_filter(tp, &prog->res);
                call_rcu(&prog->rcu, __cls_bpf_delete_prog);
        }

        RCU_INIT_POINTER(tp->root, NULL);
        kfree_rcu(head, rcu);
        return true;
}

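/* Look up a program by handle; returns it as an opaque cookie for the
 * tcf_proto_ops API, or 0 if no filter with that handle exists.
 */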
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog;
        unsigned long ret = 0UL;

        if (head == NULL)
                return 0UL;

        list_for_each_entry(prog, &head->plist, link) {
                if (prog->handle == handle) {
                        ret = (unsigned long) prog;
                        break;
                }
        }

        return ret;
}

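/* Classic BPF setup path: validate TCA_BPF_OPS_LEN against the attribute
 * payload, copy the sock_filter array and let bpf_prog_create() check and
 * prepare the program for execution.
 */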
static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
{
        struct sock_filter *bpf_ops;
        struct sock_fprog_kern fprog_tmp;
        struct bpf_prog *fp;
        u16 bpf_size, bpf_num_ops;
        int ret;

        bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
        if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
                return -EINVAL;

        bpf_size = bpf_num_ops * sizeof(*bpf_ops);
        if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
                return -EINVAL;

        bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
        if (bpf_ops == NULL)
                return -ENOMEM;

        memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);

        fprog_tmp.len = bpf_num_ops;
        fprog_tmp.filter = bpf_ops;

        ret = bpf_prog_create(&fp, &fprog_tmp);
        if (ret < 0) {
                kfree(bpf_ops);
                return ret;
        }

        prog->bpf_ops = bpf_ops;
        prog->bpf_num_ops = bpf_num_ops;
        prog->bpf_name = NULL;
        prog->filter = fp;

        return 0;
}

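/* eBPF setup path: take a reference on the program behind TCA_BPF_FD,
 * reject anything that is not of type BPF_PROG_TYPE_SCHED_CLS and keep an
 * optional user-supplied name around for dumping.
 */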
static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
                                 const struct tcf_proto *tp)
{
        struct bpf_prog *fp;
        char *name = NULL;
        u32 bpf_fd;

        bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);

        fp = bpf_prog_get(bpf_fd);
        if (IS_ERR(fp))
                return PTR_ERR(fp);

        if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
                bpf_prog_put(fp);
                return -EINVAL;
        }

        if (tb[TCA_BPF_NAME]) {
                name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
                               nla_len(tb[TCA_BPF_NAME]),
                               GFP_KERNEL);
                if (!name) {
                        bpf_prog_put(fp);
                        return -ENOMEM;
                }
        }

        prog->bpf_ops = NULL;
        prog->bpf_fd = bpf_fd;
        prog->bpf_name = name;
        prog->filter = fp;

        if (fp->dst_needed)
                netif_keep_dst(qdisc_dev(tp->q));

        return 0;
}

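/* Common configuration step for new and replaced filters: exactly one of
 * the classic ops attributes or an eBPF fd must be present, flags other
 * than TCA_BPF_FLAG_ACT_DIRECT are rejected, and on success the validated
 * extensions are committed to the program.
 */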
static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
                                   struct cls_bpf_prog *prog,
                                   unsigned long base, struct nlattr **tb,
                                   struct nlattr *est, bool ovr)
{
        bool is_bpf, is_ebpf, have_exts = false;
        struct tcf_exts exts;
        int ret;

        is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
        is_ebpf = tb[TCA_BPF_FD];
        if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
                return -EINVAL;

        tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
        ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
        if (ret < 0)
                return ret;

        if (tb[TCA_BPF_FLAGS]) {
                u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);

                if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
                        tcf_exts_destroy(&exts);
                        return -EINVAL;
                }

                have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
        }

        prog->exts_integrated = have_exts;

        ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
                       cls_bpf_prog_from_efd(tb, prog, tp);
        if (ret < 0) {
                tcf_exts_destroy(&exts);
                return ret;
        }

        if (tb[TCA_BPF_CLASSID]) {
                prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
                tcf_bind_filter(tp, &prog->res, base);
        }

        tcf_exts_change(tp, &prog->exts, &exts);
        return 0;
}

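/* Autogenerate a filter handle: advance the generator until an unused
 * value is found, giving up after 0x80000000 attempts.
 */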
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
                                   struct cls_bpf_head *head)
{
        unsigned int i = 0x80000000;
        u32 handle;

        do {
                if (++head->hgen == 0x7FFFFFFF)
                        head->hgen = 1;
        } while (--i > 0 && cls_bpf_get(tp, head->hgen));

        if (unlikely(i == 0)) {
                pr_err("Insufficient number of handles\n");
                handle = 0;
        } else {
                handle = head->hgen;
        }

        return handle;
}

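/* Netlink entry point for creating or replacing a filter. A replacement is
 * installed via list_replace_rcu() so classification never observes an
 * empty slot; the old program is freed after an RCU grace period.
 */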
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
                          unsigned long *arg, bool ovr)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
        struct nlattr *tb[TCA_BPF_MAX + 1];
        struct cls_bpf_prog *prog;
        int ret;

        if (tca[TCA_OPTIONS] == NULL)
                return -EINVAL;

        ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
        if (ret < 0)
                return ret;

        prog = kzalloc(sizeof(*prog), GFP_KERNEL);
        if (!prog)
                return -ENOBUFS;

        tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);

        if (oldprog) {
                if (handle && oldprog->handle != handle) {
                        ret = -EINVAL;
                        goto errout;
                }
        }

        if (handle == 0)
                prog->handle = cls_bpf_grab_new_handle(tp, head);
        else
                prog->handle = handle;
        if (prog->handle == 0) {
                ret = -EINVAL;
                goto errout;
        }

        ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
        if (ret < 0)
                goto errout;

        if (oldprog) {
                list_replace_rcu(&oldprog->link, &prog->link);
                tcf_unbind_filter(tp, &oldprog->res);
                call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
        } else {
                list_add_rcu(&prog->link, &head->plist);
        }

        *arg = (unsigned long) prog;
        return 0;
errout:
        kfree(prog);

        return ret;
}

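/* Netlink dump helpers: a classic program is dumped as its raw sock_filter
 * array, an eBPF program as the originating fd plus its optional name.
 */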
static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
                                 struct sk_buff *skb)
{
        struct nlattr *nla;

        if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
                return -EMSGSIZE;

        nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
                          sizeof(struct sock_filter));
        if (nla == NULL)
                return -EMSGSIZE;

        memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));

        return 0;
}

static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
                                  struct sk_buff *skb)
{
        if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
                return -EMSGSIZE;

        if (prog->bpf_name &&
            nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
                return -EMSGSIZE;

        return 0;
}

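/* Fill a netlink dump message for one filter: handle, classid, the program
 * representation, extensions, flags, and finally the extension stats.
 */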
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                        struct sk_buff *skb, struct tcmsg *tm)
{
        struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
        struct nlattr *nest;
        u32 bpf_flags = 0;
        int ret;

        if (prog == NULL)
                return skb->len;

        tm->tcm_handle = prog->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        if (prog->res.classid &&
            nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
                goto nla_put_failure;

        if (cls_bpf_is_ebpf(prog))
                ret = cls_bpf_dump_ebpf_info(prog, skb);
        else
                ret = cls_bpf_dump_bpf_info(prog, skb);
        if (ret)
                goto nla_put_failure;

        if (tcf_exts_dump(skb, &prog->exts) < 0)
                goto nla_put_failure;

        if (prog->exts_integrated)
                bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
        if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
                goto nla_put_failure;

        nla_nest_end(skb, nest);

        if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

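/* Iterate over all programs for walk/dump requests, honoring the caller's
 * skip count and stopping once the callback signals an error.
 */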
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog;

        list_for_each_entry(prog, &head->plist, link) {
                if (arg->count < arg->skip)
                        goto skip;
                if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
skip:
                arg->count++;
        }
}

static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
        .kind           =       "bpf",
        .owner          =       THIS_MODULE,
        .classify       =       cls_bpf_classify,
        .init           =       cls_bpf_init,
        .destroy        =       cls_bpf_destroy,
        .get            =       cls_bpf_get,
        .change         =       cls_bpf_change,
        .delete         =       cls_bpf_delete,
        .walk           =       cls_bpf_walk,
        .dump           =       cls_bpf_dump,
};

static int __init cls_bpf_init_mod(void)
{
        return register_tcf_proto_ops(&cls_bpf_ops);
}

static void __exit cls_bpf_exit_mod(void)
{
        unregister_tcf_proto_ops(&cls_bpf_ops);
}

module_init(cls_bpf_init_mod);
module_exit(cls_bpf_exit_mod);