linux/net/sched/cls_api.c
<<
>>
Prefs
   1/*
   2 * net/sched/cls_api.c  Packet classifier API.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *
  11 * Changes:
  12 *
  13 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
  14 *
  15 */
  16
  17#include <linux/module.h>
  18#include <linux/types.h>
  19#include <linux/kernel.h>
  20#include <linux/string.h>
  21#include <linux/errno.h>
  22#include <linux/skbuff.h>
  23#include <linux/init.h>
  24#include <linux/kmod.h>
  25#include <linux/err.h>
  26#include <linux/slab.h>
  27#include <net/net_namespace.h>
  28#include <net/sock.h>
  29#include <net/netlink.h>
  30#include <net/pkt_sched.h>
  31#include <net/pkt_cls.h>
  32
  33/* List of all registered classifier types (struct tcf_proto_ops), linked
   * through their ->head member. register_tcf_proto_ops() appends new
   * entries at the tail; unregister_tcf_proto_ops() unlinks them.
   */
  34static LIST_HEAD(tcf_proto_base);
  35
  36/* Protects tcf_proto_base, the list of registered TC classifier modules.
   * Taken for reading by lookups and for writing by register/unregister.
   * It is a pure SMP lock.
   */
  37static DEFINE_RWLOCK(cls_mod_lock);
  38
  39/* Find classifier type by string name */
  40
  41static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
  42{
  43        const struct tcf_proto_ops *t, *res = NULL;
  44
  45        if (kind) {
  46                read_lock(&cls_mod_lock);
  47                list_for_each_entry(t, &tcf_proto_base, head) {
  48                        if (nla_strcmp(kind, t->kind) == 0) {
  49                                if (try_module_get(t->owner))
  50                                        res = t;
  51                                break;
  52                        }
  53                }
  54                read_unlock(&cls_mod_lock);
  55        }
  56        return res;
  57}
  58
  59/* Register(unregister) new classifier type */
  60
  61int register_tcf_proto_ops(struct tcf_proto_ops *ops)
  62{
  63        struct tcf_proto_ops *t;
  64        int rc = -EEXIST;
  65
  66        write_lock(&cls_mod_lock);
  67        list_for_each_entry(t, &tcf_proto_base, head)
  68                if (!strcmp(ops->kind, t->kind))
  69                        goto out;
  70
  71        list_add_tail(&ops->head, &tcf_proto_base);
  72        rc = 0;
  73out:
  74        write_unlock(&cls_mod_lock);
  75        return rc;
  76}
  77EXPORT_SYMBOL(register_tcf_proto_ops);
  78
  79int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
  80{
  81        struct tcf_proto_ops *t;
  82        int rc = -ENOENT;
  83
  84        /* Wait for outstanding call_rcu()s, if any, from a
  85         * tcf_proto_ops's destroy() handler.
  86         */
  87        rcu_barrier();
  88
  89        write_lock(&cls_mod_lock);
  90        list_for_each_entry(t, &tcf_proto_base, head) {
  91                if (t == ops) {
  92                        list_del(&t->head);
  93                        rc = 0;
  94                        break;
  95                }
  96        }
  97        write_unlock(&cls_mod_lock);
  98        return rc;
  99}
 100EXPORT_SYMBOL(unregister_tcf_proto_ops);
 101
 102static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 103                          struct nlmsghdr *n, struct tcf_proto *tp,
 104                          unsigned long fh, int event);
 105
 106
 107/* Select new prio value from the range, managed by kernel. */
 108
 109static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 110{
 111        u32 first = TC_H_MAKE(0xC0000000U, 0U);
 112
 113        if (tp)
 114                first = tp->prio - 1;
 115
 116        return first;
 117}
 118
 119/* Add/change/delete/get a filter node */
 120
 121static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
 122{
 123        struct net *net = sock_net(skb->sk);
 124        struct nlattr *tca[TCA_MAX + 1];
 125        struct tcmsg *t;
 126        u32 protocol;
 127        u32 prio;
 128        u32 nprio;
 129        u32 parent;
 130        struct net_device *dev;
 131        struct Qdisc  *q;
 132        struct tcf_proto __rcu **back;
 133        struct tcf_proto __rcu **chain;
 134        struct tcf_proto *tp;
 135        const struct tcf_proto_ops *tp_ops;
 136        const struct Qdisc_class_ops *cops;
 137        unsigned long cl;
 138        unsigned long fh;
 139        int err;
 140        int tp_created = 0;
 141
 142        if ((n->nlmsg_type != RTM_GETTFILTER) &&
 143            !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 144                return -EPERM;
 145
 146replay:
 147        err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
 148        if (err < 0)
 149                return err;
 150
 151        t = nlmsg_data(n);
 152        protocol = TC_H_MIN(t->tcm_info);
 153        prio = TC_H_MAJ(t->tcm_info);
 154        nprio = prio;
 155        parent = t->tcm_parent;
 156        cl = 0;
 157
 158        if (prio == 0) {
 159                /* If no priority is given, user wants we allocated it. */
 160                if (n->nlmsg_type != RTM_NEWTFILTER ||
 161                    !(n->nlmsg_flags & NLM_F_CREATE))
 162                        return -ENOENT;
 163                prio = TC_H_MAKE(0x80000000U, 0U);
 164        }
 165
 166        /* Find head of filter chain. */
 167
 168        /* Find link */
 169        dev = __dev_get_by_index(net, t->tcm_ifindex);
 170        if (dev == NULL)
 171                return -ENODEV;
 172
 173        /* Find qdisc */
 174        if (!parent) {
 175                q = dev->qdisc;
 176                parent = q->handle;
 177        } else {
 178                q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
 179                if (q == NULL)
 180                        return -EINVAL;
 181        }
 182
 183        /* Is it classful? */
 184        cops = q->ops->cl_ops;
 185        if (!cops)
 186                return -EINVAL;
 187
 188        if (cops->tcf_chain == NULL)
 189                return -EOPNOTSUPP;
 190
 191        /* Do we search for filter, attached to class? */
 192        if (TC_H_MIN(parent)) {
 193                cl = cops->get(q, parent);
 194                if (cl == 0)
 195                        return -ENOENT;
 196        }
 197
 198        /* And the last stroke */
 199        chain = cops->tcf_chain(q, cl);
 200        err = -EINVAL;
 201        if (chain == NULL)
 202                goto errout;
 203
 204        /* Check the chain for existence of proto-tcf with this priority */
 205        for (back = chain;
 206             (tp = rtnl_dereference(*back)) != NULL;
 207             back = &tp->next) {
 208                if (tp->prio >= prio) {
 209                        if (tp->prio == prio) {
 210                                if (!nprio ||
 211                                    (tp->protocol != protocol && protocol))
 212                                        goto errout;
 213                        } else
 214                                tp = NULL;
 215                        break;
 216                }
 217        }
 218
 219        if (tp == NULL) {
 220                /* Proto-tcf does not exist, create new one */
 221
 222                if (tca[TCA_KIND] == NULL || !protocol)
 223                        goto errout;
 224
 225                err = -ENOENT;
 226                if (n->nlmsg_type != RTM_NEWTFILTER ||
 227                    !(n->nlmsg_flags & NLM_F_CREATE))
 228                        goto errout;
 229
 230
 231                /* Create new proto tcf */
 232
 233                err = -ENOBUFS;
 234                tp = kzalloc(sizeof(*tp), GFP_KERNEL);
 235                if (tp == NULL)
 236                        goto errout;
 237                err = -ENOENT;
 238                tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
 239                if (tp_ops == NULL) {
 240#ifdef CONFIG_MODULES
 241                        struct nlattr *kind = tca[TCA_KIND];
 242                        char name[IFNAMSIZ];
 243
 244                        if (kind != NULL &&
 245                            nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
 246                                rtnl_unlock();
 247                                request_module("cls_%s", name);
 248                                rtnl_lock();
 249                                tp_ops = tcf_proto_lookup_ops(kind);
 250                                /* We dropped the RTNL semaphore in order to
 251                                 * perform the module load.  So, even if we
 252                                 * succeeded in loading the module we have to
 253                                 * replay the request.  We indicate this using
 254                                 * -EAGAIN.
 255                                 */
 256                                if (tp_ops != NULL) {
 257                                        module_put(tp_ops->owner);
 258                                        err = -EAGAIN;
 259                                }
 260                        }
 261#endif
 262                        kfree(tp);
 263                        goto errout;
 264                }
 265                tp->ops = tp_ops;
 266                tp->protocol = protocol;
 267                tp->prio = nprio ? :
 268                               TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
 269                tp->q = q;
 270                tp->classify = tp_ops->classify;
 271                tp->classid = parent;
 272
 273                err = tp_ops->init(tp);
 274                if (err != 0) {
 275                        module_put(tp_ops->owner);
 276                        kfree(tp);
 277                        goto errout;
 278                }
 279
 280                tp_created = 1;
 281
 282        } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
 283                goto errout;
 284
 285        fh = tp->ops->get(tp, t->tcm_handle);
 286
 287        if (fh == 0) {
 288                if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
 289                        struct tcf_proto *next = rtnl_dereference(tp->next);
 290
 291                        RCU_INIT_POINTER(*back, next);
 292
 293                        tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
 294                        tcf_destroy(tp, true);
 295                        err = 0;
 296                        goto errout;
 297                }
 298
 299                err = -ENOENT;
 300                if (n->nlmsg_type != RTM_NEWTFILTER ||
 301                    !(n->nlmsg_flags & NLM_F_CREATE))
 302                        goto errout;
 303        } else {
 304                switch (n->nlmsg_type) {
 305                case RTM_NEWTFILTER:
 306                        err = -EEXIST;
 307                        if (n->nlmsg_flags & NLM_F_EXCL) {
 308                                if (tp_created)
 309                                        tcf_destroy(tp, true);
 310                                goto errout;
 311                        }
 312                        break;
 313                case RTM_DELTFILTER:
 314                        err = tp->ops->delete(tp, fh);
 315                        if (err == 0) {
 316                                struct tcf_proto *next = rtnl_dereference(tp->next);
 317
 318                                tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
 319                                if (tcf_destroy(tp, false))
 320                                        RCU_INIT_POINTER(*back, next);
 321                        }
 322                        goto errout;
 323                case RTM_GETTFILTER:
 324                        err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
 325                        goto errout;
 326                default:
 327                        err = -EINVAL;
 328                        goto errout;
 329                }
 330        }
 331
 332        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
 333                              n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
 334        if (err == 0) {
 335                if (tp_created) {
 336                        RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
 337                        rcu_assign_pointer(*back, tp);
 338                }
 339                tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
 340        } else {
 341                if (tp_created)
 342                        tcf_destroy(tp, true);
 343        }
 344
 345errout:
 346        if (cl)
 347                cops->put(q, cl);
 348        if (err == -EAGAIN)
 349                /* Replay the request. */
 350                goto replay;
 351        return err;
 352}
 353
 354static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp,
 355                         unsigned long fh, u32 portid, u32 seq, u16 flags, int event)
 356{
 357        struct tcmsg *tcm;
 358        struct nlmsghdr  *nlh;
 359        unsigned char *b = skb_tail_pointer(skb);
 360
 361        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 362        if (!nlh)
 363                goto out_nlmsg_trim;
 364        tcm = nlmsg_data(nlh);
 365        tcm->tcm_family = AF_UNSPEC;
 366        tcm->tcm__pad1 = 0;
 367        tcm->tcm__pad2 = 0;
 368        tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
 369        tcm->tcm_parent = tp->classid;
 370        tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
 371        if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
 372                goto nla_put_failure;
 373        tcm->tcm_handle = fh;
 374        if (RTM_DELTFILTER != event) {
 375                tcm->tcm_handle = 0;
 376                if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
 377                        goto nla_put_failure;
 378        }
 379        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 380        return skb->len;
 381
 382out_nlmsg_trim:
 383nla_put_failure:
 384        nlmsg_trim(skb, b);
 385        return -1;
 386}
 387
 388static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 389                          struct nlmsghdr *n, struct tcf_proto *tp,
 390                          unsigned long fh, int event)
 391{
 392        struct sk_buff *skb;
 393        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 394
 395        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 396        if (!skb)
 397                return -ENOBUFS;
 398
 399        if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) {
 400                kfree_skb(skb);
 401                return -EINVAL;
 402        }
 403
 404        return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 405                              n->nlmsg_flags & NLM_F_ECHO);
 406}
 407
 /* Walker state used by tc_dump_tfilter() when dumping individual filter
  * nodes. tcf_node_dump() is handed a pointer to @w and casts it back to
  * this structure, so @w has to stay the first member.
  */
 408struct tcf_dump_args {
 409        struct tcf_walker w;            /* walker passed to ->walk(); must be first */
 410        struct sk_buff *skb;            /* message buffer the nodes are written to */
 411        struct netlink_callback *cb;    /* dump callback: source of portid and sequence number */
 412};
 413
 414static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
 415                         struct tcf_walker *arg)
 416{
 417        struct tcf_dump_args *a = (void *)arg;
 418        struct net *net = sock_net(a->skb->sk);
 419
 420        return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
 421                             a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
 422}
 423
 424/* called with RTNL */
 425static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 426{
 427        struct net *net = sock_net(skb->sk);
 428        int t;
 429        int s_t;
 430        struct net_device *dev;
 431        struct Qdisc *q;
 432        struct tcf_proto *tp, __rcu **chain;
 433        struct tcmsg *tcm = nlmsg_data(cb->nlh);
 434        unsigned long cl = 0;
 435        const struct Qdisc_class_ops *cops;
 436        struct tcf_dump_args arg;
 437
 438        if (nlmsg_len(cb->nlh) < sizeof(*tcm))
 439                return skb->len;
 440        dev = __dev_get_by_index(net, tcm->tcm_ifindex);
 441        if (!dev)
 442                return skb->len;
 443
 444        if (!tcm->tcm_parent)
 445                q = dev->qdisc;
 446        else
 447                q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
 448        if (!q)
 449                goto out;
 450        cops = q->ops->cl_ops;
 451        if (!cops)
 452                goto errout;
 453        if (cops->tcf_chain == NULL)
 454                goto errout;
 455        if (TC_H_MIN(tcm->tcm_parent)) {
 456                cl = cops->get(q, tcm->tcm_parent);
 457                if (cl == 0)
 458                        goto errout;
 459        }
 460        chain = cops->tcf_chain(q, cl);
 461        if (chain == NULL)
 462                goto errout;
 463
 464        s_t = cb->args[0];
 465
 466        for (tp = rtnl_dereference(*chain), t = 0;
 467             tp; tp = rtnl_dereference(tp->next), t++) {
 468                if (t < s_t)
 469                        continue;
 470                if (TC_H_MAJ(tcm->tcm_info) &&
 471                    TC_H_MAJ(tcm->tcm_info) != tp->prio)
 472                        continue;
 473                if (TC_H_MIN(tcm->tcm_info) &&
 474                    TC_H_MIN(tcm->tcm_info) != tp->protocol)
 475                        continue;
 476                if (t > s_t)
 477                        memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
 478                if (cb->args[1] == 0) {
 479                        if (tcf_fill_node(net, skb, tp, 0, NETLINK_CB(cb->skb).portid,
 480                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
 481                                          RTM_NEWTFILTER) <= 0)
 482                                break;
 483
 484                        cb->args[1] = 1;
 485                }
 486                if (tp->ops->walk == NULL)
 487                        continue;
 488                arg.w.fn = tcf_node_dump;
 489                arg.skb = skb;
 490                arg.cb = cb;
 491                arg.w.stop = 0;
 492                arg.w.skip = cb->args[1] - 1;
 493                arg.w.count = 0;
 494                tp->ops->walk(tp, &arg.w);
 495                cb->args[1] = arg.w.count + 1;
 496                if (arg.w.stop)
 497                        break;
 498        }
 499
 500        cb->args[0] = t;
 501
 502errout:
 503        if (cl)
 504                cops->put(q, cl);
 505out:
 506        return skb->len;
 507}
 508
 /* Release the actions attached to @exts.
  *
  * With CONFIG_NET_CLS_ACT the action list is destroyed with
  * TCA_ACT_UNBIND and then reinitialised to an empty list; without it this
  * is a no-op.
  */
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
         struct list_head *actions = &exts->actions;

         tcf_action_destroy(actions, TCA_ACT_UNBIND);
         INIT_LIST_HEAD(actions);
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_destroy);
 517
 518int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 519                  struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
 520{
 521#ifdef CONFIG_NET_CLS_ACT
 522        {
 523                struct tc_action *act;
 524
 525                INIT_LIST_HEAD(&exts->actions);
 526                if (exts->police && tb[exts->police]) {
 527                        act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
 528                                                "police", ovr,
 529                                                TCA_ACT_BIND);
 530                        if (IS_ERR(act))
 531                                return PTR_ERR(act);
 532
 533                        act->type = exts->type = TCA_OLD_COMPAT;
 534                        list_add(&act->list, &exts->actions);
 535                } else if (exts->action && tb[exts->action]) {
 536                        int err;
 537                        err = tcf_action_init(net, tb[exts->action], rate_tlv,
 538                                              NULL, ovr,
 539                                              TCA_ACT_BIND, &exts->actions);
 540                        if (err)
 541                                return err;
 542                }
 543        }
 544#else
 545        if ((exts->action && tb[exts->action]) ||
 546            (exts->police && tb[exts->police]))
 547                return -EOPNOTSUPP;
 548#endif
 549
 550        return 0;
 551}
 552EXPORT_SYMBOL(tcf_exts_validate);
 553
 /* Replace the actions of @dst with those of @src.
  *
  * Under the tree lock of @tp the current actions of @dst are moved to a
  * private list, the actions of @src are spliced into @dst and the type is
  * copied over. The old actions are destroyed with TCA_ACT_UNBIND after the
  * lock has been dropped. A no-op without CONFIG_NET_CLS_ACT.
  */
 void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
                      struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
         LIST_HEAD(old_actions);

         tcf_tree_lock(tp);
         list_splice_init(&dst->actions, &old_actions);
         list_splice(&src->actions, &dst->actions);
         dst->type = src->type;
         tcf_tree_unlock(tp);

         tcf_action_destroy(&old_actions, TCA_ACT_UNBIND);
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_change);
 568
 /* First action on @ext's action list, or NULL when the list is empty.
  * The expansion uses the macro argument itself, so it works regardless of
  * what the caller's variable is called.
  */
 #define tcf_exts_first_act(ext)                                 \
         list_first_entry_or_null(&(ext)->actions,               \
                                  struct tc_action, list)
 572
 /* Dump the actions attached to @exts into @skb.
  *
  * Nothing is written unless @exts has an action attribute type configured
  * and a non-empty action list. Normally the actions are nested under
  * exts->action. In old compatibility mode (TCA_OLD_COMPAT), kept so that
  * both old and new ways of entering tc data keep working, the first action
  * is dumped under exts->police instead, provided a police attribute type
  * is configured.
  *
  * Returns 0 on success (always 0 without CONFIG_NET_CLS_ACT) or -1 after
  * cancelling the partially written nest.
  */
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
         struct tc_action *act;
         struct nlattr *nest;

         if (!exts->action || list_empty(&exts->actions))
                 return 0;

         if (exts->type != TCA_OLD_COMPAT) {
                 nest = nla_nest_start(skb, exts->action);
                 if (nest == NULL)
                         goto nla_put_failure;
                 if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
                         goto nla_put_failure;
                 nla_nest_end(skb, nest);
                 return 0;
         }

         if (!exts->police)
                 return 0;

         act = tcf_exts_first_act(exts);
         nest = nla_nest_start(skb, exts->police);
         if (nest == NULL || !act)
                 goto nla_put_failure;
         if (tcf_action_dump_old(skb, act, 0, 0) < 0)
                 goto nla_put_failure;
         nla_nest_end(skb, nest);
         return 0;

 nla_put_failure:
         nla_nest_cancel(skb, nest);
         return -1;
 #else
         return 0;
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_dump);
 611
 612
 /* Copy the statistics of the first action of @exts into @skb.
  *
  * Returns -1 if copying the statistics fails, 0 otherwise (including when
  * there is no action or CONFIG_NET_CLS_ACT is disabled).
  */
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
         struct tc_action *first = tcf_exts_first_act(exts);

         if (first == NULL)
                 return 0;
         if (tcf_action_copy_stats(skb, first, 1) < 0)
                 return -1;
 #endif
         return 0;
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
 623
 624static int __init tc_filter_init(void)
 625{
 626        rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
 627        rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
 628        rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
 629                      tc_dump_tfilter, NULL);
 630
 631        return 0;
 632}
 633
 634subsys_initcall(tc_filter_init);
 635