linux/net/sched/cls_tcindex.c
<<
>>
Prefs
   1/*
   2 * net/sched/cls_tcindex.c      Packet classifier for skb->tc_index
   3 *
   4 * Written 1998,1999 by Werner Almesberger, EPFL ICA
   5 */
   6
   7#include <linux/module.h>
   8#include <linux/types.h>
   9#include <linux/kernel.h>
  10#include <linux/skbuff.h>
  11#include <linux/errno.h>
  12#include <linux/slab.h>
  13#include <net/act_api.h>
  14#include <net/netlink.h>
  15#include <net/pkt_cls.h>
  16
  17/*
  18 * Passing parameters to the root seems to be done more awkwardly than really
  19 * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
  20 * verified. FIXME.
  21 */
  22
  23#define PERFECT_HASH_THRESHOLD  64      /* use perfect hash if not bigger */
  24#define DEFAULT_HASH_SIZE       64      /* optimized for diffserv */
  25
  26
  27struct tcindex_filter_result {
  28        struct tcf_exts         exts;
  29        struct tcf_result       res;
  30        struct rcu_head         rcu;
  31};
  32
  33struct tcindex_filter {
  34        u16 key;
  35        struct tcindex_filter_result result;
  36        struct tcindex_filter __rcu *next;
  37        struct rcu_head rcu;
  38};
  39
  40
  41struct tcindex_data {
  42        struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
  43        struct tcindex_filter __rcu **h; /* imperfect hash; */
  44        struct tcf_proto *tp;
  45        u16 mask;               /* AND key with mask */
  46        u32 shift;              /* shift ANDed key to the right */
  47        u32 hash;               /* hash table size; 0 if undefined */
  48        u32 alloc_hash;         /* allocated size */
  49        u32 fall_through;       /* 0: only classify if explicit match */
  50        struct rcu_head rcu;
  51};
  52
  53static inline int
  54tcindex_filter_is_set(struct tcindex_filter_result *r)
  55{
  56        return tcf_exts_is_predicative(&r->exts) || r->res.classid;
  57}
  58
  59static struct tcindex_filter_result *
  60tcindex_lookup(struct tcindex_data *p, u16 key)
  61{
  62        if (p->perfect) {
  63                struct tcindex_filter_result *f = p->perfect + key;
  64
  65                return tcindex_filter_is_set(f) ? f : NULL;
  66        } else if (p->h) {
  67                struct tcindex_filter __rcu **fp;
  68                struct tcindex_filter *f;
  69
  70                fp = &p->h[key % p->hash];
  71                for (f = rcu_dereference_bh_rtnl(*fp);
  72                     f;
  73                     fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
  74                        if (f->key == key)
  75                                return &f->result;
  76        }
  77
  78        return NULL;
  79}
  80
  81
  82static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
  83                            struct tcf_result *res)
  84{
  85        struct tcindex_data *p = rcu_dereference_bh(tp->root);
  86        struct tcindex_filter_result *f;
  87        int key = (skb->tc_index & p->mask) >> p->shift;
  88
  89        pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
  90                 skb, tp, res, p);
  91
  92        f = tcindex_lookup(p, key);
  93        if (!f) {
  94                if (!p->fall_through)
  95                        return -1;
  96                res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
  97                res->class = 0;
  98                pr_debug("alg 0x%x\n", res->classid);
  99                return 0;
 100        }
 101        *res = f->res;
 102        pr_debug("map 0x%x\n", res->classid);
 103
 104        return tcf_exts_exec(skb, &f->exts, res);
 105}
 106
 107
 108static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
 109{
 110        struct tcindex_data *p = rtnl_dereference(tp->root);
 111        struct tcindex_filter_result *r;
 112
 113        pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
 114        if (p->perfect && handle >= p->alloc_hash)
 115                return 0;
 116        r = tcindex_lookup(p, handle);
 117        return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
 118}
 119
 120static int tcindex_init(struct tcf_proto *tp)
 121{
 122        struct tcindex_data *p;
 123
 124        pr_debug("tcindex_init(tp %p)\n", tp);
 125        p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
 126        if (!p)
 127                return -ENOMEM;
 128
 129        p->mask = 0xffff;
 130        p->hash = DEFAULT_HASH_SIZE;
 131        p->fall_through = 1;
 132
 133        rcu_assign_pointer(tp->root, p);
 134        return 0;
 135}
 136
 137static void tcindex_destroy_rexts(struct rcu_head *head)
 138{
 139        struct tcindex_filter_result *r;
 140
 141        r = container_of(head, struct tcindex_filter_result, rcu);
 142        tcf_exts_destroy(&r->exts);
 143}
 144
 145static void tcindex_destroy_fexts(struct rcu_head *head)
 146{
 147        struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu);
 148
 149        tcf_exts_destroy(&f->result.exts);
 150        kfree(f);
 151}
 152
 153static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
 154{
 155        struct tcindex_data *p = rtnl_dereference(tp->root);
 156        struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
 157        struct tcindex_filter __rcu **walk;
 158        struct tcindex_filter *f = NULL;
 159
 160        pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
 161        if (p->perfect) {
 162                if (!r->res.class)
 163                        return -ENOENT;
 164        } else {
 165                int i;
 166
 167                for (i = 0; i < p->hash; i++) {
 168                        walk = p->h + i;
 169                        for (f = rtnl_dereference(*walk); f;
 170                             walk = &f->next, f = rtnl_dereference(*walk)) {
 171                                if (&f->result == r)
 172                                        goto found;
 173                        }
 174                }
 175                return -ENOENT;
 176
 177found:
 178                rcu_assign_pointer(*walk, rtnl_dereference(f->next));
 179        }
 180        tcf_unbind_filter(tp, &r->res);
 181        /* all classifiers are required to call tcf_exts_destroy() after rcu
 182         * grace period, since converted-to-rcu actions are relying on that
 183         * in cleanup() callback
 184         */
 185        if (f)
 186                call_rcu(&f->rcu, tcindex_destroy_fexts);
 187        else
 188                call_rcu(&r->rcu, tcindex_destroy_rexts);
 189        return 0;
 190}
 191
 192static int tcindex_destroy_element(struct tcf_proto *tp,
 193                                   unsigned long arg,
 194                                   struct tcf_walker *walker)
 195{
 196        return tcindex_delete(tp, arg);
 197}
 198
 199static void __tcindex_destroy(struct rcu_head *head)
 200{
 201        struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
 202
 203        kfree(p->perfect);
 204        kfree(p->h);
 205        kfree(p);
 206}
 207
 208static inline int
 209valid_perfect_hash(struct tcindex_data *p)
 210{
 211        return  p->hash > (p->mask >> p->shift);
 212}
 213
 214static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
 215        [TCA_TCINDEX_HASH]              = { .type = NLA_U32 },
 216        [TCA_TCINDEX_MASK]              = { .type = NLA_U16 },
 217        [TCA_TCINDEX_SHIFT]             = { .type = NLA_U32 },
 218        [TCA_TCINDEX_FALL_THROUGH]      = { .type = NLA_U32 },
 219        [TCA_TCINDEX_CLASSID]           = { .type = NLA_U32 },
 220};
 221
 222static void tcindex_filter_result_init(struct tcindex_filter_result *r)
 223{
 224        memset(r, 0, sizeof(*r));
 225        tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 226}
 227
 228static void __tcindex_partial_destroy(struct rcu_head *head)
 229{
 230        struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
 231
 232        kfree(p->perfect);
 233        kfree(p);
 234}
 235
 236static int
 237tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 238                  u32 handle, struct tcindex_data *p,
 239                  struct tcindex_filter_result *r, struct nlattr **tb,
 240                  struct nlattr *est, bool ovr)
 241{
 242        int err, balloc = 0;
 243        struct tcindex_filter_result new_filter_result, *old_r = r;
 244        struct tcindex_filter_result cr;
 245        struct tcindex_data *cp, *oldp;
 246        struct tcindex_filter *f = NULL; /* make gcc behave */
 247        struct tcf_exts e;
 248
 249        tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 250        err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 251        if (err < 0)
 252                return err;
 253
 254        err = -ENOMEM;
 255        /* tcindex_data attributes must look atomic to classifier/lookup so
 256         * allocate new tcindex data and RCU assign it onto root. Keeping
 257         * perfect hash and hash pointers from old data.
 258         */
 259        cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 260        if (!cp)
 261                goto errout;
 262
 263        cp->mask = p->mask;
 264        cp->shift = p->shift;
 265        cp->hash = p->hash;
 266        cp->alloc_hash = p->alloc_hash;
 267        cp->fall_through = p->fall_through;
 268        cp->tp = tp;
 269
 270        if (p->perfect) {
 271                int i;
 272
 273                cp->perfect = kmemdup(p->perfect,
 274                                      sizeof(*r) * cp->hash, GFP_KERNEL);
 275                if (!cp->perfect)
 276                        goto errout;
 277                for (i = 0; i < cp->hash; i++)
 278                        tcf_exts_init(&cp->perfect[i].exts,
 279                                      TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 280                balloc = 1;
 281        }
 282        cp->h = p->h;
 283
 284        tcindex_filter_result_init(&new_filter_result);
 285        tcindex_filter_result_init(&cr);
 286        if (old_r)
 287                cr.res = r->res;
 288
 289        if (tb[TCA_TCINDEX_HASH])
 290                cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
 291
 292        if (tb[TCA_TCINDEX_MASK])
 293                cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
 294
 295        if (tb[TCA_TCINDEX_SHIFT])
 296                cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
 297
 298        err = -EBUSY;
 299
 300        /* Hash already allocated, make sure that we still meet the
 301         * requirements for the allocated hash.
 302         */
 303        if (cp->perfect) {
 304                if (!valid_perfect_hash(cp) ||
 305                    cp->hash > cp->alloc_hash)
 306                        goto errout_alloc;
 307        } else if (cp->h && cp->hash != cp->alloc_hash) {
 308                goto errout_alloc;
 309        }
 310
 311        err = -EINVAL;
 312        if (tb[TCA_TCINDEX_FALL_THROUGH])
 313                cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
 314
 315        if (!cp->hash) {
 316                /* Hash not specified, use perfect hash if the upper limit
 317                 * of the hashing index is below the threshold.
 318                 */
 319                if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
 320                        cp->hash = (cp->mask >> cp->shift) + 1;
 321                else
 322                        cp->hash = DEFAULT_HASH_SIZE;
 323        }
 324
 325        if (!cp->perfect && !cp->h)
 326                cp->alloc_hash = cp->hash;
 327
 328        /* Note: this could be as restrictive as if (handle & ~(mask >> shift))
 329         * but then, we'd fail handles that may become valid after some future
 330         * mask change. While this is extremely unlikely to ever matter,
 331         * the check below is safer (and also more backwards-compatible).
 332         */
 333        if (cp->perfect || valid_perfect_hash(cp))
 334                if (handle >= cp->alloc_hash)
 335                        goto errout_alloc;
 336
 337
 338        err = -ENOMEM;
 339        if (!cp->perfect && !cp->h) {
 340                if (valid_perfect_hash(cp)) {
 341                        int i;
 342
 343                        cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
 344                        if (!cp->perfect)
 345                                goto errout_alloc;
 346                        for (i = 0; i < cp->hash; i++)
 347                                tcf_exts_init(&cp->perfect[i].exts,
 348                                              TCA_TCINDEX_ACT,
 349                                              TCA_TCINDEX_POLICE);
 350                        balloc = 1;
 351                } else {
 352                        struct tcindex_filter __rcu **hash;
 353
 354                        hash = kcalloc(cp->hash,
 355                                       sizeof(struct tcindex_filter *),
 356                                       GFP_KERNEL);
 357
 358                        if (!hash)
 359                                goto errout_alloc;
 360
 361                        cp->h = hash;
 362                        balloc = 2;
 363                }
 364        }
 365
 366        if (cp->perfect)
 367                r = cp->perfect + handle;
 368        else
 369                r = tcindex_lookup(cp, handle) ? : &new_filter_result;
 370
 371        if (r == &new_filter_result) {
 372                f = kzalloc(sizeof(*f), GFP_KERNEL);
 373                if (!f)
 374                        goto errout_alloc;
 375                f->key = handle;
 376                tcindex_filter_result_init(&f->result);
 377                f->next = NULL;
 378        }
 379
 380        if (tb[TCA_TCINDEX_CLASSID]) {
 381                cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
 382                tcf_bind_filter(tp, &cr.res, base);
 383        }
 384
 385        if (old_r)
 386                tcf_exts_change(tp, &r->exts, &e);
 387        else
 388                tcf_exts_change(tp, &cr.exts, &e);
 389
 390        if (old_r && old_r != r)
 391                tcindex_filter_result_init(old_r);
 392
 393        oldp = p;
 394        r->res = cr.res;
 395        rcu_assign_pointer(tp->root, cp);
 396
 397        if (r == &new_filter_result) {
 398                struct tcindex_filter *nfp;
 399                struct tcindex_filter __rcu **fp;
 400
 401                tcf_exts_change(tp, &f->result.exts, &r->exts);
 402
 403                fp = cp->h + (handle % cp->hash);
 404                for (nfp = rtnl_dereference(*fp);
 405                     nfp;
 406                     fp = &nfp->next, nfp = rtnl_dereference(*fp))
 407                                ; /* nothing */
 408
 409                rcu_assign_pointer(*fp, f);
 410        }
 411
 412        if (oldp)
 413                call_rcu(&oldp->rcu, __tcindex_partial_destroy);
 414        return 0;
 415
 416errout_alloc:
 417        if (balloc == 1)
 418                kfree(cp->perfect);
 419        else if (balloc == 2)
 420                kfree(cp->h);
 421errout:
 422        kfree(cp);
 423        tcf_exts_destroy(&e);
 424        return err;
 425}
 426
 427static int
 428tcindex_change(struct net *net, struct sk_buff *in_skb,
 429               struct tcf_proto *tp, unsigned long base, u32 handle,
 430               struct nlattr **tca, unsigned long *arg, bool ovr)
 431{
 432        struct nlattr *opt = tca[TCA_OPTIONS];
 433        struct nlattr *tb[TCA_TCINDEX_MAX + 1];
 434        struct tcindex_data *p = rtnl_dereference(tp->root);
 435        struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
 436        int err;
 437
 438        pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
 439            "p %p,r %p,*arg 0x%lx\n",
 440            tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
 441
 442        if (!opt)
 443                return 0;
 444
 445        err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy);
 446        if (err < 0)
 447                return err;
 448
 449        return tcindex_set_parms(net, tp, base, handle, p, r, tb,
 450                                 tca[TCA_RATE], ovr);
 451}
 452
 453static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 454{
 455        struct tcindex_data *p = rtnl_dereference(tp->root);
 456        struct tcindex_filter *f, *next;
 457        int i;
 458
 459        pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
 460        if (p->perfect) {
 461                for (i = 0; i < p->hash; i++) {
 462                        if (!p->perfect[i].res.class)
 463                                continue;
 464                        if (walker->count >= walker->skip) {
 465                                if (walker->fn(tp,
 466                                    (unsigned long) (p->perfect+i), walker)
 467                                     < 0) {
 468                                        walker->stop = 1;
 469                                        return;
 470                                }
 471                        }
 472                        walker->count++;
 473                }
 474        }
 475        if (!p->h)
 476                return;
 477        for (i = 0; i < p->hash; i++) {
 478                for (f = rtnl_dereference(p->h[i]); f; f = next) {
 479                        next = rtnl_dereference(f->next);
 480                        if (walker->count >= walker->skip) {
 481                                if (walker->fn(tp, (unsigned long) &f->result,
 482                                    walker) < 0) {
 483                                        walker->stop = 1;
 484                                        return;
 485                                }
 486                        }
 487                        walker->count++;
 488                }
 489        }
 490}
 491
 492static bool tcindex_destroy(struct tcf_proto *tp, bool force)
 493{
 494        struct tcindex_data *p = rtnl_dereference(tp->root);
 495        struct tcf_walker walker;
 496
 497        if (!force)
 498                return false;
 499
 500        pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
 501        walker.count = 0;
 502        walker.skip = 0;
 503        walker.fn = tcindex_destroy_element;
 504        tcindex_walk(tp, &walker);
 505
 506        RCU_INIT_POINTER(tp->root, NULL);
 507        call_rcu(&p->rcu, __tcindex_destroy);
 508        return true;
 509}
 510
 511
 512static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 513    struct sk_buff *skb, struct tcmsg *t)
 514{
 515        struct tcindex_data *p = rtnl_dereference(tp->root);
 516        struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
 517        struct nlattr *nest;
 518
 519        pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n",
 520                 tp, fh, skb, t, p, r);
 521        pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
 522
 523        nest = nla_nest_start(skb, TCA_OPTIONS);
 524        if (nest == NULL)
 525                goto nla_put_failure;
 526
 527        if (!fh) {
 528                t->tcm_handle = ~0; /* whatever ... */
 529                if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
 530                    nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
 531                    nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
 532                    nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
 533                        goto nla_put_failure;
 534                nla_nest_end(skb, nest);
 535        } else {
 536                if (p->perfect) {
 537                        t->tcm_handle = r - p->perfect;
 538                } else {
 539                        struct tcindex_filter *f;
 540                        struct tcindex_filter __rcu **fp;
 541                        int i;
 542
 543                        t->tcm_handle = 0;
 544                        for (i = 0; !t->tcm_handle && i < p->hash; i++) {
 545                                fp = &p->h[i];
 546                                for (f = rtnl_dereference(*fp);
 547                                     !t->tcm_handle && f;
 548                                     fp = &f->next, f = rtnl_dereference(*fp)) {
 549                                        if (&f->result == r)
 550                                                t->tcm_handle = f->key;
 551                                }
 552                        }
 553                }
 554                pr_debug("handle = %d\n", t->tcm_handle);
 555                if (r->res.class &&
 556                    nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
 557                        goto nla_put_failure;
 558
 559                if (tcf_exts_dump(skb, &r->exts) < 0)
 560                        goto nla_put_failure;
 561                nla_nest_end(skb, nest);
 562
 563                if (tcf_exts_dump_stats(skb, &r->exts) < 0)
 564                        goto nla_put_failure;
 565        }
 566
 567        return skb->len;
 568
 569nla_put_failure:
 570        nla_nest_cancel(skb, nest);
 571        return -1;
 572}
 573
 574static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
 575        .kind           =       "tcindex",
 576        .classify       =       tcindex_classify,
 577        .init           =       tcindex_init,
 578        .destroy        =       tcindex_destroy,
 579        .get            =       tcindex_get,
 580        .change         =       tcindex_change,
 581        .delete         =       tcindex_delete,
 582        .walk           =       tcindex_walk,
 583        .dump           =       tcindex_dump,
 584        .owner          =       THIS_MODULE,
 585};
 586
 587static int __init init_tcindex(void)
 588{
 589        return register_tcf_proto_ops(&cls_tcindex_ops);
 590}
 591
 592static void __exit exit_tcindex(void)
 593{
 594        unregister_tcf_proto_ops(&cls_tcindex_ops);
 595}
 596
 597module_init(init_tcindex)
 598module_exit(exit_tcindex)
 599MODULE_LICENSE("GPL");
 600