linux/net/sched/cls_tcindex.c
/*
 * net/sched/cls_tcindex.c      Packet classifier for skb->tc_index
 *
 * Written 1998,1999 by Werner Almesberger, EPFL ICA
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>

/*
 * Passing parameters to the root seems to be done more awkwardly than really
 * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
 * verified. FIXME.
 */

#define PERFECT_HASH_THRESHOLD  64      /* use perfect hash if not bigger */
#define DEFAULT_HASH_SIZE       64      /* optimized for diffserv */

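/* Filter results live either in a "perfect" hash (a plain array indexed
 * directly by the key) or in an "imperfect" hash of chained tcindex_filter
 * entries bucketed by key % hash.
 */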
struct tcindex_filter_result {
        struct tcf_exts         exts;
        struct tcf_result       res;
};

struct tcindex_filter {
        u16 key;
        struct tcindex_filter_result result;
        struct tcindex_filter __rcu *next;
        struct rcu_head rcu;
};


struct tcindex_data {
        struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
        struct tcindex_filter __rcu **h; /* imperfect hash; only used if !perfect */
        struct tcf_proto *tp;
        u16 mask;               /* AND key with mask */
        u32 shift;              /* shift ANDed key to the right */
        u32 hash;               /* hash table size; 0 if undefined */
        u32 alloc_hash;         /* allocated size */
        u32 fall_through;       /* 0: only classify if explicit match */
        struct rcu_head rcu;
};

static inline int
tcindex_filter_is_set(struct tcindex_filter_result *r)
{
        return tcf_exts_is_predicative(&r->exts) || r->res.classid;
}

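/* Look up the filter result for @key: index directly into the perfect hash
 * if one exists, otherwise walk the imperfect-hash chain at key % hash.
 */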
static struct tcindex_filter_result *
tcindex_lookup(struct tcindex_data *p, u16 key)
{
        if (p->perfect) {
                struct tcindex_filter_result *f = p->perfect + key;

                return tcindex_filter_is_set(f) ? f : NULL;
        } else if (p->h) {
                struct tcindex_filter __rcu **fp;
                struct tcindex_filter *f;

                fp = &p->h[key % p->hash];
                for (f = rcu_dereference_bh_rtnl(*fp);
                     f;
                     fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
                        if (f->key == key)
                                return &f->result;
        }

        return NULL;
}

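/* Classify a packet by its tc_index. The key is (tc_index & mask) >> shift;
 * as an illustrative example (values not from this file), mask 0x3f and
 * shift 2 map tc_index 0x2a to key (0x2a & 0x3f) >> 2 = 0xa. If no filter
 * matches and fall_through is set, a classid is synthesized from the
 * qdisc's major handle and the key.
 */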
static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                            struct tcf_result *res)
{
        struct tcindex_data *p = rcu_dereference_bh(tp->root);
        struct tcindex_filter_result *f;
        int key = (skb->tc_index & p->mask) >> p->shift;

        pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
                 skb, tp, res, p);

        f = tcindex_lookup(p, key);
        if (!f) {
                if (!p->fall_through)
                        return -1;
                res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
                res->class = 0;
                pr_debug("alg 0x%x\n", res->classid);
                return 0;
        }
        *res = f->res;
        pr_debug("map 0x%x\n", res->classid);

        return tcf_exts_exec(skb, &f->exts, res);
}

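/* Map a handle to its filter result, or return 0 if the result is unset. */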
static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
{
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcindex_filter_result *r;

        pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
        if (p->perfect && handle >= p->alloc_hash)
                return 0;
        r = tcindex_lookup(p, handle);
        return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
}

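/* Allocate the per-protocol data with its defaults: mask 0xffff, a hash
 * table of DEFAULT_HASH_SIZE buckets, and fall-through classification on.
 */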
static int tcindex_init(struct tcf_proto *tp)
{
        struct tcindex_data *p;

        pr_debug("tcindex_init(tp %p)\n", tp);
        p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
        if (!p)
                return -ENOMEM;

        p->mask = 0xffff;
        p->hash = DEFAULT_HASH_SIZE;
        p->fall_through = 1;

        rcu_assign_pointer(tp->root, p);
        return 0;
}

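/* Remove one filter result. Imperfect-hash entries are unlinked under RTNL
 * and freed after an RCU grace period; perfect-hash entries have their
 * class unbound and extensions destroyed in place.
 */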
static int
tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
        struct tcindex_filter __rcu **walk;
        struct tcindex_filter *f = NULL;

        pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
        if (p->perfect) {
                if (!r->res.class)
                        return -ENOENT;
        } else {
                int i;

                for (i = 0; i < p->hash; i++) {
                        walk = p->h + i;
                        for (f = rtnl_dereference(*walk); f;
                             walk = &f->next, f = rtnl_dereference(*walk)) {
                                if (&f->result == r)
                                        goto found;
                        }
                }
                return -ENOENT;

found:
                rcu_assign_pointer(*walk, rtnl_dereference(f->next));
        }
        tcf_unbind_filter(tp, &r->res);
        tcf_exts_destroy(&r->exts);
        if (f)
                kfree_rcu(f, rcu);
        return 0;
}

static int tcindex_destroy_element(struct tcf_proto *tp,
                                   unsigned long arg,
                                   struct tcf_walker *walker)
{
        return tcindex_delete(tp, arg);
}

static void __tcindex_destroy(struct rcu_head *head)
{
        struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);

        kfree(p->perfect);
        kfree(p->h);
        kfree(p);
}

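/* The largest possible key is mask >> shift, so a perfect hash is valid
 * only if the table has more entries than that maximum key value.
 */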
static inline int
valid_perfect_hash(struct tcindex_data *p)
{
        return p->hash > (p->mask >> p->shift);
}

static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
        [TCA_TCINDEX_HASH]              = { .type = NLA_U32 },
        [TCA_TCINDEX_MASK]              = { .type = NLA_U16 },
        [TCA_TCINDEX_SHIFT]             = { .type = NLA_U32 },
        [TCA_TCINDEX_FALL_THROUGH]      = { .type = NLA_U32 },
        [TCA_TCINDEX_CLASSID]           = { .type = NLA_U32 },
};

static void tcindex_filter_result_init(struct tcindex_filter_result *r)
{
        memset(r, 0, sizeof(*r));
        tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
}

static void __tcindex_partial_destroy(struct rcu_head *head)
{
        struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);

        kfree(p->perfect);
        kfree(p);
}

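/* Create or update a filter. The existing tcindex_data is copied, the new
 * parameters are applied and validated against the copy, and only then is
 * the copy RCU-swapped into tp->root, so concurrent lookups always see a
 * consistent snapshot. The old data is freed after a grace period.
 */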
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
                  u32 handle, struct tcindex_data *p,
                  struct tcindex_filter_result *r, struct nlattr **tb,
                  struct nlattr *est, bool ovr)
{
        int err, balloc = 0;
        struct tcindex_filter_result new_filter_result, *old_r = r;
        struct tcindex_filter_result cr;
        struct tcindex_data *cp, *oldp;
        struct tcindex_filter *f = NULL; /* make gcc behave */
        struct tcf_exts e;

        tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
        err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
        if (err < 0)
                return err;

        err = -ENOMEM;
        /* tcindex_data attributes must look atomic to the classifier/lookup,
         * so allocate a new tcindex_data and RCU-assign it onto the root,
         * keeping the perfect hash and hash pointers from the old data.
         */
        cp = kzalloc(sizeof(*cp), GFP_KERNEL);
        if (!cp)
                goto errout;

        cp->mask = p->mask;
        cp->shift = p->shift;
        cp->hash = p->hash;
        cp->alloc_hash = p->alloc_hash;
        cp->fall_through = p->fall_through;
        cp->tp = tp;

        if (p->perfect) {
                int i;

                cp->perfect = kmemdup(p->perfect,
                                      sizeof(*r) * cp->hash, GFP_KERNEL);
                if (!cp->perfect)
                        goto errout;
                for (i = 0; i < cp->hash; i++)
                        tcf_exts_init(&cp->perfect[i].exts,
                                      TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
                balloc = 1;
        }
        cp->h = p->h;

        tcindex_filter_result_init(&new_filter_result);
        tcindex_filter_result_init(&cr);
        if (old_r)
                cr.res = r->res;

        if (tb[TCA_TCINDEX_HASH])
                cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);

        if (tb[TCA_TCINDEX_MASK])
                cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);

        if (tb[TCA_TCINDEX_SHIFT])
                cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);

        err = -EBUSY;

        /* Hash already allocated, make sure that we still meet the
         * requirements for the allocated hash.
         */
        if (cp->perfect) {
                if (!valid_perfect_hash(cp) ||
                    cp->hash > cp->alloc_hash)
                        goto errout_alloc;
        } else if (cp->h && cp->hash != cp->alloc_hash) {
                goto errout_alloc;
        }

        err = -EINVAL;
        if (tb[TCA_TCINDEX_FALL_THROUGH])
                cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);

        if (!cp->hash) {
                /* Hash not specified, use perfect hash if the upper limit
                 * of the hashing index is below the threshold.
                 */
                if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
                        cp->hash = (cp->mask >> cp->shift) + 1;
                else
                        cp->hash = DEFAULT_HASH_SIZE;
        }

        if (!cp->perfect && !cp->h)
                cp->alloc_hash = cp->hash;

        /* Note: this could be as restrictive as if (handle & ~(mask >> shift))
         * but then, we'd fail handles that may become valid after some future
         * mask change. While this is extremely unlikely to ever matter,
         * the check below is safer (and also more backwards-compatible).
         */
        if (cp->perfect || valid_perfect_hash(cp))
                if (handle >= cp->alloc_hash)
                        goto errout_alloc;

        err = -ENOMEM;
        if (!cp->perfect && !cp->h) {
                if (valid_perfect_hash(cp)) {
                        int i;

                        cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
                        if (!cp->perfect)
                                goto errout_alloc;
                        for (i = 0; i < cp->hash; i++)
                                tcf_exts_init(&cp->perfect[i].exts,
                                              TCA_TCINDEX_ACT,
                                              TCA_TCINDEX_POLICE);
                        balloc = 1;
                } else {
                        struct tcindex_filter __rcu **hash;

                        hash = kcalloc(cp->hash,
                                       sizeof(struct tcindex_filter *),
                                       GFP_KERNEL);

                        if (!hash)
                                goto errout_alloc;

                        cp->h = hash;
                        balloc = 2;
                }
        }

        if (cp->perfect)
                r = cp->perfect + handle;
        else
                r = tcindex_lookup(cp, handle) ? : &new_filter_result;

        if (r == &new_filter_result) {
                f = kzalloc(sizeof(*f), GFP_KERNEL);
                if (!f)
                        goto errout_alloc;
                f->key = handle;
                tcindex_filter_result_init(&f->result);
                f->next = NULL;
        }

        if (tb[TCA_TCINDEX_CLASSID]) {
                cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
                tcf_bind_filter(tp, &cr.res, base);
        }

        if (old_r)
                tcf_exts_change(tp, &r->exts, &e);
        else
                tcf_exts_change(tp, &cr.exts, &e);

        if (old_r && old_r != r)
                tcindex_filter_result_init(old_r);

        oldp = p;
        r->res = cr.res;
        rcu_assign_pointer(tp->root, cp);

        if (r == &new_filter_result) {
                struct tcindex_filter *nfp;
                struct tcindex_filter __rcu **fp;

                tcf_exts_change(tp, &f->result.exts, &r->exts);

                fp = cp->h + (handle % cp->hash);
                for (nfp = rtnl_dereference(*fp);
                     nfp;
                     fp = &nfp->next, nfp = rtnl_dereference(*fp))
                        ; /* nothing */

                rcu_assign_pointer(*fp, f);
        }

        if (oldp)
                call_rcu(&oldp->rcu, __tcindex_partial_destroy);
        return 0;

errout_alloc:
        if (balloc == 1)
                kfree(cp->perfect);
        else if (balloc == 2)
                kfree(cp->h);
errout:
        kfree(cp);
        tcf_exts_destroy(&e);
        return err;
}

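/* Netlink entry point for filter creation/update: parse TCA_OPTIONS
 * against tcindex_policy and hand the attributes to tcindex_set_parms().
 */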
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
               struct tcf_proto *tp, unsigned long base, u32 handle,
               struct nlattr **tca, unsigned long *arg, bool ovr)
{
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_TCINDEX_MAX + 1];
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
        int err;

        pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
            "p %p,r %p,*arg 0x%lx\n",
            tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);

        if (!opt)
                return 0;

        err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy);
        if (err < 0)
                return err;

        return tcindex_set_parms(net, tp, base, handle, p, r, tb,
                                 tca[TCA_RATE], ovr);
}

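/* Iterate over all set filter results (perfect hash first, then every
 * imperfect-hash chain), honouring the walker's skip/count bookkeeping.
 */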
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
{
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcindex_filter *f, *next;
        int i;

        pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
        if (p->perfect) {
                for (i = 0; i < p->hash; i++) {
                        if (!p->perfect[i].res.class)
                                continue;
                        if (walker->count >= walker->skip) {
                                if (walker->fn(tp,
                                    (unsigned long) (p->perfect+i), walker)
                                     < 0) {
                                        walker->stop = 1;
                                        return;
                                }
                        }
                        walker->count++;
                }
        }
        if (!p->h)
                return;
        for (i = 0; i < p->hash; i++) {
                for (f = rtnl_dereference(p->h[i]); f; f = next) {
                        next = rtnl_dereference(f->next);
                        if (walker->count >= walker->skip) {
                                if (walker->fn(tp, (unsigned long) &f->result,
                                    walker) < 0) {
                                        walker->stop = 1;
                                        return;
                                }
                        }
                        walker->count++;
                }
        }
}

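/* Tear down the classifier (only when forced): delete every filter via
 * a walker and free the tcindex_data after an RCU grace period.
 */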
static bool tcindex_destroy(struct tcf_proto *tp, bool force)
{
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcf_walker walker;

        if (!force)
                return false;

        pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
        walker.count = 0;
        walker.skip = 0;
        walker.fn = tcindex_destroy_element;
        tcindex_walk(tp, &walker);

        RCU_INIT_POINTER(tp->root, NULL);
        call_rcu(&p->rcu, __tcindex_destroy);
        return true;
}

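/* Dump either the per-protocol parameters (fh == 0) or a single filter
 * result; for the imperfect hash the handle is recovered by searching
 * the chains for the matching result.
 */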
static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
    struct sk_buff *skb, struct tcmsg *t)
{
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
        struct nlattr *nest;

        pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n",
                 tp, fh, skb, t, p, r);
        pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        if (!fh) {
                t->tcm_handle = ~0; /* whatever ... */
                if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
                    nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
                    nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
                    nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
                        goto nla_put_failure;
                nla_nest_end(skb, nest);
        } else {
                if (p->perfect) {
                        t->tcm_handle = r - p->perfect;
                } else {
                        struct tcindex_filter *f;
                        struct tcindex_filter __rcu **fp;
                        int i;

                        t->tcm_handle = 0;
                        for (i = 0; !t->tcm_handle && i < p->hash; i++) {
                                fp = &p->h[i];
                                for (f = rtnl_dereference(*fp);
                                     !t->tcm_handle && f;
                                     fp = &f->next, f = rtnl_dereference(*fp)) {
                                        if (&f->result == r)
                                                t->tcm_handle = f->key;
                                }
                        }
                }
                pr_debug("handle = %d\n", t->tcm_handle);
                if (r->res.class &&
                    nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
                        goto nla_put_failure;

                if (tcf_exts_dump(skb, &r->exts) < 0)
                        goto nla_put_failure;
                nla_nest_end(skb, nest);

                if (tcf_exts_dump_stats(skb, &r->exts) < 0)
                        goto nla_put_failure;
        }

        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
        .kind           =       "tcindex",
        .classify       =       tcindex_classify,
        .init           =       tcindex_init,
        .destroy        =       tcindex_destroy,
        .get            =       tcindex_get,
        .change         =       tcindex_change,
        .delete         =       tcindex_delete,
        .walk           =       tcindex_walk,
        .dump           =       tcindex_dump,
        .owner          =       THIS_MODULE,
};

static int __init init_tcindex(void)
{
        return register_tcf_proto_ops(&cls_tcindex_ops);
}

static void __exit exit_tcindex(void)
{
        unregister_tcf_proto_ops(&cls_tcindex_ops);
}

module_init(init_tcindex)
module_exit(exit_tcindex)
MODULE_LICENSE("GPL");