linux/net/sched/cls_u32.c
<<
>>
Prefs
   1/*
   2 * net/sched/cls_u32.c  Ugly (or Universal) 32bit key Packet Classifier.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *
  11 *      The filters are packed to hash tables of key nodes
  12 *      with a set of 32bit key/mask pairs at every node.
  13 *      Nodes reference next level hash tables etc.
  14 *
  15 *      This scheme is the best universal classifier I managed to
  16 *      invent; it is not super-fast, but it is not slow (provided you
  17 *      program it correctly), and general enough.  And its relative
  18 *      speed grows as the number of rules becomes larger.
  19 *
  20 *      It seems that it represents the best middle point between
  21 *      speed and manageability both by human and by machine.
  22 *
  23 *      It is especially useful for link sharing combined with QoS;
  24 *      pure RSVP doesn't need such a general approach and can use
  25 *      much simpler (and faster) schemes, sort of cls_rsvp.c.
  26 *
  27 *      JHS: We should remove the CONFIG_NET_CLS_IND from here
  28 *      eventually when the meta match extension is made available
  29 *
  30 *      nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
  31 */
  32
  33#include <linux/module.h>
  34#include <linux/slab.h>
  35#include <linux/types.h>
  36#include <linux/kernel.h>
  37#include <linux/string.h>
  38#include <linux/errno.h>
  39#include <linux/percpu.h>
  40#include <linux/rtnetlink.h>
  41#include <linux/skbuff.h>
  42#include <linux/bitmap.h>
  43#include <net/netlink.h>
  44#include <net/act_api.h>
  45#include <net/pkt_cls.h>
  46#include <linux/netdevice.h>
  47
  48struct tc_u_knode {
  49        struct tc_u_knode __rcu *next;
  50        u32                     handle;
  51        struct tc_u_hnode __rcu *ht_up;
  52        struct tcf_exts         exts;
  53#ifdef CONFIG_NET_CLS_IND
  54        int                     ifindex;
  55#endif
  56        u8                      fshift;
  57        struct tcf_result       res;
  58        struct tc_u_hnode __rcu *ht_down;
  59#ifdef CONFIG_CLS_U32_PERF
  60        struct tc_u32_pcnt __percpu *pf;
  61#endif
  62        u32                     flags;
  63#ifdef CONFIG_CLS_U32_MARK
  64        u32                     val;
  65        u32                     mask;
  66        u32 __percpu            *pcpu_success;
  67#endif
  68        struct tcf_proto        *tp;
  69        struct rcu_head         rcu;
  70        /* The 'sel' field MUST be the last field in structure to allow for
  71         * tc_u32_keys allocated at end of structure.
  72         */
  73        struct tc_u32_sel       sel;
  74};
  75
  76struct tc_u_hnode {
  77        struct tc_u_hnode __rcu *next;
  78        u32                     handle;
  79        u32                     prio;
  80        struct tc_u_common      *tp_c;
  81        int                     refcnt;
  82        unsigned int            divisor;
  83        struct rcu_head         rcu;
  84        /* The 'ht' field MUST be the last field in structure to allow for
  85         * more entries allocated at end of structure.
  86         */
  87        struct tc_u_knode __rcu *ht[1];
  88};
  89
  90struct tc_u_common {
  91        struct tc_u_hnode __rcu *hlist;
  92        struct Qdisc            *q;
  93        int                     refcnt;
  94        u32                     hgenerator;
  95        struct rcu_head         rcu;
  96};
  97
  98static inline unsigned int u32_hash_fold(__be32 key,
  99                                         const struct tc_u32_sel *sel,
 100                                         u8 fshift)
 101{
 102        unsigned int h = ntohl(key & sel->hmask) >> fshift;
 103
 104        return h;
 105}
 106
 107static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 108                        struct tcf_result *res)
 109{
 110        struct {
 111                struct tc_u_knode *knode;
 112                unsigned int      off;
 113        } stack[TC_U32_MAXDEPTH];
 114
 115        struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
 116        unsigned int off = skb_network_offset(skb);
 117        struct tc_u_knode *n;
 118        int sdepth = 0;
 119        int off2 = 0;
 120        int sel = 0;
 121#ifdef CONFIG_CLS_U32_PERF
 122        int j;
 123#endif
 124        int i, r;
 125
 126next_ht:
 127        n = rcu_dereference_bh(ht->ht[sel]);
 128
 129next_knode:
 130        if (n) {
 131                struct tc_u32_key *key = n->sel.keys;
 132
 133#ifdef CONFIG_CLS_U32_PERF
 134                __this_cpu_inc(n->pf->rcnt);
 135                j = 0;
 136#endif
 137
 138                if (tc_skip_sw(n->flags)) {
 139                        n = rcu_dereference_bh(n->next);
 140                        goto next_knode;
 141                }
 142
 143#ifdef CONFIG_CLS_U32_MARK
 144                if ((skb->mark & n->mask) != n->val) {
 145                        n = rcu_dereference_bh(n->next);
 146                        goto next_knode;
 147                } else {
 148                        __this_cpu_inc(*n->pcpu_success);
 149                }
 150#endif
 151
 152                for (i = n->sel.nkeys; i > 0; i--, key++) {
 153                        int toff = off + key->off + (off2 & key->offmask);
 154                        __be32 *data, hdata;
 155
 156                        if (skb_headroom(skb) + toff > INT_MAX)
 157                                goto out;
 158
 159                        data = skb_header_pointer(skb, toff, 4, &hdata);
 160                        if (!data)
 161                                goto out;
 162                        if ((*data ^ key->val) & key->mask) {
 163                                n = rcu_dereference_bh(n->next);
 164                                goto next_knode;
 165                        }
 166#ifdef CONFIG_CLS_U32_PERF
 167                        __this_cpu_inc(n->pf->kcnts[j]);
 168                        j++;
 169#endif
 170                }
 171
 172                ht = rcu_dereference_bh(n->ht_down);
 173                if (!ht) {
 174check_terminal:
 175                        if (n->sel.flags & TC_U32_TERMINAL) {
 176
 177                                *res = n->res;
 178#ifdef CONFIG_NET_CLS_IND
 179                                if (!tcf_match_indev(skb, n->ifindex)) {
 180                                        n = rcu_dereference_bh(n->next);
 181                                        goto next_knode;
 182                                }
 183#endif
 184#ifdef CONFIG_CLS_U32_PERF
 185                                __this_cpu_inc(n->pf->rhit);
 186#endif
 187                                r = tcf_exts_exec(skb, &n->exts, res);
 188                                if (r < 0) {
 189                                        n = rcu_dereference_bh(n->next);
 190                                        goto next_knode;
 191                                }
 192
 193                                return r;
 194                        }
 195                        n = rcu_dereference_bh(n->next);
 196                        goto next_knode;
 197                }
 198
 199                /* PUSH */
 200                if (sdepth >= TC_U32_MAXDEPTH)
 201                        goto deadloop;
 202                stack[sdepth].knode = n;
 203                stack[sdepth].off = off;
 204                sdepth++;
 205
 206                ht = rcu_dereference_bh(n->ht_down);
 207                sel = 0;
 208                if (ht->divisor) {
 209                        __be32 *data, hdata;
 210
 211                        data = skb_header_pointer(skb, off + n->sel.hoff, 4,
 212                                                  &hdata);
 213                        if (!data)
 214                                goto out;
 215                        sel = ht->divisor & u32_hash_fold(*data, &n->sel,
 216                                                          n->fshift);
 217                }
 218                if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
 219                        goto next_ht;
 220
 221                if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
 222                        off2 = n->sel.off + 3;
 223                        if (n->sel.flags & TC_U32_VAROFFSET) {
 224                                __be16 *data, hdata;
 225
 226                                data = skb_header_pointer(skb,
 227                                                          off + n->sel.offoff,
 228                                                          2, &hdata);
 229                                if (!data)
 230                                        goto out;
 231                                off2 += ntohs(n->sel.offmask & *data) >>
 232                                        n->sel.offshift;
 233                        }
 234                        off2 &= ~3;
 235                }
 236                if (n->sel.flags & TC_U32_EAT) {
 237                        off += off2;
 238                        off2 = 0;
 239                }
 240
 241                if (off < skb->len)
 242                        goto next_ht;
 243        }
 244
 245        /* POP */
 246        if (sdepth--) {
 247                n = stack[sdepth].knode;
 248                ht = rcu_dereference_bh(n->ht_up);
 249                off = stack[sdepth].off;
 250                goto check_terminal;
 251        }
 252out:
 253        return -1;
 254
 255deadloop:
 256        net_warn_ratelimited("cls_u32: dead loop\n");
 257        return -1;
 258}
 259
 260static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
 261{
 262        struct tc_u_hnode *ht;
 263
 264        for (ht = rtnl_dereference(tp_c->hlist);
 265             ht;
 266             ht = rtnl_dereference(ht->next))
 267                if (ht->handle == handle)
 268                        break;
 269
 270        return ht;
 271}
 272
 273static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
 274{
 275        unsigned int sel;
 276        struct tc_u_knode *n = NULL;
 277
 278        sel = TC_U32_HASH(handle);
 279        if (sel > ht->divisor)
 280                goto out;
 281
 282        for (n = rtnl_dereference(ht->ht[sel]);
 283             n;
 284             n = rtnl_dereference(n->next))
 285                if (n->handle == handle)
 286                        break;
 287out:
 288        return n;
 289}
 290
 291
 292static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
 293{
 294        struct tc_u_hnode *ht;
 295        struct tc_u_common *tp_c = tp->data;
 296
 297        if (TC_U32_HTID(handle) == TC_U32_ROOT)
 298                ht = rtnl_dereference(tp->root);
 299        else
 300                ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
 301
 302        if (!ht)
 303                return 0;
 304
 305        if (TC_U32_KEY(handle) == 0)
 306                return (unsigned long)ht;
 307
 308        return (unsigned long)u32_lookup_key(ht, handle);
 309}
 310
 311static u32 gen_new_htid(struct tc_u_common *tp_c)
 312{
 313        int i = 0x800;
 314
 315        /* hgenerator only used inside rtnl lock it is safe to increment
 316         * without read _copy_ update semantics
 317         */
 318        do {
 319                if (++tp_c->hgenerator == 0x7FF)
 320                        tp_c->hgenerator = 1;
 321        } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
 322
 323        return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
 324}
 325
 326static int u32_init(struct tcf_proto *tp)
 327{
 328        struct tc_u_hnode *root_ht;
 329        struct tc_u_common *tp_c;
 330
 331        tp_c = tp->q->u32_node;
 332
 333        root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
 334        if (root_ht == NULL)
 335                return -ENOBUFS;
 336
 337        root_ht->refcnt++;
 338        root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
 339        root_ht->prio = tp->prio;
 340
 341        if (tp_c == NULL) {
 342                tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
 343                if (tp_c == NULL) {
 344                        kfree(root_ht);
 345                        return -ENOBUFS;
 346                }
 347                tp_c->q = tp->q;
 348                tp->q->u32_node = tp_c;
 349        }
 350
 351        tp_c->refcnt++;
 352        RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
 353        rcu_assign_pointer(tp_c->hlist, root_ht);
 354        root_ht->tp_c = tp_c;
 355
 356        rcu_assign_pointer(tp->root, root_ht);
 357        tp->data = tp_c;
 358        return 0;
 359}
 360
 361static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
 362                           bool free_pf)
 363{
 364        tcf_exts_destroy(&n->exts);
 365        if (n->ht_down)
 366                n->ht_down->refcnt--;
 367#ifdef CONFIG_CLS_U32_PERF
 368        if (free_pf)
 369                free_percpu(n->pf);
 370#endif
 371#ifdef CONFIG_CLS_U32_MARK
 372        if (free_pf)
 373                free_percpu(n->pcpu_success);
 374#endif
 375        kfree(n);
 376        return 0;
 377}
 378
 379/* u32_delete_key_rcu should be called when free'ing a copied
 380 * version of a tc_u_knode obtained from u32_init_knode(). When
 381 * copies are obtained from u32_init_knode() the statistics are
 382 * shared between the old and new copies to allow readers to
 383 * continue to update the statistics during the copy. To support
 384 * this the u32_delete_key_rcu variant does not free the percpu
 385 * statistics.
 386 */
 387static void u32_delete_key_rcu(struct rcu_head *rcu)
 388{
 389        struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
 390
 391        u32_destroy_key(key->tp, key, false);
 392}
 393
 394/* u32_delete_key_freepf_rcu is the rcu callback variant
 395 * that free's the entire structure including the statistics
 396 * percpu variables. Only use this if the key is not a copy
 397 * returned by u32_init_knode(). See u32_delete_key_rcu()
 398 * for the variant that should be used with keys return from
 399 * u32_init_knode()
 400 */
 401static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
 402{
 403        struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
 404
 405        u32_destroy_key(key->tp, key, true);
 406}
 407
 408static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 409{
 410        struct tc_u_knode __rcu **kp;
 411        struct tc_u_knode *pkp;
 412        struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
 413
 414        if (ht) {
 415                kp = &ht->ht[TC_U32_HASH(key->handle)];
 416                for (pkp = rtnl_dereference(*kp); pkp;
 417                     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
 418                        if (pkp == key) {
 419                                RCU_INIT_POINTER(*kp, key->next);
 420
 421                                tcf_unbind_filter(tp, &key->res);
 422                                call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
 423                                return 0;
 424                        }
 425                }
 426        }
 427        WARN_ON(1);
 428        return 0;
 429}
 430
 431static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
 432{
 433        struct net_device *dev = tp->q->dev_queue->dev;
 434        struct tc_cls_u32_offload u32_offload = {0};
 435        struct tc_to_netdev offload;
 436
 437        offload.type = TC_SETUP_CLSU32;
 438        offload.cls_u32 = &u32_offload;
 439
 440        if (tc_should_offload(dev, tp, 0)) {
 441                offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
 442                offload.cls_u32->knode.handle = handle;
 443                dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 444                                              tp->protocol, &offload);
 445        }
 446}
 447
 448static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 449                                u32 flags)
 450{
 451        struct net_device *dev = tp->q->dev_queue->dev;
 452        struct tc_cls_u32_offload u32_offload = {0};
 453        struct tc_to_netdev offload;
 454        int err;
 455
 456        if (!tc_should_offload(dev, tp, flags))
 457                return tc_skip_sw(flags) ? -EINVAL : 0;
 458
 459        offload.type = TC_SETUP_CLSU32;
 460        offload.cls_u32 = &u32_offload;
 461
 462        offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
 463        offload.cls_u32->hnode.divisor = h->divisor;
 464        offload.cls_u32->hnode.handle = h->handle;
 465        offload.cls_u32->hnode.prio = h->prio;
 466
 467        err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 468                                            tp->protocol, &offload);
 469        if (tc_skip_sw(flags))
 470                return err;
 471
 472        return 0;
 473}
 474
 475static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
 476{
 477        struct net_device *dev = tp->q->dev_queue->dev;
 478        struct tc_cls_u32_offload u32_offload = {0};
 479        struct tc_to_netdev offload;
 480
 481        offload.type = TC_SETUP_CLSU32;
 482        offload.cls_u32 = &u32_offload;
 483
 484        if (tc_should_offload(dev, tp, 0)) {
 485                offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
 486                offload.cls_u32->hnode.divisor = h->divisor;
 487                offload.cls_u32->hnode.handle = h->handle;
 488                offload.cls_u32->hnode.prio = h->prio;
 489
 490                dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 491                                              tp->protocol, &offload);
 492        }
 493}
 494
 495static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 496                                u32 flags)
 497{
 498        struct net_device *dev = tp->q->dev_queue->dev;
 499        struct tc_cls_u32_offload u32_offload = {0};
 500        struct tc_to_netdev offload;
 501        int err;
 502
 503        offload.type = TC_SETUP_CLSU32;
 504        offload.cls_u32 = &u32_offload;
 505
 506        if (!tc_should_offload(dev, tp, flags))
 507                return tc_skip_sw(flags) ? -EINVAL : 0;
 508
 509        offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
 510        offload.cls_u32->knode.handle = n->handle;
 511        offload.cls_u32->knode.fshift = n->fshift;
 512#ifdef CONFIG_CLS_U32_MARK
 513        offload.cls_u32->knode.val = n->val;
 514        offload.cls_u32->knode.mask = n->mask;
 515#else
 516        offload.cls_u32->knode.val = 0;
 517        offload.cls_u32->knode.mask = 0;
 518#endif
 519        offload.cls_u32->knode.sel = &n->sel;
 520        offload.cls_u32->knode.exts = &n->exts;
 521        if (n->ht_down)
 522                offload.cls_u32->knode.link_handle = n->ht_down->handle;
 523
 524        err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 525                                            tp->protocol, &offload);
 526
 527        if (!err)
 528                n->flags |= TCA_CLS_FLAGS_IN_HW;
 529
 530        if (tc_skip_sw(flags))
 531                return err;
 532
 533        return 0;
 534}
 535
 536static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 537{
 538        struct tc_u_knode *n;
 539        unsigned int h;
 540
 541        for (h = 0; h <= ht->divisor; h++) {
 542                while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
 543                        RCU_INIT_POINTER(ht->ht[h],
 544                                         rtnl_dereference(n->next));
 545                        tcf_unbind_filter(tp, &n->res);
 546                        u32_remove_hw_knode(tp, n->handle);
 547                        call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
 548                }
 549        }
 550}
 551
 552static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 553{
 554        struct tc_u_common *tp_c = tp->data;
 555        struct tc_u_hnode __rcu **hn;
 556        struct tc_u_hnode *phn;
 557
 558        WARN_ON(ht->refcnt);
 559
 560        u32_clear_hnode(tp, ht);
 561
 562        hn = &tp_c->hlist;
 563        for (phn = rtnl_dereference(*hn);
 564             phn;
 565             hn = &phn->next, phn = rtnl_dereference(*hn)) {
 566                if (phn == ht) {
 567                        u32_clear_hw_hnode(tp, ht);
 568                        RCU_INIT_POINTER(*hn, ht->next);
 569                        kfree_rcu(ht, rcu);
 570                        return 0;
 571                }
 572        }
 573
 574        return -ENOENT;
 575}
 576
 577static bool ht_empty(struct tc_u_hnode *ht)
 578{
 579        unsigned int h;
 580
 581        for (h = 0; h <= ht->divisor; h++)
 582                if (rcu_access_pointer(ht->ht[h]))
 583                        return false;
 584
 585        return true;
 586}
 587
 588static void u32_destroy(struct tcf_proto *tp)
 589{
 590        struct tc_u_common *tp_c = tp->data;
 591        struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 592
 593        WARN_ON(root_ht == NULL);
 594
 595        if (root_ht && --root_ht->refcnt == 0)
 596                u32_destroy_hnode(tp, root_ht);
 597
 598        if (--tp_c->refcnt == 0) {
 599                struct tc_u_hnode *ht;
 600
 601                tp->q->u32_node = NULL;
 602
 603                for (ht = rtnl_dereference(tp_c->hlist);
 604                     ht;
 605                     ht = rtnl_dereference(ht->next)) {
 606                        ht->refcnt--;
 607                        u32_clear_hnode(tp, ht);
 608                }
 609
 610                while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
 611                        RCU_INIT_POINTER(tp_c->hlist, ht->next);
 612                        kfree_rcu(ht, rcu);
 613                }
 614
 615                kfree(tp_c);
 616        }
 617
 618        tp->data = NULL;
 619}
 620
 621static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
 622{
 623        struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
 624        struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 625        struct tc_u_common *tp_c = tp->data;
 626        int ret = 0;
 627
 628        if (ht == NULL)
 629                goto out;
 630
 631        if (TC_U32_KEY(ht->handle)) {
 632                u32_remove_hw_knode(tp, ht->handle);
 633                ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
 634                goto out;
 635        }
 636
 637        if (root_ht == ht)
 638                return -EINVAL;
 639
 640        if (ht->refcnt == 1) {
 641                ht->refcnt--;
 642                u32_destroy_hnode(tp, ht);
 643        } else {
 644                return -EBUSY;
 645        }
 646
 647out:
 648        *last = true;
 649        if (root_ht) {
 650                if (root_ht->refcnt > 1) {
 651                        *last = false;
 652                        goto ret;
 653                }
 654                if (root_ht->refcnt == 1) {
 655                        if (!ht_empty(root_ht)) {
 656                                *last = false;
 657                                goto ret;
 658                        }
 659                }
 660        }
 661
 662        if (tp_c->refcnt > 1) {
 663                *last = false;
 664                goto ret;
 665        }
 666
 667        if (tp_c->refcnt == 1) {
 668                struct tc_u_hnode *ht;
 669
 670                for (ht = rtnl_dereference(tp_c->hlist);
 671                     ht;
 672                     ht = rtnl_dereference(ht->next))
 673                        if (!ht_empty(ht)) {
 674                                *last = false;
 675                                break;
 676                        }
 677        }
 678
 679ret:
 680        return ret;
 681}
 682
 683#define NR_U32_NODE (1<<12)
 684static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
 685{
 686        struct tc_u_knode *n;
 687        unsigned long i;
 688        unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
 689                                        GFP_KERNEL);
 690        if (!bitmap)
 691                return handle | 0xFFF;
 692
 693        for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
 694             n;
 695             n = rtnl_dereference(n->next))
 696                set_bit(TC_U32_NODE(n->handle), bitmap);
 697
 698        i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
 699        if (i >= NR_U32_NODE)
 700                i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
 701
 702        kfree(bitmap);
 703        return handle | (i >= NR_U32_NODE ? 0xFFF : i);
 704}
 705
 706static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
 707        [TCA_U32_CLASSID]       = { .type = NLA_U32 },
 708        [TCA_U32_HASH]          = { .type = NLA_U32 },
 709        [TCA_U32_LINK]          = { .type = NLA_U32 },
 710        [TCA_U32_DIVISOR]       = { .type = NLA_U32 },
 711        [TCA_U32_SEL]           = { .len = sizeof(struct tc_u32_sel) },
 712        [TCA_U32_INDEV]         = { .type = NLA_STRING, .len = IFNAMSIZ },
 713        [TCA_U32_MARK]          = { .len = sizeof(struct tc_u32_mark) },
 714        [TCA_U32_FLAGS]         = { .type = NLA_U32 },
 715};
 716
 717static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 718                         unsigned long base, struct tc_u_hnode *ht,
 719                         struct tc_u_knode *n, struct nlattr **tb,
 720                         struct nlattr *est, bool ovr)
 721{
 722        struct tcf_exts e;
 723        int err;
 724
 725        err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
 726        if (err < 0)
 727                return err;
 728        err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 729        if (err < 0)
 730                goto errout;
 731
 732        err = -EINVAL;
 733        if (tb[TCA_U32_LINK]) {
 734                u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
 735                struct tc_u_hnode *ht_down = NULL, *ht_old;
 736
 737                if (TC_U32_KEY(handle))
 738                        goto errout;
 739
 740                if (handle) {
 741                        ht_down = u32_lookup_ht(ht->tp_c, handle);
 742
 743                        if (ht_down == NULL)
 744                                goto errout;
 745                        ht_down->refcnt++;
 746                }
 747
 748                ht_old = rtnl_dereference(n->ht_down);
 749                rcu_assign_pointer(n->ht_down, ht_down);
 750
 751                if (ht_old)
 752                        ht_old->refcnt--;
 753        }
 754        if (tb[TCA_U32_CLASSID]) {
 755                n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
 756                tcf_bind_filter(tp, &n->res, base);
 757        }
 758
 759#ifdef CONFIG_NET_CLS_IND
 760        if (tb[TCA_U32_INDEV]) {
 761                int ret;
 762                ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
 763                if (ret < 0)
 764                        goto errout;
 765                n->ifindex = ret;
 766        }
 767#endif
 768        tcf_exts_change(tp, &n->exts, &e);
 769
 770        return 0;
 771errout:
 772        tcf_exts_destroy(&e);
 773        return err;
 774}
 775
 776static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
 777                              struct tc_u_knode *n)
 778{
 779        struct tc_u_knode __rcu **ins;
 780        struct tc_u_knode *pins;
 781        struct tc_u_hnode *ht;
 782
 783        if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
 784                ht = rtnl_dereference(tp->root);
 785        else
 786                ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
 787
 788        ins = &ht->ht[TC_U32_HASH(n->handle)];
 789
 790        /* The node must always exist for it to be replaced if this is not the
 791         * case then something went very wrong elsewhere.
 792         */
 793        for (pins = rtnl_dereference(*ins); ;
 794             ins = &pins->next, pins = rtnl_dereference(*ins))
 795                if (pins->handle == n->handle)
 796                        break;
 797
 798        RCU_INIT_POINTER(n->next, pins->next);
 799        rcu_assign_pointer(*ins, n);
 800}
 801
 802static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
 803                                         struct tc_u_knode *n)
 804{
 805        struct tc_u_knode *new;
 806        struct tc_u32_sel *s = &n->sel;
 807
 808        new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
 809                      GFP_KERNEL);
 810
 811        if (!new)
 812                return NULL;
 813
 814        RCU_INIT_POINTER(new->next, n->next);
 815        new->handle = n->handle;
 816        RCU_INIT_POINTER(new->ht_up, n->ht_up);
 817
 818#ifdef CONFIG_NET_CLS_IND
 819        new->ifindex = n->ifindex;
 820#endif
 821        new->fshift = n->fshift;
 822        new->res = n->res;
 823        new->flags = n->flags;
 824        RCU_INIT_POINTER(new->ht_down, n->ht_down);
 825
 826        /* bump reference count as long as we hold pointer to structure */
 827        if (new->ht_down)
 828                new->ht_down->refcnt++;
 829
 830#ifdef CONFIG_CLS_U32_PERF
 831        /* Statistics may be incremented by readers during update
 832         * so we must keep them in tact. When the node is later destroyed
 833         * a special destroy call must be made to not free the pf memory.
 834         */
 835        new->pf = n->pf;
 836#endif
 837
 838#ifdef CONFIG_CLS_U32_MARK
 839        new->val = n->val;
 840        new->mask = n->mask;
 841        /* Similarly success statistics must be moved as pointers */
 842        new->pcpu_success = n->pcpu_success;
 843#endif
 844        new->tp = tp;
 845        memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
 846
 847        if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
 848                kfree(new);
 849                return NULL;
 850        }
 851
 852        return new;
 853}
 854
 855static int u32_change(struct net *net, struct sk_buff *in_skb,
 856                      struct tcf_proto *tp, unsigned long base, u32 handle,
 857                      struct nlattr **tca, unsigned long *arg, bool ovr)
 858{
 859        struct tc_u_common *tp_c = tp->data;
 860        struct tc_u_hnode *ht;
 861        struct tc_u_knode *n;
 862        struct tc_u32_sel *s;
 863        struct nlattr *opt = tca[TCA_OPTIONS];
 864        struct nlattr *tb[TCA_U32_MAX + 1];
 865        u32 htid, flags = 0;
 866        int err;
 867#ifdef CONFIG_CLS_U32_PERF
 868        size_t size;
 869#endif
 870
 871        if (opt == NULL)
 872                return handle ? -EINVAL : 0;
 873
 874        err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
 875        if (err < 0)
 876                return err;
 877
 878        if (tb[TCA_U32_FLAGS]) {
 879                flags = nla_get_u32(tb[TCA_U32_FLAGS]);
 880                if (!tc_flags_valid(flags))
 881                        return -EINVAL;
 882        }
 883
 884        n = (struct tc_u_knode *)*arg;
 885        if (n) {
 886                struct tc_u_knode *new;
 887
 888                if (TC_U32_KEY(n->handle) == 0)
 889                        return -EINVAL;
 890
 891                if (n->flags != flags)
 892                        return -EINVAL;
 893
 894                new = u32_init_knode(tp, n);
 895                if (!new)
 896                        return -ENOMEM;
 897
 898                err = u32_set_parms(net, tp, base,
 899                                    rtnl_dereference(n->ht_up), new, tb,
 900                                    tca[TCA_RATE], ovr);
 901
 902                if (err) {
 903                        u32_destroy_key(tp, new, false);
 904                        return err;
 905                }
 906
 907                err = u32_replace_hw_knode(tp, new, flags);
 908                if (err) {
 909                        u32_destroy_key(tp, new, false);
 910                        return err;
 911                }
 912
 913                if (!tc_in_hw(new->flags))
 914                        new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 915
 916                u32_replace_knode(tp, tp_c, new);
 917                tcf_unbind_filter(tp, &n->res);
 918                call_rcu(&n->rcu, u32_delete_key_rcu);
 919                return 0;
 920        }
 921
 922        if (tb[TCA_U32_DIVISOR]) {
 923                unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
 924
 925                if (--divisor > 0x100)
 926                        return -EINVAL;
 927                if (TC_U32_KEY(handle))
 928                        return -EINVAL;
 929                if (handle == 0) {
 930                        handle = gen_new_htid(tp->data);
 931                        if (handle == 0)
 932                                return -ENOMEM;
 933                }
 934                ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
 935                if (ht == NULL)
 936                        return -ENOBUFS;
 937                ht->tp_c = tp_c;
 938                ht->refcnt = 1;
 939                ht->divisor = divisor;
 940                ht->handle = handle;
 941                ht->prio = tp->prio;
 942
 943                err = u32_replace_hw_hnode(tp, ht, flags);
 944                if (err) {
 945                        kfree(ht);
 946                        return err;
 947                }
 948
 949                RCU_INIT_POINTER(ht->next, tp_c->hlist);
 950                rcu_assign_pointer(tp_c->hlist, ht);
 951                *arg = (unsigned long)ht;
 952
 953                return 0;
 954        }
 955
 956        if (tb[TCA_U32_HASH]) {
 957                htid = nla_get_u32(tb[TCA_U32_HASH]);
 958                if (TC_U32_HTID(htid) == TC_U32_ROOT) {
 959                        ht = rtnl_dereference(tp->root);
 960                        htid = ht->handle;
 961                } else {
 962                        ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
 963                        if (ht == NULL)
 964                                return -EINVAL;
 965                }
 966        } else {
 967                ht = rtnl_dereference(tp->root);
 968                htid = ht->handle;
 969        }
 970
 971        if (ht->divisor < TC_U32_HASH(htid))
 972                return -EINVAL;
 973
 974        if (handle) {
 975                if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
 976                        return -EINVAL;
 977                handle = htid | TC_U32_NODE(handle);
 978        } else
 979                handle = gen_new_kid(ht, htid);
 980
 981        if (tb[TCA_U32_SEL] == NULL)
 982                return -EINVAL;
 983
 984        s = nla_data(tb[TCA_U32_SEL]);
 985
 986        n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
 987        if (n == NULL)
 988                return -ENOBUFS;
 989
 990#ifdef CONFIG_CLS_U32_PERF
 991        size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
 992        n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
 993        if (!n->pf) {
 994                kfree(n);
 995                return -ENOBUFS;
 996        }
 997#endif
 998
 999        memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
1000        RCU_INIT_POINTER(n->ht_up, ht);
1001        n->handle = handle;
1002        n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
1003        n->flags = flags;
1004        n->tp = tp;
1005
1006        err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
1007        if (err < 0)
1008                goto errout;
1009
1010#ifdef CONFIG_CLS_U32_MARK
1011        n->pcpu_success = alloc_percpu(u32);
1012        if (!n->pcpu_success) {
1013                err = -ENOMEM;
1014                goto errout;
1015        }
1016
1017        if (tb[TCA_U32_MARK]) {
1018                struct tc_u32_mark *mark;
1019
1020                mark = nla_data(tb[TCA_U32_MARK]);
1021                n->val = mark->val;
1022                n->mask = mark->mask;
1023        }
1024#endif
1025
1026        err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
1027        if (err == 0) {
1028                struct tc_u_knode __rcu **ins;
1029                struct tc_u_knode *pins;
1030
1031                err = u32_replace_hw_knode(tp, n, flags);
1032                if (err)
1033                        goto errhw;
1034
1035                if (!tc_in_hw(n->flags))
1036                        n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
1037
1038                ins = &ht->ht[TC_U32_HASH(handle)];
1039                for (pins = rtnl_dereference(*ins); pins;
1040                     ins = &pins->next, pins = rtnl_dereference(*ins))
1041                        if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
1042                                break;
1043
1044                RCU_INIT_POINTER(n->next, pins);
1045                rcu_assign_pointer(*ins, n);
1046                *arg = (unsigned long)n;
1047                return 0;
1048        }
1049
1050errhw:
1051#ifdef CONFIG_CLS_U32_MARK
1052        free_percpu(n->pcpu_success);
1053#endif
1054
1055errout:
1056        tcf_exts_destroy(&n->exts);
1057#ifdef CONFIG_CLS_U32_PERF
1058        free_percpu(n->pf);
1059#endif
1060        kfree(n);
1061        return err;
1062}
1063
1064static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
1065{
1066        struct tc_u_common *tp_c = tp->data;
1067        struct tc_u_hnode *ht;
1068        struct tc_u_knode *n;
1069        unsigned int h;
1070
1071        if (arg->stop)
1072                return;
1073
1074        for (ht = rtnl_dereference(tp_c->hlist);
1075             ht;
1076             ht = rtnl_dereference(ht->next)) {
1077                if (ht->prio != tp->prio)
1078                        continue;
1079                if (arg->count >= arg->skip) {
1080                        if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
1081                                arg->stop = 1;
1082                                return;
1083                        }
1084                }
1085                arg->count++;
1086                for (h = 0; h <= ht->divisor; h++) {
1087                        for (n = rtnl_dereference(ht->ht[h]);
1088                             n;
1089                             n = rtnl_dereference(n->next)) {
1090                                if (arg->count < arg->skip) {
1091                                        arg->count++;
1092                                        continue;
1093                                }
1094                                if (arg->fn(tp, (unsigned long)n, arg) < 0) {
1095                                        arg->stop = 1;
1096                                        return;
1097                                }
1098                                arg->count++;
1099                        }
1100                }
1101        }
1102}
1103
1104static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
1105                    struct sk_buff *skb, struct tcmsg *t)
1106{
1107        struct tc_u_knode *n = (struct tc_u_knode *)fh;
1108        struct tc_u_hnode *ht_up, *ht_down;
1109        struct nlattr *nest;
1110
1111        if (n == NULL)
1112                return skb->len;
1113
1114        t->tcm_handle = n->handle;
1115
1116        nest = nla_nest_start(skb, TCA_OPTIONS);
1117        if (nest == NULL)
1118                goto nla_put_failure;
1119
1120        if (TC_U32_KEY(n->handle) == 0) {
1121                struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
1122                u32 divisor = ht->divisor + 1;
1123
1124                if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
1125                        goto nla_put_failure;
1126        } else {
1127#ifdef CONFIG_CLS_U32_PERF
1128                struct tc_u32_pcnt *gpf;
1129                int cpu;
1130#endif
1131
1132                if (nla_put(skb, TCA_U32_SEL,
1133                            sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
1134                            &n->sel))
1135                        goto nla_put_failure;
1136
1137                ht_up = rtnl_dereference(n->ht_up);
1138                if (ht_up) {
1139                        u32 htid = n->handle & 0xFFFFF000;
1140                        if (nla_put_u32(skb, TCA_U32_HASH, htid))
1141                                goto nla_put_failure;
1142                }
1143                if (n->res.classid &&
1144                    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
1145                        goto nla_put_failure;
1146
1147                ht_down = rtnl_dereference(n->ht_down);
1148                if (ht_down &&
1149                    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1150                        goto nla_put_failure;
1151
1152                if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1153                        goto nla_put_failure;
1154
1155#ifdef CONFIG_CLS_U32_MARK
1156                if ((n->val || n->mask)) {
1157                        struct tc_u32_mark mark = {.val = n->val,
1158                                                   .mask = n->mask,
1159                                                   .success = 0};
1160                        int cpum;
1161
1162                        for_each_possible_cpu(cpum) {
1163                                __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1164
1165                                mark.success += cnt;
1166                        }
1167
1168                        if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
1169                                goto nla_put_failure;
1170                }
1171#endif
1172
1173                if (tcf_exts_dump(skb, &n->exts) < 0)
1174                        goto nla_put_failure;
1175
1176#ifdef CONFIG_NET_CLS_IND
1177                if (n->ifindex) {
1178                        struct net_device *dev;
1179                        dev = __dev_get_by_index(net, n->ifindex);
1180                        if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
1181                                goto nla_put_failure;
1182                }
1183#endif
1184#ifdef CONFIG_CLS_U32_PERF
1185                gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
1186                              n->sel.nkeys * sizeof(u64),
1187                              GFP_KERNEL);
1188                if (!gpf)
1189                        goto nla_put_failure;
1190
1191                for_each_possible_cpu(cpu) {
1192                        int i;
1193                        struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
1194
1195                        gpf->rcnt += pf->rcnt;
1196                        gpf->rhit += pf->rhit;
1197                        for (i = 0; i < n->sel.nkeys; i++)
1198                                gpf->kcnts[i] += pf->kcnts[i];
1199                }
1200
1201                if (nla_put_64bit(skb, TCA_U32_PCNT,
1202                                  sizeof(struct tc_u32_pcnt) +
1203                                  n->sel.nkeys * sizeof(u64),
1204                                  gpf, TCA_U32_PAD)) {
1205                        kfree(gpf);
1206                        goto nla_put_failure;
1207                }
1208                kfree(gpf);
1209#endif
1210        }
1211
1212        nla_nest_end(skb, nest);
1213
1214        if (TC_U32_KEY(n->handle))
1215                if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1216                        goto nla_put_failure;
1217        return skb->len;
1218
1219nla_put_failure:
1220        nla_nest_cancel(skb, nest);
1221        return -1;
1222}
1223
1224static struct tcf_proto_ops cls_u32_ops __read_mostly = {
1225        .kind           =       "u32",
1226        .classify       =       u32_classify,
1227        .init           =       u32_init,
1228        .destroy        =       u32_destroy,
1229        .get            =       u32_get,
1230        .change         =       u32_change,
1231        .delete         =       u32_delete,
1232        .walk           =       u32_walk,
1233        .dump           =       u32_dump,
1234        .owner          =       THIS_MODULE,
1235};
1236
1237static int __init init_u32(void)
1238{
1239        pr_info("u32 classifier\n");
1240#ifdef CONFIG_CLS_U32_PERF
1241        pr_info("    Performance counters on\n");
1242#endif
1243#ifdef CONFIG_NET_CLS_IND
1244        pr_info("    input device check on\n");
1245#endif
1246#ifdef CONFIG_NET_CLS_ACT
1247        pr_info("    Actions configured\n");
1248#endif
1249        return register_tcf_proto_ops(&cls_u32_ops);
1250}
1251
1252static void __exit exit_u32(void)
1253{
1254        unregister_tcf_proto_ops(&cls_u32_ops);
1255}
1256
1257module_init(init_u32)
1258module_exit(exit_u32)
1259MODULE_LICENSE("GPL");
1260