linux/net/sched/cls_u32.c
<<
>>
Prefs
   1/*
   2 * net/sched/cls_u32.c  Ugly (or Universal) 32bit key Packet Classifier.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *
  11 *      The filters are packed to hash tables of key nodes
  12 *      with a set of 32bit key/mask pairs at every node.
  13 *      Nodes reference next level hash tables etc.
  14 *
  15 *      This scheme is the best universal classifier I managed to
  16 *      invent; it is not super-fast, but it is not slow (provided you
  17 *      program it correctly), and general enough.  And its relative
  18 *      speed grows as the number of rules becomes larger.
  19 *
  20 *      It seems that it represents the best middle point between
  21 *      speed and manageability both by human and by machine.
  22 *
  23 *      It is especially useful for link sharing combined with QoS;
  24 *      pure RSVP doesn't need such a general approach and can use
  25 *      much simpler (and faster) schemes, sort of cls_rsvp.c.
  26 *
  27 *      JHS: We should remove the CONFIG_NET_CLS_IND from here
  28 *      eventually when the meta match extension is made available
  29 *
  30 *      nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
  31 */
  32
  33#include <linux/module.h>
  34#include <linux/slab.h>
  35#include <linux/types.h>
  36#include <linux/kernel.h>
  37#include <linux/string.h>
  38#include <linux/errno.h>
  39#include <linux/percpu.h>
  40#include <linux/rtnetlink.h>
  41#include <linux/skbuff.h>
  42#include <linux/bitmap.h>
  43#include <net/netlink.h>
  44#include <net/act_api.h>
  45#include <net/pkt_cls.h>
  46#include <linux/netdevice.h>
  47
/* A single u32 filter ("key node"): a selector made of 32bit key/mask
 * pairs, plus what to do when the selector matches (classification result,
 * actions and/or a link to a lower-level hash table).
 */
struct tc_u_knode {
        /* Next node chained in the same hash bucket. */
        struct tc_u_knode __rcu *next;
        /* Node handle; TC_U32_HASH(handle) selects the bucket it lives in. */
        u32                     handle;
        /* Hash table whose bucket chain contains this node. */
        struct tc_u_hnode __rcu *ht_up;
        /* Extensions run through tcf_exts_exec() on a terminal match. */
        struct tcf_exts         exts;
#ifdef CONFIG_NET_CLS_IND
        /* Checked against the packet with tcf_match_indev(). */
        int                     ifindex;
#endif
        /* Right shift applied to the masked hash key in u32_hash_fold(). */
        u8                      fshift;
        /* Result copied to the caller on a terminal match. */
        struct tcf_result       res;
        /* Optional lower-level table entered once all keys match; this node
         * holds one reference (refcnt) on it.
         */
        struct tc_u_hnode __rcu *ht_down;
#ifdef CONFIG_CLS_U32_PERF
        /* Per-cpu counters (rcnt, rhit, kcnts[]) bumped by u32_classify(). */
        struct tc_u32_pcnt __percpu *pf;
#endif
        /* Compared with TCA_U32_FLAGS when the node is replaced. */
        u32                     flags;
#ifdef CONFIG_CLS_U32_MARK
        /* Node is skipped unless (skb->mark & mask) == val. */
        u32                     val;
        u32                     mask;
        /* Per-cpu count of packets that passed the mark test. */
        u32 __percpu            *pcpu_success;
#endif
        /* Passed to u32_destroy_key() by the RCU free callbacks. */
        struct tcf_proto        *tp;
        /* Used with call_rcu() to defer freeing of the node. */
        struct rcu_head         rcu;
        /* The 'sel' field MUST be the last field in structure to allow for
         * tc_u32_keys allocated at end of structure.
         */
        struct tc_u32_sel       sel;
};
  75
/* A hash table of key nodes.  All tables sharing one tc_u_common are linked
 * through 'next' starting at tc_u_common.hlist.
 */
struct tc_u_hnode {
        /* Next table on the tc_u_common.hlist list. */
        struct tc_u_hnode __rcu *next;
        /* Table handle, matched by u32_lookup_ht(). */
        u32                     handle;
        /* Copied from tp->prio when the table is created. */
        u32                     prio;
        /* Shared state this table is listed on. */
        struct tc_u_common      *tp_c;
        /* Starts at 1 on creation; bumped for every key node linking to this
         * table through ht_down.
         */
        int                     refcnt;
        /* Highest valid bucket index: the table has divisor + 1 buckets and
         * walkers iterate with "h <= divisor".
         */
        unsigned int            divisor;
        /* Used with kfree_rcu() to defer freeing of the table. */
        struct rcu_head         rcu;
        /* The 'ht' field MUST be the last field in structure to allow for
         * more entries allocated at end of structure.
         */
        struct tc_u_knode __rcu *ht[1];
};
  89
/* State shared by the u32 classifier instances attached to one Qdisc; it is
 * stored in q->u32_node and in tp->data.
 */
struct tc_u_common {
        /* Head of the list of all hash tables (linked via tc_u_hnode.next). */
        struct tc_u_hnode __rcu *hlist;
        /* Qdisc this state belongs to. */
        struct Qdisc            *q;
        /* One reference per u32_init(); dropped in u32_destroy(). */
        int                     refcnt;
        /* Running counter used by gen_new_htid() to pick table ids. */
        u32                     hgenerator;
        struct rcu_head         rcu;
};
  97
  98static inline unsigned int u32_hash_fold(__be32 key,
  99                                         const struct tc_u32_sel *sel,
 100                                         u8 fshift)
 101{
 102        unsigned int h = ntohl(key & sel->hmask) >> fshift;
 103
 104        return h;
 105}
 106
 107static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res)
 108{
 109        struct {
 110                struct tc_u_knode *knode;
 111                unsigned int      off;
 112        } stack[TC_U32_MAXDEPTH];
 113
 114        struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
 115        unsigned int off = skb_network_offset(skb);
 116        struct tc_u_knode *n;
 117        int sdepth = 0;
 118        int off2 = 0;
 119        int sel = 0;
 120#ifdef CONFIG_CLS_U32_PERF
 121        int j;
 122#endif
 123        int i, r;
 124
 125next_ht:
 126        n = rcu_dereference_bh(ht->ht[sel]);
 127
 128next_knode:
 129        if (n) {
 130                struct tc_u32_key *key = n->sel.keys;
 131
 132#ifdef CONFIG_CLS_U32_PERF
 133                __this_cpu_inc(n->pf->rcnt);
 134                j = 0;
 135#endif
 136
 137#ifdef CONFIG_CLS_U32_MARK
 138                if ((skb->mark & n->mask) != n->val) {
 139                        n = rcu_dereference_bh(n->next);
 140                        goto next_knode;
 141                } else {
 142                        __this_cpu_inc(*n->pcpu_success);
 143                }
 144#endif
 145
 146                for (i = n->sel.nkeys; i > 0; i--, key++) {
 147                        int toff = off + key->off + (off2 & key->offmask);
 148                        __be32 *data, hdata;
 149
 150                        if (skb_headroom(skb) + toff > INT_MAX)
 151                                goto out;
 152
 153                        data = skb_header_pointer(skb, toff, 4, &hdata);
 154                        if (!data)
 155                                goto out;
 156                        if ((*data ^ key->val) & key->mask) {
 157                                n = rcu_dereference_bh(n->next);
 158                                goto next_knode;
 159                        }
 160#ifdef CONFIG_CLS_U32_PERF
 161                        __this_cpu_inc(n->pf->kcnts[j]);
 162                        j++;
 163#endif
 164                }
 165
 166                ht = rcu_dereference_bh(n->ht_down);
 167                if (!ht) {
 168check_terminal:
 169                        if (n->sel.flags & TC_U32_TERMINAL) {
 170
 171                                *res = n->res;
 172#ifdef CONFIG_NET_CLS_IND
 173                                if (!tcf_match_indev(skb, n->ifindex)) {
 174                                        n = rcu_dereference_bh(n->next);
 175                                        goto next_knode;
 176                                }
 177#endif
 178#ifdef CONFIG_CLS_U32_PERF
 179                                __this_cpu_inc(n->pf->rhit);
 180#endif
 181                                r = tcf_exts_exec(skb, &n->exts, res);
 182                                if (r < 0) {
 183                                        n = rcu_dereference_bh(n->next);
 184                                        goto next_knode;
 185                                }
 186
 187                                return r;
 188                        }
 189                        n = rcu_dereference_bh(n->next);
 190                        goto next_knode;
 191                }
 192
 193                /* PUSH */
 194                if (sdepth >= TC_U32_MAXDEPTH)
 195                        goto deadloop;
 196                stack[sdepth].knode = n;
 197                stack[sdepth].off = off;
 198                sdepth++;
 199
 200                ht = rcu_dereference_bh(n->ht_down);
 201                sel = 0;
 202                if (ht->divisor) {
 203                        __be32 *data, hdata;
 204
 205                        data = skb_header_pointer(skb, off + n->sel.hoff, 4,
 206                                                  &hdata);
 207                        if (!data)
 208                                goto out;
 209                        sel = ht->divisor & u32_hash_fold(*data, &n->sel,
 210                                                          n->fshift);
 211                }
 212                if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
 213                        goto next_ht;
 214
 215                if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
 216                        off2 = n->sel.off + 3;
 217                        if (n->sel.flags & TC_U32_VAROFFSET) {
 218                                __be16 *data, hdata;
 219
 220                                data = skb_header_pointer(skb,
 221                                                          off + n->sel.offoff,
 222                                                          2, &hdata);
 223                                if (!data)
 224                                        goto out;
 225                                off2 += ntohs(n->sel.offmask & *data) >>
 226                                        n->sel.offshift;
 227                        }
 228                        off2 &= ~3;
 229                }
 230                if (n->sel.flags & TC_U32_EAT) {
 231                        off += off2;
 232                        off2 = 0;
 233                }
 234
 235                if (off < skb->len)
 236                        goto next_ht;
 237        }
 238
 239        /* POP */
 240        if (sdepth--) {
 241                n = stack[sdepth].knode;
 242                ht = rcu_dereference_bh(n->ht_up);
 243                off = stack[sdepth].off;
 244                goto check_terminal;
 245        }
 246out:
 247        return -1;
 248
 249deadloop:
 250        net_warn_ratelimited("cls_u32: dead loop\n");
 251        return -1;
 252}
 253
 254static struct tc_u_hnode *
 255u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
 256{
 257        struct tc_u_hnode *ht;
 258
 259        for (ht = rtnl_dereference(tp_c->hlist);
 260             ht;
 261             ht = rtnl_dereference(ht->next))
 262                if (ht->handle == handle)
 263                        break;
 264
 265        return ht;
 266}
 267
 268static struct tc_u_knode *
 269u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
 270{
 271        unsigned int sel;
 272        struct tc_u_knode *n = NULL;
 273
 274        sel = TC_U32_HASH(handle);
 275        if (sel > ht->divisor)
 276                goto out;
 277
 278        for (n = rtnl_dereference(ht->ht[sel]);
 279             n;
 280             n = rtnl_dereference(n->next))
 281                if (n->handle == handle)
 282                        break;
 283out:
 284        return n;
 285}
 286
 287
 288static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
 289{
 290        struct tc_u_hnode *ht;
 291        struct tc_u_common *tp_c = tp->data;
 292
 293        if (TC_U32_HTID(handle) == TC_U32_ROOT)
 294                ht = rtnl_dereference(tp->root);
 295        else
 296                ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
 297
 298        if (!ht)
 299                return 0;
 300
 301        if (TC_U32_KEY(handle) == 0)
 302                return (unsigned long)ht;
 303
 304        return (unsigned long)u32_lookup_key(ht, handle);
 305}
 306
 307static u32 gen_new_htid(struct tc_u_common *tp_c)
 308{
 309        int i = 0x800;
 310
 311        /* hgenerator only used inside rtnl lock it is safe to increment
 312         * without read _copy_ update semantics
 313         */
 314        do {
 315                if (++tp_c->hgenerator == 0x7FF)
 316                        tp_c->hgenerator = 1;
 317        } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
 318
 319        return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
 320}
 321
 322static int u32_init(struct tcf_proto *tp)
 323{
 324        struct tc_u_hnode *root_ht;
 325        struct tc_u_common *tp_c;
 326
 327        tp_c = tp->q->u32_node;
 328
 329        root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
 330        if (root_ht == NULL)
 331                return -ENOBUFS;
 332
 333        root_ht->divisor = 0;
 334        root_ht->refcnt++;
 335        root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
 336        root_ht->prio = tp->prio;
 337
 338        if (tp_c == NULL) {
 339                tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
 340                if (tp_c == NULL) {
 341                        kfree(root_ht);
 342                        return -ENOBUFS;
 343                }
 344                tp_c->q = tp->q;
 345                tp->q->u32_node = tp_c;
 346        }
 347
 348        tp_c->refcnt++;
 349        RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
 350        rcu_assign_pointer(tp_c->hlist, root_ht);
 351        root_ht->tp_c = tp_c;
 352
 353        rcu_assign_pointer(tp->root, root_ht);
 354        tp->data = tp_c;
 355        return 0;
 356}
 357
 358static int u32_destroy_key(struct tcf_proto *tp,
 359                           struct tc_u_knode *n,
 360                           bool free_pf)
 361{
 362        tcf_exts_destroy(&n->exts);
 363        if (n->ht_down)
 364                n->ht_down->refcnt--;
 365#ifdef CONFIG_CLS_U32_PERF
 366        if (free_pf)
 367                free_percpu(n->pf);
 368#endif
 369#ifdef CONFIG_CLS_U32_MARK
 370        if (free_pf)
 371                free_percpu(n->pcpu_success);
 372#endif
 373        kfree(n);
 374        return 0;
 375}
 376
 377/* u32_delete_key_rcu should be called when free'ing a copied
 378 * version of a tc_u_knode obtained from u32_init_knode(). When
 379 * copies are obtained from u32_init_knode() the statistics are
 380 * shared between the old and new copies to allow readers to
 381 * continue to update the statistics during the copy. To support
 382 * this the u32_delete_key_rcu variant does not free the percpu
 383 * statistics.
 384 */
 385static void u32_delete_key_rcu(struct rcu_head *rcu)
 386{
 387        struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
 388
 389        u32_destroy_key(key->tp, key, false);
 390}
 391
 392/* u32_delete_key_freepf_rcu is the rcu callback variant
 393 * that free's the entire structure including the statistics
 394 * percpu variables. Only use this if the key is not a copy
 395 * returned by u32_init_knode(). See u32_delete_key_rcu()
 396 * for the variant that should be used with keys return from
 397 * u32_init_knode()
 398 */
 399static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
 400{
 401        struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
 402
 403        u32_destroy_key(key->tp, key, true);
 404}
 405
 406static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 407{
 408        struct tc_u_knode __rcu **kp;
 409        struct tc_u_knode *pkp;
 410        struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
 411
 412        if (ht) {
 413                kp = &ht->ht[TC_U32_HASH(key->handle)];
 414                for (pkp = rtnl_dereference(*kp); pkp;
 415                     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
 416                        if (pkp == key) {
 417                                RCU_INIT_POINTER(*kp, key->next);
 418
 419                                tcf_unbind_filter(tp, &key->res);
 420                                call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
 421                                return 0;
 422                        }
 423                }
 424        }
 425        WARN_ON(1);
 426        return 0;
 427}
 428
 429static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
 430{
 431        struct net_device *dev = tp->q->dev_queue->dev;
 432        struct tc_cls_u32_offload u32_offload = {0};
 433        struct tc_to_netdev offload;
 434
 435        offload.type = TC_SETUP_CLSU32;
 436        offload.cls_u32 = &u32_offload;
 437
 438        if (tc_should_offload(dev, 0)) {
 439                offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
 440                offload.cls_u32->knode.handle = handle;
 441                dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 442                                              tp->protocol, &offload);
 443        }
 444}
 445
 446static void u32_replace_hw_hnode(struct tcf_proto *tp,
 447                                 struct tc_u_hnode *h,
 448                                 u32 flags)
 449{
 450        struct net_device *dev = tp->q->dev_queue->dev;
 451        struct tc_cls_u32_offload u32_offload = {0};
 452        struct tc_to_netdev offload;
 453
 454        offload.type = TC_SETUP_CLSU32;
 455        offload.cls_u32 = &u32_offload;
 456
 457        if (tc_should_offload(dev, flags)) {
 458                offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
 459                offload.cls_u32->hnode.divisor = h->divisor;
 460                offload.cls_u32->hnode.handle = h->handle;
 461                offload.cls_u32->hnode.prio = h->prio;
 462
 463                dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 464                                              tp->protocol, &offload);
 465        }
 466}
 467
 468static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
 469{
 470        struct net_device *dev = tp->q->dev_queue->dev;
 471        struct tc_cls_u32_offload u32_offload = {0};
 472        struct tc_to_netdev offload;
 473
 474        offload.type = TC_SETUP_CLSU32;
 475        offload.cls_u32 = &u32_offload;
 476
 477        if (tc_should_offload(dev, 0)) {
 478                offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
 479                offload.cls_u32->hnode.divisor = h->divisor;
 480                offload.cls_u32->hnode.handle = h->handle;
 481                offload.cls_u32->hnode.prio = h->prio;
 482
 483                dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 484                                              tp->protocol, &offload);
 485        }
 486}
 487
 488static void u32_replace_hw_knode(struct tcf_proto *tp,
 489                                 struct tc_u_knode *n,
 490                                 u32 flags)
 491{
 492        struct net_device *dev = tp->q->dev_queue->dev;
 493        struct tc_cls_u32_offload u32_offload = {0};
 494        struct tc_to_netdev offload;
 495
 496        offload.type = TC_SETUP_CLSU32;
 497        offload.cls_u32 = &u32_offload;
 498
 499        if (tc_should_offload(dev, flags)) {
 500                offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
 501                offload.cls_u32->knode.handle = n->handle;
 502                offload.cls_u32->knode.fshift = n->fshift;
 503#ifdef CONFIG_CLS_U32_MARK
 504                offload.cls_u32->knode.val = n->val;
 505                offload.cls_u32->knode.mask = n->mask;
 506#else
 507                offload.cls_u32->knode.val = 0;
 508                offload.cls_u32->knode.mask = 0;
 509#endif
 510                offload.cls_u32->knode.sel = &n->sel;
 511                offload.cls_u32->knode.exts = &n->exts;
 512                if (n->ht_down)
 513                        offload.cls_u32->knode.link_handle = n->ht_down->handle;
 514
 515                dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 516                                              tp->protocol, &offload);
 517        }
 518}
 519
 520static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 521{
 522        struct tc_u_knode *n;
 523        unsigned int h;
 524
 525        for (h = 0; h <= ht->divisor; h++) {
 526                while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
 527                        RCU_INIT_POINTER(ht->ht[h],
 528                                         rtnl_dereference(n->next));
 529                        tcf_unbind_filter(tp, &n->res);
 530                        u32_remove_hw_knode(tp, n->handle);
 531                        call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
 532                }
 533        }
 534}
 535
 536static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 537{
 538        struct tc_u_common *tp_c = tp->data;
 539        struct tc_u_hnode __rcu **hn;
 540        struct tc_u_hnode *phn;
 541
 542        WARN_ON(ht->refcnt);
 543
 544        u32_clear_hnode(tp, ht);
 545
 546        hn = &tp_c->hlist;
 547        for (phn = rtnl_dereference(*hn);
 548             phn;
 549             hn = &phn->next, phn = rtnl_dereference(*hn)) {
 550                if (phn == ht) {
 551                        u32_clear_hw_hnode(tp, ht);
 552                        RCU_INIT_POINTER(*hn, ht->next);
 553                        kfree_rcu(ht, rcu);
 554                        return 0;
 555                }
 556        }
 557
 558        return -ENOENT;
 559}
 560
 561static bool ht_empty(struct tc_u_hnode *ht)
 562{
 563        unsigned int h;
 564
 565        for (h = 0; h <= ht->divisor; h++)
 566                if (rcu_access_pointer(ht->ht[h]))
 567                        return false;
 568
 569        return true;
 570}
 571
 572static bool u32_destroy(struct tcf_proto *tp, bool force)
 573{
 574        struct tc_u_common *tp_c = tp->data;
 575        struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 576
 577        WARN_ON(root_ht == NULL);
 578
 579        if (!force) {
 580                if (root_ht) {
 581                        if (root_ht->refcnt > 1)
 582                                return false;
 583                        if (root_ht->refcnt == 1) {
 584                                if (!ht_empty(root_ht))
 585                                        return false;
 586                        }
 587                }
 588
 589                if (tp_c->refcnt > 1)
 590                        return false;
 591
 592                if (tp_c->refcnt == 1) {
 593                        struct tc_u_hnode *ht;
 594
 595                        for (ht = rtnl_dereference(tp_c->hlist);
 596                             ht;
 597                             ht = rtnl_dereference(ht->next))
 598                                if (!ht_empty(ht))
 599                                        return false;
 600                }
 601        }
 602
 603        if (root_ht && --root_ht->refcnt == 0)
 604                u32_destroy_hnode(tp, root_ht);
 605
 606        if (--tp_c->refcnt == 0) {
 607                struct tc_u_hnode *ht;
 608
 609                tp->q->u32_node = NULL;
 610
 611                for (ht = rtnl_dereference(tp_c->hlist);
 612                     ht;
 613                     ht = rtnl_dereference(ht->next)) {
 614                        ht->refcnt--;
 615                        u32_clear_hnode(tp, ht);
 616                }
 617
 618                while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
 619                        RCU_INIT_POINTER(tp_c->hlist, ht->next);
 620                        kfree_rcu(ht, rcu);
 621                }
 622
 623                kfree(tp_c);
 624        }
 625
 626        tp->data = NULL;
 627        return true;
 628}
 629
 630static int u32_delete(struct tcf_proto *tp, unsigned long arg)
 631{
 632        struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
 633        struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 634
 635        if (ht == NULL)
 636                return 0;
 637
 638        if (TC_U32_KEY(ht->handle)) {
 639                u32_remove_hw_knode(tp, ht->handle);
 640                return u32_delete_key(tp, (struct tc_u_knode *)ht);
 641        }
 642
 643        if (root_ht == ht)
 644                return -EINVAL;
 645
 646        if (ht->refcnt == 1) {
 647                ht->refcnt--;
 648                u32_destroy_hnode(tp, ht);
 649        } else {
 650                return -EBUSY;
 651        }
 652
 653        return 0;
 654}
 655
 656#define NR_U32_NODE (1<<12)
 657static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
 658{
 659        struct tc_u_knode *n;
 660        unsigned long i;
 661        unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
 662                                        GFP_KERNEL);
 663        if (!bitmap)
 664                return handle | 0xFFF;
 665
 666        for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
 667             n;
 668             n = rtnl_dereference(n->next))
 669                set_bit(TC_U32_NODE(n->handle), bitmap);
 670
 671        i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
 672        if (i >= NR_U32_NODE)
 673                i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
 674
 675        kfree(bitmap);
 676        return handle | (i >= NR_U32_NODE ? 0xFFF : i);
 677}
 678
/* Netlink attribute policy for the nested TCA_OPTIONS of a u32 filter; it
 * is handed to nla_parse_nested() in u32_change().
 */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
        [TCA_U32_CLASSID]       = { .type = NLA_U32 },
        [TCA_U32_HASH]          = { .type = NLA_U32 },
        [TCA_U32_LINK]          = { .type = NLA_U32 },
        [TCA_U32_DIVISOR]       = { .type = NLA_U32 },
        [TCA_U32_SEL]           = { .len = sizeof(struct tc_u32_sel) },
        [TCA_U32_INDEV]         = { .type = NLA_STRING, .len = IFNAMSIZ },
        [TCA_U32_MARK]          = { .len = sizeof(struct tc_u32_mark) },
        [TCA_U32_FLAGS]         = { .type = NLA_U32 },
};
 689
 690static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 691                         unsigned long base, struct tc_u_hnode *ht,
 692                         struct tc_u_knode *n, struct nlattr **tb,
 693                         struct nlattr *est, bool ovr)
 694{
 695        int err;
 696        struct tcf_exts e;
 697
 698        tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
 699        err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 700        if (err < 0)
 701                return err;
 702
 703        err = -EINVAL;
 704        if (tb[TCA_U32_LINK]) {
 705                u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
 706                struct tc_u_hnode *ht_down = NULL, *ht_old;
 707
 708                if (TC_U32_KEY(handle))
 709                        goto errout;
 710
 711                if (handle) {
 712                        ht_down = u32_lookup_ht(ht->tp_c, handle);
 713
 714                        if (ht_down == NULL)
 715                                goto errout;
 716                        ht_down->refcnt++;
 717                }
 718
 719                ht_old = rtnl_dereference(n->ht_down);
 720                rcu_assign_pointer(n->ht_down, ht_down);
 721
 722                if (ht_old)
 723                        ht_old->refcnt--;
 724        }
 725        if (tb[TCA_U32_CLASSID]) {
 726                n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
 727                tcf_bind_filter(tp, &n->res, base);
 728        }
 729
 730#ifdef CONFIG_NET_CLS_IND
 731        if (tb[TCA_U32_INDEV]) {
 732                int ret;
 733                ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
 734                if (ret < 0)
 735                        goto errout;
 736                n->ifindex = ret;
 737        }
 738#endif
 739        tcf_exts_change(tp, &n->exts, &e);
 740
 741        return 0;
 742errout:
 743        tcf_exts_destroy(&e);
 744        return err;
 745}
 746
 747static void u32_replace_knode(struct tcf_proto *tp,
 748                              struct tc_u_common *tp_c,
 749                              struct tc_u_knode *n)
 750{
 751        struct tc_u_knode __rcu **ins;
 752        struct tc_u_knode *pins;
 753        struct tc_u_hnode *ht;
 754
 755        if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
 756                ht = rtnl_dereference(tp->root);
 757        else
 758                ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
 759
 760        ins = &ht->ht[TC_U32_HASH(n->handle)];
 761
 762        /* The node must always exist for it to be replaced if this is not the
 763         * case then something went very wrong elsewhere.
 764         */
 765        for (pins = rtnl_dereference(*ins); ;
 766             ins = &pins->next, pins = rtnl_dereference(*ins))
 767                if (pins->handle == n->handle)
 768                        break;
 769
 770        RCU_INIT_POINTER(n->next, pins->next);
 771        rcu_assign_pointer(*ins, n);
 772}
 773
 774static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
 775                                         struct tc_u_knode *n)
 776{
 777        struct tc_u_knode *new;
 778        struct tc_u32_sel *s = &n->sel;
 779
 780        new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
 781                      GFP_KERNEL);
 782
 783        if (!new)
 784                return NULL;
 785
 786        RCU_INIT_POINTER(new->next, n->next);
 787        new->handle = n->handle;
 788        RCU_INIT_POINTER(new->ht_up, n->ht_up);
 789
 790#ifdef CONFIG_NET_CLS_IND
 791        new->ifindex = n->ifindex;
 792#endif
 793        new->fshift = n->fshift;
 794        new->res = n->res;
 795        new->flags = n->flags;
 796        RCU_INIT_POINTER(new->ht_down, n->ht_down);
 797
 798        /* bump reference count as long as we hold pointer to structure */
 799        if (new->ht_down)
 800                new->ht_down->refcnt++;
 801
 802#ifdef CONFIG_CLS_U32_PERF
 803        /* Statistics may be incremented by readers during update
 804         * so we must keep them in tact. When the node is later destroyed
 805         * a special destroy call must be made to not free the pf memory.
 806         */
 807        new->pf = n->pf;
 808#endif
 809
 810#ifdef CONFIG_CLS_U32_MARK
 811        new->val = n->val;
 812        new->mask = n->mask;
 813        /* Similarly success statistics must be moved as pointers */
 814        new->pcpu_success = n->pcpu_success;
 815#endif
 816        new->tp = tp;
 817        memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
 818
 819        tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
 820
 821        return new;
 822}
 823
 824static int u32_change(struct net *net, struct sk_buff *in_skb,
 825                      struct tcf_proto *tp, unsigned long base, u32 handle,
 826                      struct nlattr **tca,
 827                      unsigned long *arg, bool ovr)
 828{
 829        struct tc_u_common *tp_c = tp->data;
 830        struct tc_u_hnode *ht;
 831        struct tc_u_knode *n;
 832        struct tc_u32_sel *s;
 833        struct nlattr *opt = tca[TCA_OPTIONS];
 834        struct nlattr *tb[TCA_U32_MAX + 1];
 835        u32 htid, flags = 0;
 836        int err;
 837#ifdef CONFIG_CLS_U32_PERF
 838        size_t size;
 839#endif
 840
 841        if (opt == NULL)
 842                return handle ? -EINVAL : 0;
 843
 844        err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
 845        if (err < 0)
 846                return err;
 847
 848        if (tb[TCA_U32_FLAGS])
 849                flags = nla_get_u32(tb[TCA_U32_FLAGS]);
 850
 851        n = (struct tc_u_knode *)*arg;
 852        if (n) {
 853                struct tc_u_knode *new;
 854
 855                if (TC_U32_KEY(n->handle) == 0)
 856                        return -EINVAL;
 857
 858                if (n->flags != flags)
 859                        return -EINVAL;
 860
 861                new = u32_init_knode(tp, n);
 862                if (!new)
 863                        return -ENOMEM;
 864
 865                err = u32_set_parms(net, tp, base,
 866                                    rtnl_dereference(n->ht_up), new, tb,
 867                                    tca[TCA_RATE], ovr);
 868
 869                if (err) {
 870                        u32_destroy_key(tp, new, false);
 871                        return err;
 872                }
 873
 874                u32_replace_knode(tp, tp_c, new);
 875                tcf_unbind_filter(tp, &n->res);
 876                call_rcu(&n->rcu, u32_delete_key_rcu);
 877                u32_replace_hw_knode(tp, new, flags);
 878                return 0;
 879        }
 880
 881        if (tb[TCA_U32_DIVISOR]) {
 882                unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
 883
 884                if (--divisor > 0x100)
 885                        return -EINVAL;
 886                if (TC_U32_KEY(handle))
 887                        return -EINVAL;
 888                if (handle == 0) {
 889                        handle = gen_new_htid(tp->data);
 890                        if (handle == 0)
 891                                return -ENOMEM;
 892                }
 893                ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
 894                if (ht == NULL)
 895                        return -ENOBUFS;
 896                ht->tp_c = tp_c;
 897                ht->refcnt = 1;
 898                ht->divisor = divisor;
 899                ht->handle = handle;
 900                ht->prio = tp->prio;
 901                RCU_INIT_POINTER(ht->next, tp_c->hlist);
 902                rcu_assign_pointer(tp_c->hlist, ht);
 903                *arg = (unsigned long)ht;
 904
 905                u32_replace_hw_hnode(tp, ht, flags);
 906                return 0;
 907        }
 908
 909        if (tb[TCA_U32_HASH]) {
 910                htid = nla_get_u32(tb[TCA_U32_HASH]);
 911                if (TC_U32_HTID(htid) == TC_U32_ROOT) {
 912                        ht = rtnl_dereference(tp->root);
 913                        htid = ht->handle;
 914                } else {
 915                        ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
 916                        if (ht == NULL)
 917                                return -EINVAL;
 918                }
 919        } else {
 920                ht = rtnl_dereference(tp->root);
 921                htid = ht->handle;
 922        }
 923
 924        if (ht->divisor < TC_U32_HASH(htid))
 925                return -EINVAL;
 926
 927        if (handle) {
 928                if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
 929                        return -EINVAL;
 930                handle = htid | TC_U32_NODE(handle);
 931        } else
 932                handle = gen_new_kid(ht, htid);
 933
 934        if (tb[TCA_U32_SEL] == NULL)
 935                return -EINVAL;
 936
 937        s = nla_data(tb[TCA_U32_SEL]);
 938
 939        n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
 940        if (n == NULL)
 941                return -ENOBUFS;
 942
 943#ifdef CONFIG_CLS_U32_PERF
 944        size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
 945        n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
 946        if (!n->pf) {
 947                kfree(n);
 948                return -ENOBUFS;
 949        }
 950#endif
 951
 952        memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
 953        RCU_INIT_POINTER(n->ht_up, ht);
 954        n->handle = handle;
 955        n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
 956        n->flags = flags;
 957        tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
 958        n->tp = tp;
 959
 960#ifdef CONFIG_CLS_U32_MARK
 961        n->pcpu_success = alloc_percpu(u32);
 962        if (!n->pcpu_success) {
 963                err = -ENOMEM;
 964                goto errout;
 965        }
 966
 967        if (tb[TCA_U32_MARK]) {
 968                struct tc_u32_mark *mark;
 969
 970                mark = nla_data(tb[TCA_U32_MARK]);
 971                n->val = mark->val;
 972                n->mask = mark->mask;
 973        }
 974#endif
 975
 976        err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
 977        if (err == 0) {
 978                struct tc_u_knode __rcu **ins;
 979                struct tc_u_knode *pins;
 980
 981                ins = &ht->ht[TC_U32_HASH(handle)];
 982                for (pins = rtnl_dereference(*ins); pins;
 983                     ins = &pins->next, pins = rtnl_dereference(*ins))
 984                        if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
 985                                break;
 986
 987                RCU_INIT_POINTER(n->next, pins);
 988                rcu_assign_pointer(*ins, n);
 989                u32_replace_hw_knode(tp, n, flags);
 990                *arg = (unsigned long)n;
 991                return 0;
 992        }
 993
 994#ifdef CONFIG_CLS_U32_MARK
 995        free_percpu(n->pcpu_success);
 996errout:
 997#endif
 998
 999#ifdef CONFIG_CLS_U32_PERF
1000        free_percpu(n->pf);
1001#endif
1002        kfree(n);
1003        return err;
1004}
1005
1006static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
1007{
1008        struct tc_u_common *tp_c = tp->data;
1009        struct tc_u_hnode *ht;
1010        struct tc_u_knode *n;
1011        unsigned int h;
1012
1013        if (arg->stop)
1014                return;
1015
1016        for (ht = rtnl_dereference(tp_c->hlist);
1017             ht;
1018             ht = rtnl_dereference(ht->next)) {
1019                if (ht->prio != tp->prio)
1020                        continue;
1021                if (arg->count >= arg->skip) {
1022                        if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
1023                                arg->stop = 1;
1024                                return;
1025                        }
1026                }
1027                arg->count++;
1028                for (h = 0; h <= ht->divisor; h++) {
1029                        for (n = rtnl_dereference(ht->ht[h]);
1030                             n;
1031                             n = rtnl_dereference(n->next)) {
1032                                if (arg->count < arg->skip) {
1033                                        arg->count++;
1034                                        continue;
1035                                }
1036                                if (arg->fn(tp, (unsigned long)n, arg) < 0) {
1037                                        arg->stop = 1;
1038                                        return;
1039                                }
1040                                arg->count++;
1041                        }
1042                }
1043        }
1044}
1045
1046static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
1047                     struct sk_buff *skb, struct tcmsg *t)
1048{
1049        struct tc_u_knode *n = (struct tc_u_knode *)fh;
1050        struct tc_u_hnode *ht_up, *ht_down;
1051        struct nlattr *nest;
1052
1053        if (n == NULL)
1054                return skb->len;
1055
1056        t->tcm_handle = n->handle;
1057
1058        nest = nla_nest_start(skb, TCA_OPTIONS);
1059        if (nest == NULL)
1060                goto nla_put_failure;
1061
1062        if (TC_U32_KEY(n->handle) == 0) {
1063                struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
1064                u32 divisor = ht->divisor + 1;
1065
1066                if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
1067                        goto nla_put_failure;
1068        } else {
1069#ifdef CONFIG_CLS_U32_PERF
1070                struct tc_u32_pcnt *gpf;
1071                int cpu;
1072#endif
1073
1074                if (nla_put(skb, TCA_U32_SEL,
1075                            sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
1076                            &n->sel))
1077                        goto nla_put_failure;
1078
1079                ht_up = rtnl_dereference(n->ht_up);
1080                if (ht_up) {
1081                        u32 htid = n->handle & 0xFFFFF000;
1082                        if (nla_put_u32(skb, TCA_U32_HASH, htid))
1083                                goto nla_put_failure;
1084                }
1085                if (n->res.classid &&
1086                    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
1087                        goto nla_put_failure;
1088
1089                ht_down = rtnl_dereference(n->ht_down);
1090                if (ht_down &&
1091                    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1092                        goto nla_put_failure;
1093
1094                if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1095                        goto nla_put_failure;
1096
1097#ifdef CONFIG_CLS_U32_MARK
1098                if ((n->val || n->mask)) {
1099                        struct tc_u32_mark mark = {.val = n->val,
1100                                                   .mask = n->mask,
1101                                                   .success = 0};
1102                        int cpum;
1103
1104                        for_each_possible_cpu(cpum) {
1105                                __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1106
1107                                mark.success += cnt;
1108                        }
1109
1110                        if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
1111                                goto nla_put_failure;
1112                }
1113#endif
1114
1115                if (tcf_exts_dump(skb, &n->exts) < 0)
1116                        goto nla_put_failure;
1117
1118#ifdef CONFIG_NET_CLS_IND
1119                if (n->ifindex) {
1120                        struct net_device *dev;
1121                        dev = __dev_get_by_index(net, n->ifindex);
1122                        if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
1123                                goto nla_put_failure;
1124                }
1125#endif
1126#ifdef CONFIG_CLS_U32_PERF
1127                gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
1128                              n->sel.nkeys * sizeof(u64),
1129                              GFP_KERNEL);
1130                if (!gpf)
1131                        goto nla_put_failure;
1132
1133                for_each_possible_cpu(cpu) {
1134                        int i;
1135                        struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
1136
1137                        gpf->rcnt += pf->rcnt;
1138                        gpf->rhit += pf->rhit;
1139                        for (i = 0; i < n->sel.nkeys; i++)
1140                                gpf->kcnts[i] += pf->kcnts[i];
1141                }
1142
1143                if (nla_put(skb, TCA_U32_PCNT,
1144                            sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
1145                            gpf)) {
1146                        kfree(gpf);
1147                        goto nla_put_failure;
1148                }
1149                kfree(gpf);
1150#endif
1151        }
1152
1153        nla_nest_end(skb, nest);
1154
1155        if (TC_U32_KEY(n->handle))
1156                if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1157                        goto nla_put_failure;
1158        return skb->len;
1159
1160nla_put_failure:
1161        nla_nest_cancel(skb, nest);
1162        return -1;
1163}
1164
1165static struct tcf_proto_ops cls_u32_ops __read_mostly = {
1166        .kind           =       "u32",
1167        .classify       =       u32_classify,
1168        .init           =       u32_init,
1169        .destroy        =       u32_destroy,
1170        .get            =       u32_get,
1171        .change         =       u32_change,
1172        .delete         =       u32_delete,
1173        .walk           =       u32_walk,
1174        .dump           =       u32_dump,
1175        .owner          =       THIS_MODULE,
1176};
1177
1178static int __init init_u32(void)
1179{
1180        pr_info("u32 classifier\n");
1181#ifdef CONFIG_CLS_U32_PERF
1182        pr_info("    Performance counters on\n");
1183#endif
1184#ifdef CONFIG_NET_CLS_IND
1185        pr_info("    input device check on\n");
1186#endif
1187#ifdef CONFIG_NET_CLS_ACT
1188        pr_info("    Actions configured\n");
1189#endif
1190        return register_tcf_proto_ops(&cls_u32_ops);
1191}
1192
1193static void __exit exit_u32(void)
1194{
1195        unregister_tcf_proto_ops(&cls_u32_ops);
1196}
1197
/*
 * Module boilerplate: name init_u32() and exit_u32() as the module's
 * init and exit routines and declare the module license as GPL.
 */
 1198module_init(init_u32)
 1199module_exit(exit_u32)
 1200MODULE_LICENSE("GPL");
1201