linux/net/netfilter/nft_set_hash.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License version 2 as
   6 * published by the Free Software Foundation.
   7 *
   8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
   9 */
  10
  11#include <linux/kernel.h>
  12#include <linux/init.h>
  13#include <linux/module.h>
  14#include <linux/list.h>
  15#include <linux/log2.h>
  16#include <linux/jhash.h>
  17#include <linux/netlink.h>
  18#include <linux/workqueue.h>
  19#include <linux/rhashtable.h>
  20#include <linux/netfilter.h>
  21#include <linux/netfilter/nf_tables.h>
  22#include <net/netfilter/nf_tables.h>
  23
  24/* We target a hash table size of 4, element hint is 75% of final size */
  25#define NFT_RHASH_ELEMENT_HINT 3
  26
  27struct nft_rhash {
  28        struct rhashtable               ht;
  29        struct delayed_work             gc_work;
  30};
  31
  32struct nft_rhash_elem {
  33        struct rhash_head               node;
  34        struct nft_set_ext              ext;
  35};
  36
  37struct nft_rhash_cmp_arg {
  38        const struct nft_set            *set;
  39        const u32                       *key;
  40        u8                              genmask;
  41};
  42
  43static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
  44{
  45        const struct nft_rhash_cmp_arg *arg = data;
  46
  47        return jhash(arg->key, len, seed);
  48}
  49
  50static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed)
  51{
  52        const struct nft_rhash_elem *he = data;
  53
  54        return jhash(nft_set_ext_key(&he->ext), len, seed);
  55}
  56
  57static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
  58                                const void *ptr)
  59{
  60        const struct nft_rhash_cmp_arg *x = arg->key;
  61        const struct nft_rhash_elem *he = ptr;
  62
  63        if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
  64                return 1;
  65        if (nft_set_elem_expired(&he->ext))
  66                return 1;
  67        if (!nft_set_elem_active(&he->ext, x->genmask))
  68                return 1;
  69        return 0;
  70}
  71
  72static const struct rhashtable_params nft_rhash_params = {
  73        .head_offset            = offsetof(struct nft_rhash_elem, node),
  74        .hashfn                 = nft_rhash_key,
  75        .obj_hashfn             = nft_rhash_obj,
  76        .obj_cmpfn              = nft_rhash_cmp,
  77        .automatic_shrinking    = true,
  78};
  79
  80static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
  81                             const u32 *key, const struct nft_set_ext **ext)
  82{
  83        struct nft_rhash *priv = nft_set_priv(set);
  84        const struct nft_rhash_elem *he;
  85        struct nft_rhash_cmp_arg arg = {
  86                .genmask = nft_genmask_cur(net),
  87                .set     = set,
  88                .key     = key,
  89        };
  90
  91        he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
  92        if (he != NULL)
  93                *ext = &he->ext;
  94
  95        return !!he;
  96}
  97
  98static bool nft_rhash_update(struct nft_set *set, const u32 *key,
  99                             void *(*new)(struct nft_set *,
 100                                          const struct nft_expr *,
 101                                          struct nft_regs *regs),
 102                             const struct nft_expr *expr,
 103                             struct nft_regs *regs,
 104                             const struct nft_set_ext **ext)
 105{
 106        struct nft_rhash *priv = nft_set_priv(set);
 107        struct nft_rhash_elem *he, *prev;
 108        struct nft_rhash_cmp_arg arg = {
 109                .genmask = NFT_GENMASK_ANY,
 110                .set     = set,
 111                .key     = key,
 112        };
 113
 114        he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
 115        if (he != NULL)
 116                goto out;
 117
 118        he = new(set, expr, regs);
 119        if (he == NULL)
 120                goto err1;
 121
 122        prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
 123                                                nft_rhash_params);
 124        if (IS_ERR(prev))
 125                goto err2;
 126
 127        /* Another cpu may race to insert the element with the same key */
 128        if (prev) {
 129                nft_set_elem_destroy(set, he, true);
 130                he = prev;
 131        }
 132
 133out:
 134        *ext = &he->ext;
 135        return true;
 136
 137err2:
 138        nft_set_elem_destroy(set, he, true);
 139err1:
 140        return false;
 141}
 142
 143static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
 144                            const struct nft_set_elem *elem,
 145                            struct nft_set_ext **ext)
 146{
 147        struct nft_rhash *priv = nft_set_priv(set);
 148        struct nft_rhash_elem *he = elem->priv;
 149        struct nft_rhash_cmp_arg arg = {
 150                .genmask = nft_genmask_next(net),
 151                .set     = set,
 152                .key     = elem->key.val.data,
 153        };
 154        struct nft_rhash_elem *prev;
 155
 156        prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
 157                                                nft_rhash_params);
 158        if (IS_ERR(prev))
 159                return PTR_ERR(prev);
 160        if (prev) {
 161                *ext = &prev->ext;
 162                return -EEXIST;
 163        }
 164        return 0;
 165}
 166
 167static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
 168                               const struct nft_set_elem *elem)
 169{
 170        struct nft_rhash_elem *he = elem->priv;
 171
 172        nft_set_elem_change_active(net, set, &he->ext);
 173        nft_set_elem_clear_busy(&he->ext);
 174}
 175
 176static bool nft_rhash_flush(const struct net *net,
 177                            const struct nft_set *set, void *priv)
 178{
 179        struct nft_rhash_elem *he = priv;
 180
 181        if (!nft_set_elem_mark_busy(&he->ext) ||
 182            !nft_is_active(net, &he->ext)) {
 183                nft_set_elem_change_active(net, set, &he->ext);
 184                return true;
 185        }
 186        return false;
 187}
 188
 189static void *nft_rhash_deactivate(const struct net *net,
 190                                  const struct nft_set *set,
 191                                  const struct nft_set_elem *elem)
 192{
 193        struct nft_rhash *priv = nft_set_priv(set);
 194        struct nft_rhash_elem *he;
 195        struct nft_rhash_cmp_arg arg = {
 196                .genmask = nft_genmask_next(net),
 197                .set     = set,
 198                .key     = elem->key.val.data,
 199        };
 200
 201        rcu_read_lock();
 202        he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
 203        if (he != NULL &&
 204            !nft_rhash_flush(net, set, he))
 205                he = NULL;
 206
 207        rcu_read_unlock();
 208
 209        return he;
 210}
 211
 212static void nft_rhash_remove(const struct net *net,
 213                             const struct nft_set *set,
 214                             const struct nft_set_elem *elem)
 215{
 216        struct nft_rhash *priv = nft_set_priv(set);
 217        struct nft_rhash_elem *he = elem->priv;
 218
 219        rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
 220}
 221
 222static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 223                           struct nft_set_iter *iter)
 224{
 225        struct nft_rhash *priv = nft_set_priv(set);
 226        struct nft_rhash_elem *he;
 227        struct rhashtable_iter hti;
 228        struct nft_set_elem elem;
 229        int err;
 230
 231        err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
 232        iter->err = err;
 233        if (err)
 234                return;
 235
 236        err = rhashtable_walk_start(&hti);
 237        if (err && err != -EAGAIN) {
 238                iter->err = err;
 239                goto out;
 240        }
 241
 242        while ((he = rhashtable_walk_next(&hti))) {
 243                if (IS_ERR(he)) {
 244                        err = PTR_ERR(he);
 245                        if (err != -EAGAIN) {
 246                                iter->err = err;
 247                                goto out;
 248                        }
 249
 250                        continue;
 251                }
 252
 253                if (iter->count < iter->skip)
 254                        goto cont;
 255                if (nft_set_elem_expired(&he->ext))
 256                        goto cont;
 257                if (!nft_set_elem_active(&he->ext, iter->genmask))
 258                        goto cont;
 259
 260                elem.priv = he;
 261
 262                iter->err = iter->fn(ctx, set, iter, &elem);
 263                if (iter->err < 0)
 264                        goto out;
 265
 266cont:
 267                iter->count++;
 268        }
 269
 270out:
 271        rhashtable_walk_stop(&hti);
 272        rhashtable_walk_exit(&hti);
 273}
 274
 275static void nft_rhash_gc(struct work_struct *work)
 276{
 277        struct nft_set *set;
 278        struct nft_rhash_elem *he;
 279        struct nft_rhash *priv;
 280        struct nft_set_gc_batch *gcb = NULL;
 281        struct rhashtable_iter hti;
 282        int err;
 283
 284        priv = container_of(work, struct nft_rhash, gc_work.work);
 285        set  = nft_set_container_of(priv);
 286
 287        err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
 288        if (err)
 289                goto schedule;
 290
 291        err = rhashtable_walk_start(&hti);
 292        if (err && err != -EAGAIN)
 293                goto out;
 294
 295        while ((he = rhashtable_walk_next(&hti))) {
 296                if (IS_ERR(he)) {
 297                        if (PTR_ERR(he) != -EAGAIN)
 298                                goto out;
 299                        continue;
 300                }
 301
 302                if (!nft_set_elem_expired(&he->ext))
 303                        continue;
 304                if (nft_set_elem_mark_busy(&he->ext))
 305                        continue;
 306
 307                gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
 308                if (gcb == NULL)
 309                        goto out;
 310                rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
 311                atomic_dec(&set->nelems);
 312                nft_set_gc_batch_add(gcb, he);
 313        }
 314out:
 315        rhashtable_walk_stop(&hti);
 316        rhashtable_walk_exit(&hti);
 317
 318        nft_set_gc_batch_complete(gcb);
 319schedule:
 320        queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
 321                           nft_set_gc_interval(set));
 322}
 323
 324static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
 325                                       const struct nft_set_desc *desc)
 326{
 327        return sizeof(struct nft_rhash);
 328}
 329
 330static int nft_rhash_init(const struct nft_set *set,
 331                          const struct nft_set_desc *desc,
 332                          const struct nlattr * const tb[])
 333{
 334        struct nft_rhash *priv = nft_set_priv(set);
 335        struct rhashtable_params params = nft_rhash_params;
 336        int err;
 337
 338        params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
 339        params.key_len    = set->klen;
 340
 341        err = rhashtable_init(&priv->ht, &params);
 342        if (err < 0)
 343                return err;
 344
 345        INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
 346        if (set->flags & NFT_SET_TIMEOUT)
 347                queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
 348                                   nft_set_gc_interval(set));
 349        return 0;
 350}
 351
 352static void nft_rhash_elem_destroy(void *ptr, void *arg)
 353{
 354        nft_set_elem_destroy(arg, ptr, true);
 355}
 356
 357static void nft_rhash_destroy(const struct nft_set *set)
 358{
 359        struct nft_rhash *priv = nft_set_priv(set);
 360
 361        cancel_delayed_work_sync(&priv->gc_work);
 362        rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
 363                                    (void *)set);
 364}
 365
 366static u32 nft_hash_buckets(u32 size)
 367{
 368        return roundup_pow_of_two(size * 4 / 3);
 369}
 370
 371static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
 372                               struct nft_set_estimate *est)
 373{
 374        est->size   = ~0;
 375        est->lookup = NFT_SET_CLASS_O_1;
 376        est->space  = NFT_SET_CLASS_O_N;
 377
 378        return true;
 379}
 380
 381struct nft_hash {
 382        u32                             seed;
 383        u32                             buckets;
 384        struct hlist_head               table[];
 385};
 386
 387struct nft_hash_elem {
 388        struct hlist_node               node;
 389        struct nft_set_ext              ext;
 390};
 391
 392static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
 393                            const u32 *key, const struct nft_set_ext **ext)
 394{
 395        struct nft_hash *priv = nft_set_priv(set);
 396        u8 genmask = nft_genmask_cur(net);
 397        const struct nft_hash_elem *he;
 398        u32 hash;
 399
 400        hash = jhash(key, set->klen, priv->seed);
 401        hash = reciprocal_scale(hash, priv->buckets);
 402        hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
 403                if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
 404                    nft_set_elem_active(&he->ext, genmask)) {
 405                        *ext = &he->ext;
 406                        return true;
 407                }
 408        }
 409        return false;
 410}
 411
 412/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
 413static inline u32 nft_hash_key(const u32 *key, u32 klen)
 414{
 415        if (klen == 4)
 416                return *key;
 417
 418        return *(u16 *)key;
 419}
 420
 421static bool nft_hash_lookup_fast(const struct net *net,
 422                                 const struct nft_set *set,
 423                                 const u32 *key, const struct nft_set_ext **ext)
 424{
 425        struct nft_hash *priv = nft_set_priv(set);
 426        u8 genmask = nft_genmask_cur(net);
 427        const struct nft_hash_elem *he;
 428        u32 hash, k1, k2;
 429
 430        k1 = nft_hash_key(key, set->klen);
 431        hash = jhash_1word(k1, priv->seed);
 432        hash = reciprocal_scale(hash, priv->buckets);
 433        hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
 434                k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen);
 435                if (k1 == k2 &&
 436                    nft_set_elem_active(&he->ext, genmask)) {
 437                        *ext = &he->ext;
 438                        return true;
 439                }
 440        }
 441        return false;
 442}
 443
 444static int nft_hash_insert(const struct net *net, const struct nft_set *set,
 445                           const struct nft_set_elem *elem,
 446                           struct nft_set_ext **ext)
 447{
 448        struct nft_hash_elem *this = elem->priv, *he;
 449        struct nft_hash *priv = nft_set_priv(set);
 450        u8 genmask = nft_genmask_next(net);
 451        u32 hash;
 452
 453        hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
 454        hash = reciprocal_scale(hash, priv->buckets);
 455        hlist_for_each_entry(he, &priv->table[hash], node) {
 456                if (!memcmp(nft_set_ext_key(&this->ext),
 457                            nft_set_ext_key(&he->ext), set->klen) &&
 458                    nft_set_elem_active(&he->ext, genmask)) {
 459                        *ext = &he->ext;
 460                        return -EEXIST;
 461                }
 462        }
 463        hlist_add_head_rcu(&this->node, &priv->table[hash]);
 464        return 0;
 465}
 466
 467static void nft_hash_activate(const struct net *net, const struct nft_set *set,
 468                              const struct nft_set_elem *elem)
 469{
 470        struct nft_hash_elem *he = elem->priv;
 471
 472        nft_set_elem_change_active(net, set, &he->ext);
 473}
 474
 475static bool nft_hash_flush(const struct net *net,
 476                           const struct nft_set *set, void *priv)
 477{
 478        struct nft_hash_elem *he = priv;
 479
 480        nft_set_elem_change_active(net, set, &he->ext);
 481        return true;
 482}
 483
 484static void *nft_hash_deactivate(const struct net *net,
 485                                 const struct nft_set *set,
 486                                 const struct nft_set_elem *elem)
 487{
 488        struct nft_hash *priv = nft_set_priv(set);
 489        struct nft_hash_elem *this = elem->priv, *he;
 490        u8 genmask = nft_genmask_next(net);
 491        u32 hash;
 492
 493        hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
 494        hash = reciprocal_scale(hash, priv->buckets);
 495        hlist_for_each_entry(he, &priv->table[hash], node) {
 496                if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
 497                            set->klen) ||
 498                    nft_set_elem_active(&he->ext, genmask)) {
 499                        nft_set_elem_change_active(net, set, &he->ext);
 500                        return he;
 501                }
 502        }
 503        return NULL;
 504}
 505
 506static void nft_hash_remove(const struct net *net,
 507                            const struct nft_set *set,
 508                            const struct nft_set_elem *elem)
 509{
 510        struct nft_hash_elem *he = elem->priv;
 511
 512        hlist_del_rcu(&he->node);
 513}
 514
 515static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 516                          struct nft_set_iter *iter)
 517{
 518        struct nft_hash *priv = nft_set_priv(set);
 519        struct nft_hash_elem *he;
 520        struct nft_set_elem elem;
 521        int i;
 522
 523        for (i = 0; i < priv->buckets; i++) {
 524                hlist_for_each_entry_rcu(he, &priv->table[i], node) {
 525                        if (iter->count < iter->skip)
 526                                goto cont;
 527                        if (!nft_set_elem_active(&he->ext, iter->genmask))
 528                                goto cont;
 529
 530                        elem.priv = he;
 531
 532                        iter->err = iter->fn(ctx, set, iter, &elem);
 533                        if (iter->err < 0)
 534                                return;
 535cont:
 536                        iter->count++;
 537                }
 538        }
 539}
 540
 541static unsigned int nft_hash_privsize(const struct nlattr * const nla[],
 542                                      const struct nft_set_desc *desc)
 543{
 544        return sizeof(struct nft_hash) +
 545               nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
 546}
 547
 548static int nft_hash_init(const struct nft_set *set,
 549                         const struct nft_set_desc *desc,
 550                         const struct nlattr * const tb[])
 551{
 552        struct nft_hash *priv = nft_set_priv(set);
 553
 554        priv->buckets = nft_hash_buckets(desc->size);
 555        get_random_bytes(&priv->seed, sizeof(priv->seed));
 556
 557        return 0;
 558}
 559
 560static void nft_hash_destroy(const struct nft_set *set)
 561{
 562        struct nft_hash *priv = nft_set_priv(set);
 563        struct nft_hash_elem *he;
 564        struct hlist_node *next;
 565        int i;
 566
 567        for (i = 0; i < priv->buckets; i++) {
 568                hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
 569                        hlist_del_rcu(&he->node);
 570                        nft_set_elem_destroy(set, he, true);
 571                }
 572        }
 573}
 574
 575static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
 576                              struct nft_set_estimate *est)
 577{
 578        est->size   = sizeof(struct nft_hash) +
 579                      nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
 580                      desc->size * sizeof(struct nft_hash_elem);
 581        est->lookup = NFT_SET_CLASS_O_1;
 582        est->space  = NFT_SET_CLASS_O_N;
 583
 584        return true;
 585}
 586
 587static struct nft_set_type nft_hash_type;
 588static struct nft_set_ops nft_rhash_ops __read_mostly = {
 589        .type           = &nft_hash_type,
 590        .privsize       = nft_rhash_privsize,
 591        .elemsize       = offsetof(struct nft_rhash_elem, ext),
 592        .estimate       = nft_rhash_estimate,
 593        .init           = nft_rhash_init,
 594        .destroy        = nft_rhash_destroy,
 595        .insert         = nft_rhash_insert,
 596        .activate       = nft_rhash_activate,
 597        .deactivate     = nft_rhash_deactivate,
 598        .flush          = nft_rhash_flush,
 599        .remove         = nft_rhash_remove,
 600        .lookup         = nft_rhash_lookup,
 601        .update         = nft_rhash_update,
 602        .walk           = nft_rhash_walk,
 603        .features       = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
 604};
 605
 606static struct nft_set_ops nft_hash_ops __read_mostly = {
 607        .type           = &nft_hash_type,
 608        .privsize       = nft_hash_privsize,
 609        .elemsize       = offsetof(struct nft_hash_elem, ext),
 610        .estimate       = nft_hash_estimate,
 611        .init           = nft_hash_init,
 612        .destroy        = nft_hash_destroy,
 613        .insert         = nft_hash_insert,
 614        .activate       = nft_hash_activate,
 615        .deactivate     = nft_hash_deactivate,
 616        .flush          = nft_hash_flush,
 617        .remove         = nft_hash_remove,
 618        .lookup         = nft_hash_lookup,
 619        .walk           = nft_hash_walk,
 620        .features       = NFT_SET_MAP | NFT_SET_OBJECT,
 621};
 622
 623static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
 624        .type           = &nft_hash_type,
 625        .privsize       = nft_hash_privsize,
 626        .elemsize       = offsetof(struct nft_hash_elem, ext),
 627        .estimate       = nft_hash_estimate,
 628        .init           = nft_hash_init,
 629        .destroy        = nft_hash_destroy,
 630        .insert         = nft_hash_insert,
 631        .activate       = nft_hash_activate,
 632        .deactivate     = nft_hash_deactivate,
 633        .flush          = nft_hash_flush,
 634        .remove         = nft_hash_remove,
 635        .lookup         = nft_hash_lookup_fast,
 636        .walk           = nft_hash_walk,
 637        .features       = NFT_SET_MAP | NFT_SET_OBJECT,
 638};
 639
 640static const struct nft_set_ops *
 641nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
 642                    u32 flags)
 643{
 644        if (desc->size) {
 645                switch (desc->klen) {
 646                case 4:
 647                        return &nft_hash_fast_ops;
 648                default:
 649                        return &nft_hash_ops;
 650                }
 651        }
 652
 653        return &nft_rhash_ops;
 654}
 655
 656static struct nft_set_type nft_hash_type __read_mostly = {
 657        .select_ops     = nft_hash_select_ops,
 658        .owner          = THIS_MODULE,
 659};
 660
 661static int __init nft_hash_module_init(void)
 662{
 663        return nft_register_set(&nft_hash_type);
 664}
 665
 666static void __exit nft_hash_module_exit(void)
 667{
 668        nft_unregister_set(&nft_hash_type);
 669}
 670
 671module_init(nft_hash_module_init);
 672module_exit(nft_hash_module_exit);
 673
 674MODULE_LICENSE("GPL");
 675MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 676MODULE_ALIAS_NFT_SET();
 677