linux/net/netfilter/ipset/ip_set_hash_gen.h
   1/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
   2 *
   3 * This program is free software; you can redistribute it and/or modify
   4 * it under the terms of the GNU General Public License version 2 as
   5 * published by the Free Software Foundation.
   6 */
   7
   8#ifndef _IP_SET_HASH_GEN_H
   9#define _IP_SET_HASH_GEN_H
  10
  11#include <linux/rcupdate.h>
  12#include <linux/jhash.h>
  13#include <linux/types.h>
  14#include <linux/netfilter/ipset/ip_set_timeout.h>
  15
  16#define __ipset_dereference_protected(p, c)     rcu_dereference_protected(p, c)
  17#define ipset_dereference_protected(p, set) \
  18        __ipset_dereference_protected(p, spin_is_locked(&(set)->lock))
  19
  20#define rcu_dereference_bh_nfnl(p)      rcu_dereference_bh_check(p, 1)
  21
  22/* Hashing which uses arrays to resolve clashes. The hash table is resized
  23 * (doubled) when searching becomes too long.
  24 * Internally jhash is used with the assumption that the size of the
  25 * stored data is a multiple of sizeof(u32).
  26 *
  27 * Readers and resizing
  28 *
  29 * Resizing can be triggered by userspace command only, and those
  30 * are serialized by the nfnl mutex. During resizing the set is
  31 * read-locked, so the only possible concurrent operations are
  32 * the kernel side readers. Those must be protected by proper RCU locking.
  33 */
  34
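/* A minimal sketch of the reader-side pattern assumed above (illustrative
 * only; in the lookup paths below the RCU read-side critical section is
 * provided by the callers, and the _bh variants are used because the
 * kernel-side readers run in softirq context):
 *
 *        rcu_read_lock_bh();
 *        t = rcu_dereference_bh(h->table);
 *        n = rcu_dereference_bh(hbucket(t, key));
 *        ... walk the used slots of the bucket ...
 *        rcu_read_unlock_bh();
 */
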
  35/* Number of elements to store in an initial array block */
  36#define AHASH_INIT_SIZE                 4
  37/* Max number of elements to store in an array block */
  38#define AHASH_MAX_SIZE                  (3 * AHASH_INIT_SIZE)
  39/* Max number of elements in the array block when tuned */
  40#define AHASH_MAX_TUNED                 64
  41
  42/* Max number of elements can be tuned */
  43#ifdef IP_SET_HASH_WITH_MULTI
  44#define AHASH_MAX(h)                    ((h)->ahash_max)
  45
  46static inline u8
  47tune_ahash_max(u8 curr, u32 multi)
  48{
  49        u32 n;
  50
  51        if (multi < curr)
  52                return curr;
  53
  54        n = curr + AHASH_INIT_SIZE;
  55        /* Currently, when listing the set, one hash bucket must fit into a message.
  56         * Therefore we have a hard limit here.
  57         */
  58        return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
  59}
  60
  61#define TUNE_AHASH_MAX(h, multi)        \
  62        ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
  63#else
  64#define AHASH_MAX(h)                    AHASH_MAX_SIZE
  65#define TUNE_AHASH_MAX(h, multi)
  66#endif
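
/* A worked example (sketch; assumes ahash_max starts at AHASH_MAX_SIZE):
 * when mtype_add() below finds a full bucket and calls TUNE_AHASH_MAX(),
 * the per-bucket limit can grow in AHASH_INIT_SIZE steps,
 * 12 -> 16 -> 20 -> ... -> 64, capped at AHASH_MAX_TUNED so that one
 * bucket still fits into a netlink message when listing.
 */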
  67
  68/* A hash bucket */
  69struct hbucket {
  70        struct rcu_head rcu;    /* for call_rcu_bh */
  71        /* Which positions are used in the array */
  72        DECLARE_BITMAP(used, AHASH_MAX_TUNED);
  73        u8 size;                /* size of the array */
  74        u8 pos;                 /* position of the first free entry */
  75        unsigned char value[0]  /* the array of the values */
  76                __aligned(__alignof__(u64));
  77};
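
/* The value[] flexible array stores the elements back to back, set->dsize
 * bytes each. A minimal sketch of how a new bucket is allocated (this is
 * what mtype_add() below does for an empty hash slot):
 *
 *        n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, GFP_ATOMIC);
 *        n->size = AHASH_INIT_SIZE;
 */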
  78
  79/* The hash table: the table size stored here in order to make resizing easy */
  80struct htable {
  81        atomic_t ref;           /* References for resizing */
  82        atomic_t uref;          /* References for dumping */
  83        u8 htable_bits;         /* size of hash table == 2^htable_bits */
  84        struct hbucket __rcu *bucket[0]; /* hashtable buckets */
  85};
  86
  87#define hbucket(h, i)           ((h)->bucket[i])
  88#define ext_size(n, dsize)      \
  89        (sizeof(struct hbucket) + (n) * (dsize))
  90
  91#ifndef IPSET_NET_COUNT
  92#define IPSET_NET_COUNT         1
  93#endif
  94
  95/* Book-keeping of the prefixes added to the set */
  96struct net_prefixes {
  97        u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
  98        u8 cidr[IPSET_NET_COUNT];  /* the cidr value */
  99};
 100
 101/* Compute the hash table size */
 102static size_t
 103htable_size(u8 hbits)
 104{
 105        size_t hsize;
 106
 107        /* The size must fit both into a u32 (for jhash) and into size_t */
 108        if (hbits > 31)
 109                return 0;
 110        hsize = jhash_size(hbits);
 111        if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
 112            < hsize)
 113                return 0;
 114
 115        return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
 116}
 117
 118/* Compute htable_bits from the user input parameter hashsize */
 119static u8
 120htable_bits(u32 hashsize)
 121{
 122        /* Assume that hashsize == 2^htable_bits */
 123        u8 bits = fls(hashsize - 1);
 124
 125        if (jhash_size(bits) != hashsize)
 126                /* Round up to the first 2^n value */
 127                bits = fls(hashsize);
 128
 129        return bits;
 130}
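
/* Worked examples (illustrative): hashsize 1024 yields exactly 10 bits;
 * hashsize 1000 is rounded up to 10 bits (1024 buckets); hashsize 1025 is
 * rounded up to 11 bits (2048 buckets).
 */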
 131
 132#ifdef IP_SET_HASH_WITH_NETS
 133#if IPSET_NET_COUNT > 1
 134#define __CIDR(cidr, i)         (cidr[i])
 135#else
 136#define __CIDR(cidr, i)         (cidr)
 137#endif
 138
 139/* cidr + 1 is stored in net_prefixes to support /0 */
 140#define NCIDR_PUT(cidr)         ((cidr) + 1)
 141#define NCIDR_GET(cidr)         ((cidr) - 1)
 142
 143#ifdef IP_SET_HASH_WITH_NETS_PACKED
 144/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
 145#define DCIDR_PUT(cidr)         ((cidr) - 1)
 146#define DCIDR_GET(cidr, i)      (__CIDR(cidr, i) + 1)
 147#else
 148#define DCIDR_PUT(cidr)         (cidr)
 149#define DCIDR_GET(cidr, i)      __CIDR(cidr, i)
 150#endif
 151
 152#define INIT_CIDR(cidr, host_mask)      \
 153        DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
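
/* Worked example (illustrative): for a /24 network, the nets[] book-keeping
 * stores NCIDR_PUT(24) == 25, and NCIDR_GET(25) == 24 recovers the real
 * prefix length. With IP_SET_HASH_WITH_NETS_PACKED the data entry stores
 * DCIDR_PUT(24) == 23 and DCIDR_GET() returns 24 again; without packing the
 * data entry stores the plain value 24.
 */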
 154
 155#ifdef IP_SET_HASH_WITH_NET0
 156/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
 157#define NLEN                    (HOST_MASK + 1)
 158#define CIDR_POS(c)             ((c) - 1)
 159#else
 160/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
 161#define NLEN                    HOST_MASK
 162#define CIDR_POS(c)             ((c) - 2)
 163#endif
 164
 165#else
 166#define NLEN                    0
 167#endif /* IP_SET_HASH_WITH_NETS */
 168
 169#endif /* _IP_SET_HASH_GEN_H */
 170
 171#ifndef MTYPE
 172#error "MTYPE is not defined!"
 173#endif
 174
 175#ifndef HTYPE
 176#error "HTYPE is not defined!"
 177#endif
 178
 179#ifndef HOST_MASK
 180#error "HOST_MASK is not defined!"
 181#endif
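
/* A sketch of how a set type instantiates this template (modelled on the
 * hash:ip type; the element and variant structures are declared by the
 * including source file, not by this header):
 *
 *        #define HTYPE           hash_ip
 *        #define MTYPE           hash_ip4
 *        #define HOST_MASK       32
 *        #include "ip_set_hash_gen.h"
 */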
 182
 183/* Family dependent templates */
 184
 185#undef ahash_data
 186#undef mtype_data_equal
 187#undef mtype_do_data_match
 188#undef mtype_data_set_flags
 189#undef mtype_data_reset_elem
 190#undef mtype_data_reset_flags
 191#undef mtype_data_netmask
 192#undef mtype_data_list
 193#undef mtype_data_next
 194#undef mtype_elem
 195
 196#undef mtype_ahash_destroy
 197#undef mtype_ext_cleanup
 198#undef mtype_add_cidr
 199#undef mtype_del_cidr
 200#undef mtype_ahash_memsize
 201#undef mtype_flush
 202#undef mtype_destroy
 203#undef mtype_same_set
 204#undef mtype_kadt
 205#undef mtype_uadt
 206
 207#undef mtype_add
 208#undef mtype_del
 209#undef mtype_test_cidrs
 210#undef mtype_test
 211#undef mtype_uref
 212#undef mtype_expire
 213#undef mtype_resize
 214#undef mtype_head
 215#undef mtype_list
 216#undef mtype_gc
 217#undef mtype_gc_init
 218#undef mtype_variant
 219#undef mtype_data_match
 220
 221#undef htype
 222#undef HKEY
 223
 224#define mtype_data_equal        IPSET_TOKEN(MTYPE, _data_equal)
 225#ifdef IP_SET_HASH_WITH_NETS
 226#define mtype_do_data_match     IPSET_TOKEN(MTYPE, _do_data_match)
 227#else
 228#define mtype_do_data_match(d)  1
 229#endif
 230#define mtype_data_set_flags    IPSET_TOKEN(MTYPE, _data_set_flags)
 231#define mtype_data_reset_elem   IPSET_TOKEN(MTYPE, _data_reset_elem)
 232#define mtype_data_reset_flags  IPSET_TOKEN(MTYPE, _data_reset_flags)
 233#define mtype_data_netmask      IPSET_TOKEN(MTYPE, _data_netmask)
 234#define mtype_data_list         IPSET_TOKEN(MTYPE, _data_list)
 235#define mtype_data_next         IPSET_TOKEN(MTYPE, _data_next)
 236#define mtype_elem              IPSET_TOKEN(MTYPE, _elem)
 237
 238#define mtype_ahash_destroy     IPSET_TOKEN(MTYPE, _ahash_destroy)
 239#define mtype_ext_cleanup       IPSET_TOKEN(MTYPE, _ext_cleanup)
 240#define mtype_add_cidr          IPSET_TOKEN(MTYPE, _add_cidr)
 241#define mtype_del_cidr          IPSET_TOKEN(MTYPE, _del_cidr)
 242#define mtype_ahash_memsize     IPSET_TOKEN(MTYPE, _ahash_memsize)
 243#define mtype_flush             IPSET_TOKEN(MTYPE, _flush)
 244#define mtype_destroy           IPSET_TOKEN(MTYPE, _destroy)
 245#define mtype_same_set          IPSET_TOKEN(MTYPE, _same_set)
 246#define mtype_kadt              IPSET_TOKEN(MTYPE, _kadt)
 247#define mtype_uadt              IPSET_TOKEN(MTYPE, _uadt)
 248
 249#define mtype_add               IPSET_TOKEN(MTYPE, _add)
 250#define mtype_del               IPSET_TOKEN(MTYPE, _del)
 251#define mtype_test_cidrs        IPSET_TOKEN(MTYPE, _test_cidrs)
 252#define mtype_test              IPSET_TOKEN(MTYPE, _test)
 253#define mtype_uref              IPSET_TOKEN(MTYPE, _uref)
 254#define mtype_expire            IPSET_TOKEN(MTYPE, _expire)
 255#define mtype_resize            IPSET_TOKEN(MTYPE, _resize)
 256#define mtype_head              IPSET_TOKEN(MTYPE, _head)
 257#define mtype_list              IPSET_TOKEN(MTYPE, _list)
 258#define mtype_gc                IPSET_TOKEN(MTYPE, _gc)
 259#define mtype_gc_init           IPSET_TOKEN(MTYPE, _gc_init)
 260#define mtype_variant           IPSET_TOKEN(MTYPE, _variant)
 261#define mtype_data_match        IPSET_TOKEN(MTYPE, _data_match)
 262
 263#ifndef HKEY_DATALEN
 264#define HKEY_DATALEN            sizeof(struct mtype_elem)
 265#endif
 266
 267#define htype                   MTYPE
 268
 269#define HKEY(data, initval, htable_bits)                        \
 270({                                                              \
 271        const u32 *__k = (const u32 *)data;                     \
 272        u32 __l = HKEY_DATALEN / sizeof(u32);                   \
 273                                                                \
 274        BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0);          \
 275                                                                \
 276        jhash2(__k, __l, initval) & jhash_mask(htable_bits);    \
 277})
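
/* Example (illustrative): with htable_bits == 10, HKEY() hashes the first
 * HKEY_DATALEN bytes of the element as an array of u32 words and masks the
 * result down to a bucket index in [0, 1023].
 */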
 278
 279/* The generic hash structure */
 280struct htype {
 281        struct htable __rcu *table; /* the hash table */
 282        struct timer_list gc;   /* garbage collection when timeout enabled */
 283        struct ip_set *set;     /* attached to this ip_set */
 284        u32 maxelem;            /* max elements in the hash */
 285        u32 initval;            /* random jhash init value */
 286#ifdef IP_SET_HASH_WITH_MARKMASK
 287        u32 markmask;           /* markmask value for mark mask to store */
 288#endif
 289#ifdef IP_SET_HASH_WITH_MULTI
 290        u8 ahash_max;           /* max elements in an array block */
 291#endif
 292#ifdef IP_SET_HASH_WITH_NETMASK
 293        u8 netmask;             /* netmask value for subnets to store */
 294#endif
 295        struct mtype_elem next; /* temporary storage for uadd */
 296#ifdef IP_SET_HASH_WITH_NETS
 297        struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
 298#endif
 299};
 300
 301#ifdef IP_SET_HASH_WITH_NETS
 302/* Network cidr size book-keeping when the hash stores different
 303 * sized networks. cidr == real cidr + 1 to support /0.
 304 */
 305static void
 306mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
 307{
 308        int i, j;
 309
 310        /* Add in increasing prefix order, so larger cidr first */
 311        for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
 312                if (j != -1) {
 313                        continue;
 314                } else if (h->nets[i].cidr[n] < cidr) {
 315                        j = i;
 316                } else if (h->nets[i].cidr[n] == cidr) {
 317                        h->nets[CIDR_POS(cidr)].nets[n]++;
 318                        return;
 319                }
 320        }
 321        if (j != -1) {
 322                for (; i > j; i--)
 323                        h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
 324        }
 325        h->nets[i].cidr[n] = cidr;
 326        h->nets[CIDR_POS(cidr)].nets[n] = 1;
 327}
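
/* Worked example (illustrative, assuming IP_SET_HASH_WITH_NET0): after
 * adding entries with real prefixes /16, /24 and /28, the cidr[] slots read
 * 29, 25, 17 (cidr + 1, most specific network first), while the element
 * counts stay in the fixed slots nets[CIDR_POS(c)].nets[], i.e. they are
 * indexed by the cidr value itself, not by the ordered position.
 */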
 328
 329static void
 330mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
 331{
 332        u8 i, j, net_end = NLEN - 1;
 333
 334        for (i = 0; i < NLEN; i++) {
 335                if (h->nets[i].cidr[n] != cidr)
 336                        continue;
 337                h->nets[CIDR_POS(cidr)].nets[n]--;
 338                if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
 339                        return;
 340                for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
 341                        h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
 342                h->nets[j].cidr[n] = 0;
 343                return;
 344        }
 345}
 346#endif
 347
 348/* Calculate the actual memory size of the set data */
 349static size_t
 350mtype_ahash_memsize(const struct htype *h, const struct htable *t)
 351{
 352        return sizeof(*h) + sizeof(*t);
 353}
 354
 355/* Get the ith element from the array block n */
 356#define ahash_data(n, i, dsize) \
 357        ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
 358
 359static void
 360mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
 361{
 362        int i;
 363
 364        for (i = 0; i < n->pos; i++)
 365                if (test_bit(i, n->used))
 366                        ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
 367}
 368
 369/* Flush a hash type of set: destroy all elements */
 370static void
 371mtype_flush(struct ip_set *set)
 372{
 373        struct htype *h = set->data;
 374        struct htable *t;
 375        struct hbucket *n;
 376        u32 i;
 377
 378        t = ipset_dereference_protected(h->table, set);
 379        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 380                n = __ipset_dereference_protected(hbucket(t, i), 1);
 381                if (!n)
 382                        continue;
 383                if (set->extensions & IPSET_EXT_DESTROY)
 384                        mtype_ext_cleanup(set, n);
 385                /* FIXME: use slab cache */
 386                rcu_assign_pointer(hbucket(t, i), NULL);
 387                kfree_rcu(n, rcu);
 388        }
 389#ifdef IP_SET_HASH_WITH_NETS
 390        memset(h->nets, 0, sizeof(h->nets));
 391#endif
 392        set->elements = 0;
 393        set->ext_size = 0;
 394}
 395
 396/* Destroy the hashtable part of the set */
 397static void
 398mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
 399{
 400        struct hbucket *n;
 401        u32 i;
 402
 403        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 404                n = __ipset_dereference_protected(hbucket(t, i), 1);
 405                if (!n)
 406                        continue;
 407                if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
 408                        mtype_ext_cleanup(set, n);
 409                /* FIXME: use slab cache */
 410                kfree(n);
 411        }
 412
 413        ip_set_free(t);
 414}
 415
 416/* Destroy a hash type of set */
 417static void
 418mtype_destroy(struct ip_set *set)
 419{
 420        struct htype *h = set->data;
 421
 422        if (SET_WITH_TIMEOUT(set))
 423                del_timer_sync(&h->gc);
 424
 425        mtype_ahash_destroy(set,
 426                            __ipset_dereference_protected(h->table, 1), true);
 427        kfree(h);
 428
 429        set->data = NULL;
 430}
 431
 432static void
 433mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
 434{
 435        struct htype *h = set->data;
 436
 437        timer_setup(&h->gc, gc, 0);
 438        mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
 439        pr_debug("gc initialized, run in every %u\n",
 440                 IPSET_GC_PERIOD(set->timeout));
 441}
 442
 443static bool
 444mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 445{
 446        const struct htype *x = a->data;
 447        const struct htype *y = b->data;
 448
 449        /* Resizing changes htable_bits, so we ignore it */
 450        return x->maxelem == y->maxelem &&
 451               a->timeout == b->timeout &&
 452#ifdef IP_SET_HASH_WITH_NETMASK
 453               x->netmask == y->netmask &&
 454#endif
 455#ifdef IP_SET_HASH_WITH_MARKMASK
 456               x->markmask == y->markmask &&
 457#endif
 458               a->extensions == b->extensions;
 459}
 460
 461/* Delete expired elements from the hashtable */
 462static void
 463mtype_expire(struct ip_set *set, struct htype *h)
 464{
 465        struct htable *t;
 466        struct hbucket *n, *tmp;
 467        struct mtype_elem *data;
 468        u32 i, j, d;
 469        size_t dsize = set->dsize;
 470#ifdef IP_SET_HASH_WITH_NETS
 471        u8 k;
 472#endif
 473
 474        t = ipset_dereference_protected(h->table, set);
 475        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 476                n = __ipset_dereference_protected(hbucket(t, i), 1);
 477                if (!n)
 478                        continue;
 479                for (j = 0, d = 0; j < n->pos; j++) {
 480                        if (!test_bit(j, n->used)) {
 481                                d++;
 482                                continue;
 483                        }
 484                        data = ahash_data(n, j, dsize);
 485                        if (!ip_set_timeout_expired(ext_timeout(data, set)))
 486                                continue;
 487                        pr_debug("expired %u/%u\n", i, j);
 488                        clear_bit(j, n->used);
 489                        smp_mb__after_atomic();
 490#ifdef IP_SET_HASH_WITH_NETS
 491                        for (k = 0; k < IPSET_NET_COUNT; k++)
 492                                mtype_del_cidr(h,
 493                                        NCIDR_PUT(DCIDR_GET(data->cidr, k)),
 494                                        k);
 495#endif
 496                        ip_set_ext_destroy(set, data);
 497                        set->elements--;
 498                        d++;
 499                }
 500                if (d >= AHASH_INIT_SIZE) {
 501                        if (d >= n->size) {
 502                                rcu_assign_pointer(hbucket(t, i), NULL);
 503                                kfree_rcu(n, rcu);
 504                                continue;
 505                        }
 506                        tmp = kzalloc(sizeof(*tmp) +
 507                                      (n->size - AHASH_INIT_SIZE) * dsize,
 508                                      GFP_ATOMIC);
 509                        if (!tmp)
 510                                /* Still try to delete expired elements */
 511                                continue;
 512                        tmp->size = n->size - AHASH_INIT_SIZE;
 513                        for (j = 0, d = 0; j < n->pos; j++) {
 514                                if (!test_bit(j, n->used))
 515                                        continue;
 516                                data = ahash_data(n, j, dsize);
 517                                memcpy(tmp->value + d * dsize, data, dsize);
 518                                set_bit(d, tmp->used);
 519                                d++;
 520                        }
 521                        tmp->pos = d;
 522                        set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
 523                        rcu_assign_pointer(hbucket(t, i), tmp);
 524                        kfree_rcu(n, rcu);
 525                }
 526        }
 527}
 528
 529static void
 530mtype_gc(struct timer_list *t)
 531{
 532        struct htype *h = from_timer(h, t, gc);
 533        struct ip_set *set = h->set;
 534
 535        pr_debug("called\n");
 536        spin_lock_bh(&set->lock);
 537        mtype_expire(set, h);
 538        spin_unlock_bh(&set->lock);
 539
 540        h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 541        add_timer(&h->gc);
 542}
 543
 544/* Resize a hash: create a new hash table with double the hashsize
 545 * and insert the elements into it. Repeat until we succeed or
 546 * fail due to memory pressure.
 547 */
 548static int
 549mtype_resize(struct ip_set *set, bool retried)
 550{
 551        struct htype *h = set->data;
 552        struct htable *t, *orig;
 553        u8 htable_bits;
 554        size_t extsize, dsize = set->dsize;
 555#ifdef IP_SET_HASH_WITH_NETS
 556        u8 flags;
 557        struct mtype_elem *tmp;
 558#endif
 559        struct mtype_elem *data;
 560        struct mtype_elem *d;
 561        struct hbucket *n, *m;
 562        u32 i, j, key;
 563        int ret;
 564
 565#ifdef IP_SET_HASH_WITH_NETS
 566        tmp = kmalloc(dsize, GFP_KERNEL);
 567        if (!tmp)
 568                return -ENOMEM;
 569#endif
 570        rcu_read_lock_bh();
 571        orig = rcu_dereference_bh_nfnl(h->table);
 572        htable_bits = orig->htable_bits;
 573        rcu_read_unlock_bh();
 574
 575retry:
 576        ret = 0;
 577        htable_bits++;
 578        if (!htable_bits) {
 579                /* In case we have plenty of memory :-) */
 580                pr_warn("Cannot increase the hashsize of set %s further\n",
 581                        set->name);
 582                ret = -IPSET_ERR_HASH_FULL;
 583                goto out;
 584        }
 585        t = ip_set_alloc(htable_size(htable_bits));
 586        if (!t) {
 587                ret = -ENOMEM;
 588                goto out;
 589        }
 590        t->htable_bits = htable_bits;
 591
 592        spin_lock_bh(&set->lock);
 593        orig = __ipset_dereference_protected(h->table, 1);
 594        /* There can't be another parallel resizing, but dumping is possible */
 595        atomic_set(&orig->ref, 1);
 596        atomic_inc(&orig->uref);
 597        extsize = 0;
 598        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
 599                 set->name, orig->htable_bits, htable_bits, orig);
 600        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
 601                n = __ipset_dereference_protected(hbucket(orig, i), 1);
 602                if (!n)
 603                        continue;
 604                for (j = 0; j < n->pos; j++) {
 605                        if (!test_bit(j, n->used))
 606                                continue;
 607                        data = ahash_data(n, j, dsize);
 608#ifdef IP_SET_HASH_WITH_NETS
 609                        /* We have readers running in parallel with us,
 610                         * so the live data cannot be modified.
 611                         */
 612                        flags = 0;
 613                        memcpy(tmp, data, dsize);
 614                        data = tmp;
 615                        mtype_data_reset_flags(data, &flags);
 616#endif
 617                        key = HKEY(data, h->initval, htable_bits);
 618                        m = __ipset_dereference_protected(hbucket(t, key), 1);
 619                        if (!m) {
 620                                m = kzalloc(sizeof(*m) +
 621                                            AHASH_INIT_SIZE * dsize,
 622                                            GFP_ATOMIC);
 623                                if (!m) {
 624                                        ret = -ENOMEM;
 625                                        goto cleanup;
 626                                }
 627                                m->size = AHASH_INIT_SIZE;
 628                                extsize = ext_size(AHASH_INIT_SIZE, dsize);
 629                                RCU_INIT_POINTER(hbucket(t, key), m);
 630                        } else if (m->pos >= m->size) {
 631                                struct hbucket *ht;
 632
 633                                if (m->size >= AHASH_MAX(h)) {
 634                                        ret = -EAGAIN;
 635                                } else {
 636                                        ht = kzalloc(sizeof(*ht) +
 637                                                (m->size + AHASH_INIT_SIZE)
 638                                                * dsize,
 639                                                GFP_ATOMIC);
 640                                        if (!ht)
 641                                                ret = -ENOMEM;
 642                                }
 643                                if (ret < 0)
 644                                        goto cleanup;
 645                                memcpy(ht, m, sizeof(struct hbucket) +
 646                                              m->size * dsize);
 647                                ht->size = m->size + AHASH_INIT_SIZE;
 648                                extsize += ext_size(AHASH_INIT_SIZE, dsize);
 649                                kfree(m);
 650                                m = ht;
 651                                RCU_INIT_POINTER(hbucket(t, key), ht);
 652                        }
 653                        d = ahash_data(m, m->pos, dsize);
 654                        memcpy(d, data, dsize);
 655                        set_bit(m->pos++, m->used);
 656#ifdef IP_SET_HASH_WITH_NETS
 657                        mtype_data_reset_flags(d, &flags);
 658#endif
 659                }
 660        }
 661        rcu_assign_pointer(h->table, t);
 662        set->ext_size = extsize;
 663
 664        spin_unlock_bh(&set->lock);
 665
 666        /* Give time to other readers of the set */
 667        synchronize_rcu_bh();
 668
 669        pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
 670                 orig->htable_bits, orig, t->htable_bits, t);
 671        /* If there's nobody else dumping the table, destroy it */
 672        if (atomic_dec_and_test(&orig->uref)) {
 673                pr_debug("Table destroy by resize %p\n", orig);
 674                mtype_ahash_destroy(set, orig, false);
 675        }
 676
 677out:
 678#ifdef IP_SET_HASH_WITH_NETS
 679        kfree(tmp);
 680#endif
 681        return ret;
 682
 683cleanup:
 684        atomic_set(&orig->ref, 0);
 685        atomic_dec(&orig->uref);
 686        spin_unlock_bh(&set->lock);
 687        mtype_ahash_destroy(set, t, false);
 688        if (ret == -EAGAIN)
 689                goto retry;
 690        goto out;
 691}
 692
 693/* Add an element to a hash and update the internal counters on success,
 694 * otherwise report the proper error code.
 695 */
 696static int
 697mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 698          struct ip_set_ext *mext, u32 flags)
 699{
 700        struct htype *h = set->data;
 701        struct htable *t;
 702        const struct mtype_elem *d = value;
 703        struct mtype_elem *data;
 704        struct hbucket *n, *old = ERR_PTR(-ENOENT);
 705        int i, j = -1;
 706        bool flag_exist = flags & IPSET_FLAG_EXIST;
 707        bool deleted = false, forceadd = false, reuse = false;
 708        u32 key, multi = 0;
 709
 710        if (set->elements >= h->maxelem) {
 711                if (SET_WITH_TIMEOUT(set))
 712                        /* FIXME: when set is full, we slow down here */
 713                        mtype_expire(set, h);
 714                if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
 715                        forceadd = true;
 716        }
 717
 718        t = ipset_dereference_protected(h->table, set);
 719        key = HKEY(value, h->initval, t->htable_bits);
 720        n = __ipset_dereference_protected(hbucket(t, key), 1);
 721        if (!n) {
 722                if (forceadd || set->elements >= h->maxelem)
 723                        goto set_full;
 724                old = NULL;
 725                n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
 726                            GFP_ATOMIC);
 727                if (!n)
 728                        return -ENOMEM;
 729                n->size = AHASH_INIT_SIZE;
 730                set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
 731                goto copy_elem;
 732        }
 733        for (i = 0; i < n->pos; i++) {
 734                if (!test_bit(i, n->used)) {
 735                        /* Reuse first deleted entry */
 736                        if (j == -1) {
 737                                deleted = reuse = true;
 738                                j = i;
 739                        }
 740                        continue;
 741                }
 742                data = ahash_data(n, i, set->dsize);
 743                if (mtype_data_equal(data, d, &multi)) {
 744                        if (flag_exist ||
 745                            (SET_WITH_TIMEOUT(set) &&
 746                             ip_set_timeout_expired(ext_timeout(data, set)))) {
 747                                /* Just the extensions could be overwritten */
 748                                j = i;
 749                                goto overwrite_extensions;
 750                        }
 751                        return -IPSET_ERR_EXIST;
 752                }
 753                /* Reuse first timed out entry */
 754                if (SET_WITH_TIMEOUT(set) &&
 755                    ip_set_timeout_expired(ext_timeout(data, set)) &&
 756                    j == -1) {
 757                        j = i;
 758                        reuse = true;
 759                }
 760        }
 761        if (reuse || forceadd) {
 762                data = ahash_data(n, j, set->dsize);
 763                if (!deleted) {
 764#ifdef IP_SET_HASH_WITH_NETS
 765                        for (i = 0; i < IPSET_NET_COUNT; i++)
 766                                mtype_del_cidr(h,
 767                                        NCIDR_PUT(DCIDR_GET(data->cidr, i)),
 768                                        i);
 769#endif
 770                        ip_set_ext_destroy(set, data);
 771                        set->elements--;
 772                }
 773                goto copy_data;
 774        }
 775        if (set->elements >= h->maxelem)
 776                goto set_full;
 777        /* Create a new slot */
 778        if (n->pos >= n->size) {
 779                TUNE_AHASH_MAX(h, multi);
 780                if (n->size >= AHASH_MAX(h)) {
 781                        /* Trigger rehashing */
 782                        mtype_data_next(&h->next, d);
 783                        return -EAGAIN;
 784                }
 785                old = n;
 786                n = kzalloc(sizeof(*n) +
 787                            (old->size + AHASH_INIT_SIZE) * set->dsize,
 788                            GFP_ATOMIC);
 789                if (!n)
 790                        return -ENOMEM;
 791                memcpy(n, old, sizeof(struct hbucket) +
 792                       old->size * set->dsize);
 793                n->size = old->size + AHASH_INIT_SIZE;
 794                set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
 795        }
 796
 797copy_elem:
 798        j = n->pos++;
 799        data = ahash_data(n, j, set->dsize);
 800copy_data:
 801        set->elements++;
 802#ifdef IP_SET_HASH_WITH_NETS
 803        for (i = 0; i < IPSET_NET_COUNT; i++)
 804                mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
 805#endif
 806        memcpy(data, d, sizeof(struct mtype_elem));
 807overwrite_extensions:
 808#ifdef IP_SET_HASH_WITH_NETS
 809        mtype_data_set_flags(data, flags);
 810#endif
 811        if (SET_WITH_COUNTER(set))
 812                ip_set_init_counter(ext_counter(data, set), ext);
 813        if (SET_WITH_COMMENT(set))
 814                ip_set_init_comment(set, ext_comment(data, set), ext);
 815        if (SET_WITH_SKBINFO(set))
 816                ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
 817        /* Must come last for the case when a timed-out entry is reused */
 818        if (SET_WITH_TIMEOUT(set))
 819                ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
 820        smp_mb__before_atomic();
 821        set_bit(j, n->used);
 822        if (old != ERR_PTR(-ENOENT)) {
 823                rcu_assign_pointer(hbucket(t, key), n);
 824                if (old)
 825                        kfree_rcu(old, rcu);
 826        }
 827
 828        return 0;
 829set_full:
 830        if (net_ratelimit())
 831                pr_warn("Set %s is full, maxelem %u reached\n",
 832                        set->name, h->maxelem);
 833        return -IPSET_ERR_HASH_FULL;
 834}
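
/* Note: the -EAGAIN returned above when a bucket cannot grow any further is
 * not reported to userspace as an error; the ip_set core is expected to
 * react by resizing the set (mtype_resize()) and retrying the add.
 */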
 835
 836/* Delete an element from the hash and free up space if possible.
 837 */
 838static int
 839mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 840          struct ip_set_ext *mext, u32 flags)
 841{
 842        struct htype *h = set->data;
 843        struct htable *t;
 844        const struct mtype_elem *d = value;
 845        struct mtype_elem *data;
 846        struct hbucket *n;
 847        int i, j, k, ret = -IPSET_ERR_EXIST;
 848        u32 key, multi = 0;
 849        size_t dsize = set->dsize;
 850
 851        t = ipset_dereference_protected(h->table, set);
 852        key = HKEY(value, h->initval, t->htable_bits);
 853        n = __ipset_dereference_protected(hbucket(t, key), 1);
 854        if (!n)
 855                goto out;
 856        for (i = 0, k = 0; i < n->pos; i++) {
 857                if (!test_bit(i, n->used)) {
 858                        k++;
 859                        continue;
 860                }
 861                data = ahash_data(n, i, dsize);
 862                if (!mtype_data_equal(data, d, &multi))
 863                        continue;
 864                if (SET_WITH_TIMEOUT(set) &&
 865                    ip_set_timeout_expired(ext_timeout(data, set)))
 866                        goto out;
 867
 868                ret = 0;
 869                clear_bit(i, n->used);
 870                smp_mb__after_atomic();
 871                if (i + 1 == n->pos)
 872                        n->pos--;
 873                set->elements--;
 874#ifdef IP_SET_HASH_WITH_NETS
 875                for (j = 0; j < IPSET_NET_COUNT; j++)
 876                        mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
 877                                       j);
 878#endif
 879                ip_set_ext_destroy(set, data);
 880
 881                for (; i < n->pos; i++) {
 882                        if (!test_bit(i, n->used))
 883                                k++;
 884                }
 885                if (n->pos == 0 && k == 0) {
 886                        set->ext_size -= ext_size(n->size, dsize);
 887                        rcu_assign_pointer(hbucket(t, key), NULL);
 888                        kfree_rcu(n, rcu);
 889                } else if (k >= AHASH_INIT_SIZE) {
 890                        struct hbucket *tmp = kzalloc(sizeof(*tmp) +
 891                                        (n->size - AHASH_INIT_SIZE) * dsize,
 892                                        GFP_ATOMIC);
 893                        if (!tmp)
 894                                goto out;
 895                        tmp->size = n->size - AHASH_INIT_SIZE;
 896                        for (j = 0, k = 0; j < n->pos; j++) {
 897                                if (!test_bit(j, n->used))
 898                                        continue;
 899                                data = ahash_data(n, j, dsize);
 900                                memcpy(tmp->value + k * dsize, data, dsize);
 901                                set_bit(k, tmp->used);
 902                                k++;
 903                        }
 904                        tmp->pos = k;
 905                        set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
 906                        rcu_assign_pointer(hbucket(t, key), tmp);
 907                        kfree_rcu(n, rcu);
 908                }
 909                goto out;
 910        }
 911
 912out:
 913        return ret;
 914}
 915
 916static inline int
 917mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
 918                 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
 919{
 920        if (!ip_set_match_extensions(set, ext, mext, flags, data))
 921                return 0;
 922        /* nomatch entries return -ENOTEMPTY */
 923        return mtype_do_data_match(data);
 924}
 925
 926#ifdef IP_SET_HASH_WITH_NETS
 927/* Special test function which takes into account the different network
 928 * sizes added to the set
 929 */
 930static int
 931mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 932                 const struct ip_set_ext *ext,
 933                 struct ip_set_ext *mext, u32 flags)
 934{
 935        struct htype *h = set->data;
 936        struct htable *t = rcu_dereference_bh(h->table);
 937        struct hbucket *n;
 938        struct mtype_elem *data;
 939#if IPSET_NET_COUNT == 2
 940        struct mtype_elem orig = *d;
 941        int ret, i, j = 0, k;
 942#else
 943        int ret, i, j = 0;
 944#endif
 945        u32 key, multi = 0;
 946
 947        pr_debug("test by nets\n");
 948        for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
 949#if IPSET_NET_COUNT == 2
 950                mtype_data_reset_elem(d, &orig);
 951                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
 952                for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
 953                     k++) {
 954                        mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
 955                                           true);
 956#else
 957                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
 958#endif
 959                key = HKEY(d, h->initval, t->htable_bits);
 960                n = rcu_dereference_bh(hbucket(t, key));
 961                if (!n)
 962                        continue;
 963                for (i = 0; i < n->pos; i++) {
 964                        if (!test_bit(i, n->used))
 965                                continue;
 966                        data = ahash_data(n, i, set->dsize);
 967                        if (!mtype_data_equal(data, d, &multi))
 968                                continue;
 969                        ret = mtype_data_match(data, ext, mext, set, flags);
 970                        if (ret != 0)
 971                                return ret;
 972#ifdef IP_SET_HASH_WITH_MULTI
 973                        /* No match, reset multiple match flag */
 974                        multi = 0;
 975#endif
 976                }
 977#if IPSET_NET_COUNT == 2
 978                }
 979#endif
 980        }
 981        return 0;
 982}
 983#endif
 984
 985/* Test whether the element is added to the set */
 986static int
 987mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 988           struct ip_set_ext *mext, u32 flags)
 989{
 990        struct htype *h = set->data;
 991        struct htable *t;
 992        struct mtype_elem *d = value;
 993        struct hbucket *n;
 994        struct mtype_elem *data;
 995        int i, ret = 0;
 996        u32 key, multi = 0;
 997
 998        t = rcu_dereference_bh(h->table);
 999#ifdef IP_SET_HASH_WITH_NETS
1000        /* If we test an IP address and not a network address,
1001         * try all possible network sizes
1002         */
1003        for (i = 0; i < IPSET_NET_COUNT; i++)
1004                if (DCIDR_GET(d->cidr, i) != HOST_MASK)
1005                        break;
1006        if (i == IPSET_NET_COUNT) {
1007                ret = mtype_test_cidrs(set, d, ext, mext, flags);
1008                goto out;
1009        }
1010#endif
1011
1012        key = HKEY(d, h->initval, t->htable_bits);
1013        n = rcu_dereference_bh(hbucket(t, key));
1014        if (!n) {
1015                ret = 0;
1016                goto out;
1017        }
1018        for (i = 0; i < n->pos; i++) {
1019                if (!test_bit(i, n->used))
1020                        continue;
1021                data = ahash_data(n, i, set->dsize);
1022                if (!mtype_data_equal(data, d, &multi))
1023                        continue;
1024                ret = mtype_data_match(data, ext, mext, set, flags);
1025                if (ret != 0)
1026                        goto out;
1027        }
1028out:
1029        return ret;
1030}
1031
1032/* Reply to a HEADER request: fill out the header part of the set */
1033static int
1034mtype_head(struct ip_set *set, struct sk_buff *skb)
1035{
1036        struct htype *h = set->data;
1037        const struct htable *t;
1038        struct nlattr *nested;
1039        size_t memsize;
1040        u8 htable_bits;
1041
1042        /* If any members have expired, set->elements will be wrong;
1043         * the mtype_expire function will update it with the right count.
1044         * We do not hold set->lock here, so grab it first.
1045         * set->elements can still be incorrect in the case of a huge set,
1046         * because elements might time out during the listing.
1047         */
1048        if (SET_WITH_TIMEOUT(set)) {
1049                spin_lock_bh(&set->lock);
1050                mtype_expire(set, h);
1051                spin_unlock_bh(&set->lock);
1052        }
1053
1054        rcu_read_lock_bh();
1055        t = rcu_dereference_bh_nfnl(h->table);
1056        memsize = mtype_ahash_memsize(h, t) + set->ext_size;
1057        htable_bits = t->htable_bits;
1058        rcu_read_unlock_bh();
1059
1060        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
1061        if (!nested)
1062                goto nla_put_failure;
1063        if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
1064                          htonl(jhash_size(htable_bits))) ||
1065            nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
1066                goto nla_put_failure;
1067#ifdef IP_SET_HASH_WITH_NETMASK
1068        if (h->netmask != HOST_MASK &&
1069            nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
1070                goto nla_put_failure;
1071#endif
1072#ifdef IP_SET_HASH_WITH_MARKMASK
1073        if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
1074                goto nla_put_failure;
1075#endif
1076        if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
1077            nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
1078            nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
1079                goto nla_put_failure;
1080        if (unlikely(ip_set_put_flags(skb, set)))
1081                goto nla_put_failure;
1082        ipset_nest_end(skb, nested);
1083
1084        return 0;
1085nla_put_failure:
1086        return -EMSGSIZE;
1087}
1088
1089/* Make it possible to run dumping in parallel with resizing */
1090static void
1091mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
1092{
1093        struct htype *h = set->data;
1094        struct htable *t;
1095
1096        if (start) {
1097                rcu_read_lock_bh();
1098                t = rcu_dereference_bh_nfnl(h->table);
1099                atomic_inc(&t->uref);
1100                cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
1101                rcu_read_unlock_bh();
1102        } else if (cb->args[IPSET_CB_PRIVATE]) {
1103                t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
1104                if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
1105                        /* Resizing didn't destroy the hash table */
1106                        pr_debug("Table destroy by dump: %p\n", t);
1107                        mtype_ahash_destroy(set, t, false);
1108                }
1109                cb->args[IPSET_CB_PRIVATE] = 0;
1110        }
1111}
1112
1113/* Reply to a LIST/SAVE request: dump the elements of the specified set */
1114static int
1115mtype_list(const struct ip_set *set,
1116           struct sk_buff *skb, struct netlink_callback *cb)
1117{
1118        const struct htable *t;
1119        struct nlattr *atd, *nested;
1120        const struct hbucket *n;
1121        const struct mtype_elem *e;
1122        u32 first = cb->args[IPSET_CB_ARG0];
1123        /* We assume that one hash bucket fits into one page */
1124        void *incomplete;
1125        int i, ret = 0;
1126
1127        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
1128        if (!atd)
1129                return -EMSGSIZE;
1130
1131        pr_debug("list hash set %s\n", set->name);
1132        t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
1133        /* Expire may replace a hbucket with another one */
1134        rcu_read_lock();
1135        for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
1136             cb->args[IPSET_CB_ARG0]++) {
1137                cond_resched_rcu();
1138                incomplete = skb_tail_pointer(skb);
1139                n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
1140                pr_debug("cb->arg bucket: %lu, t %p n %p\n",
1141                         cb->args[IPSET_CB_ARG0], t, n);
1142                if (!n)
1143                        continue;
1144                for (i = 0; i < n->pos; i++) {
1145                        if (!test_bit(i, n->used))
1146                                continue;
1147                        e = ahash_data(n, i, set->dsize);
1148                        if (SET_WITH_TIMEOUT(set) &&
1149                            ip_set_timeout_expired(ext_timeout(e, set)))
1150                                continue;
1151                        pr_debug("list hash %lu hbucket %p i %u, data %p\n",
1152                                 cb->args[IPSET_CB_ARG0], n, i, e);
1153                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
1154                        if (!nested) {
1155                                if (cb->args[IPSET_CB_ARG0] == first) {
1156                                        nla_nest_cancel(skb, atd);
1157                                        ret = -EMSGSIZE;
1158                                        goto out;
1159                                }
1160                                goto nla_put_failure;
1161                        }
1162                        if (mtype_data_list(skb, e))
1163                                goto nla_put_failure;
1164                        if (ip_set_put_extensions(skb, set, e, true))
1165                                goto nla_put_failure;
1166                        ipset_nest_end(skb, nested);
1167                }
1168        }
1169        ipset_nest_end(skb, atd);
1170        /* Set listing finished */
1171        cb->args[IPSET_CB_ARG0] = 0;
1172
1173        goto out;
1174
1175nla_put_failure:
1176        nlmsg_trim(skb, incomplete);
1177        if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
1178                pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
1179                        set->name);
1180                cb->args[IPSET_CB_ARG0] = 0;
1181                ret = -EMSGSIZE;
1182        } else {
1183                ipset_nest_end(skb, atd);
1184        }
1185out:
1186        rcu_read_unlock();
1187        return ret;
1188}
1189
1190static int
1191IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
1192                          const struct xt_action_param *par,
1193                          enum ipset_adt adt, struct ip_set_adt_opt *opt);
1194
1195static int
1196IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
1197                          enum ipset_adt adt, u32 *lineno, u32 flags,
1198                          bool retried);
1199
1200static const struct ip_set_type_variant mtype_variant = {
1201        .kadt   = mtype_kadt,
1202        .uadt   = mtype_uadt,
1203        .adt    = {
1204                [IPSET_ADD] = mtype_add,
1205                [IPSET_DEL] = mtype_del,
1206                [IPSET_TEST] = mtype_test,
1207        },
1208        .destroy = mtype_destroy,
1209        .flush  = mtype_flush,
1210        .head   = mtype_head,
1211        .list   = mtype_list,
1212        .uref   = mtype_uref,
1213        .resize = mtype_resize,
1214        .same_set = mtype_same_set,
1215};
1216
1217#ifdef IP_SET_EMIT_CREATE
1218static int
1219IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1220                            struct nlattr *tb[], u32 flags)
1221{
1222        u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
1223#ifdef IP_SET_HASH_WITH_MARKMASK
1224        u32 markmask;
1225#endif
1226        u8 hbits;
1227#ifdef IP_SET_HASH_WITH_NETMASK
1228        u8 netmask;
1229#endif
1230        size_t hsize;
1231        struct htype *h;
1232        struct htable *t;
1233
1234        pr_debug("Create set %s with family %s\n",
1235                 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
1236
1237#ifdef IP_SET_PROTO_UNDEF
1238        if (set->family != NFPROTO_UNSPEC)
1239                return -IPSET_ERR_INVALID_FAMILY;
1240#else
1241        if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
1242                return -IPSET_ERR_INVALID_FAMILY;
1243#endif
1244
1245        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
1246                     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
1247                     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
1248                     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
1249                return -IPSET_ERR_PROTOCOL;
1250
1251#ifdef IP_SET_HASH_WITH_MARKMASK
1252        /* Separated condition in order to avoid directive in argument list */
1253        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
1254                return -IPSET_ERR_PROTOCOL;
1255
1256        markmask = 0xffffffff;
1257        if (tb[IPSET_ATTR_MARKMASK]) {
1258                markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
1259                if (markmask == 0)
1260                        return -IPSET_ERR_INVALID_MARKMASK;
1261        }
1262#endif
1263
1264#ifdef IP_SET_HASH_WITH_NETMASK
1265        netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
1266        if (tb[IPSET_ATTR_NETMASK]) {
1267                netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
1268
1269                if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
1270                    (set->family == NFPROTO_IPV6 && netmask > 128) ||
1271                    netmask == 0)
1272                        return -IPSET_ERR_INVALID_NETMASK;
1273        }
1274#endif
1275
1276        if (tb[IPSET_ATTR_HASHSIZE]) {
1277                hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
1278                if (hashsize < IPSET_MIMINAL_HASHSIZE)
1279                        hashsize = IPSET_MIMINAL_HASHSIZE;
1280        }
1281
1282        if (tb[IPSET_ATTR_MAXELEM])
1283                maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
1284
1285        hsize = sizeof(*h);
1286        h = kzalloc(hsize, GFP_KERNEL);
1287        if (!h)
1288                return -ENOMEM;
1289
1290        hbits = htable_bits(hashsize);
1291        hsize = htable_size(hbits);
1292        if (hsize == 0) {
1293                kfree(h);
1294                return -ENOMEM;
1295        }
1296        t = ip_set_alloc(hsize);
1297        if (!t) {
1298                kfree(h);
1299                return -ENOMEM;
1300        }
1301        h->maxelem = maxelem;
1302#ifdef IP_SET_HASH_WITH_NETMASK
1303        h->netmask = netmask;
1304#endif
1305#ifdef IP_SET_HASH_WITH_MARKMASK
1306        h->markmask = markmask;
1307#endif
1308        get_random_bytes(&h->initval, sizeof(h->initval));
1309
1310        t->htable_bits = hbits;
1311        RCU_INIT_POINTER(h->table, t);
1312
1313        h->set = set;
1314        set->data = h;
1315#ifndef IP_SET_PROTO_UNDEF
1316        if (set->family == NFPROTO_IPV4) {
1317#endif
1318                set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
1319                set->dsize = ip_set_elem_len(set, tb,
1320                        sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
1321                        __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
1322#ifndef IP_SET_PROTO_UNDEF
1323        } else {
1324                set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
1325                set->dsize = ip_set_elem_len(set, tb,
1326                        sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
1327                        __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
1328        }
1329#endif
1330        set->timeout = IPSET_NO_TIMEOUT;
1331        if (tb[IPSET_ATTR_TIMEOUT]) {
1332                set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
1333#ifndef IP_SET_PROTO_UNDEF
1334                if (set->family == NFPROTO_IPV4)
1335#endif
1336                        IPSET_TOKEN(HTYPE, 4_gc_init)(set,
1337                                IPSET_TOKEN(HTYPE, 4_gc));
1338#ifndef IP_SET_PROTO_UNDEF
1339                else
1340                        IPSET_TOKEN(HTYPE, 6_gc_init)(set,
1341                                IPSET_TOKEN(HTYPE, 6_gc));
1342#endif
1343        }
1344        pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
1345                 set->name, jhash_size(t->htable_bits),
1346                 t->htable_bits, h->maxelem, set->data, t);
1347
1348        return 0;
1349}
1350#endif /* IP_SET_EMIT_CREATE */
1351
1352#undef HKEY_DATALEN
1353