linux/net/netfilter/ipset/ip_set_hash_gen.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-only */
   2/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
   3
   4#ifndef _IP_SET_HASH_GEN_H
   5#define _IP_SET_HASH_GEN_H
   6
   7#include <linux/rcupdate.h>
   8#include <linux/jhash.h>
   9#include <linux/types.h>
  10#include <linux/netfilter/ipset/ip_set_timeout.h>
  11
  12#define __ipset_dereference_protected(p, c)     rcu_dereference_protected(p, c)
  13#define ipset_dereference_protected(p, set) \
  14        __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock))
  15
  16#define rcu_dereference_bh_nfnl(p)      rcu_dereference_bh_check(p, 1)
  17
  18/* Hashing which uses arrays to resolve clashing. The hash table is resized
  19 * (doubled) when searching becomes too long.
  20 * Internally jhash is used with the assumption that the size of the
  21 * stored data is a multiple of sizeof(u32).
  22 *
  23 * Readers and resizing
  24 *
  25 * Resizing can be triggered by userspace command only, and those
  26 * are serialized by the nfnl mutex. During resizing the set is
  27 * read-locked, so the only possible concurrent operations are
  28 * the kernel side readers. Those must be protected by proper RCU locking.
  29 */
  30
  31/* Number of elements to store in an initial array block */
  32#define AHASH_INIT_SIZE                 4
  33/* Max number of elements to store in an array block */
  34#define AHASH_MAX_SIZE                  (3 * AHASH_INIT_SIZE)
  35/* Max muber of elements in the array block when tuned */
  36#define AHASH_MAX_TUNED                 64
  37
  38/* Max number of elements can be tuned */
  39#ifdef IP_SET_HASH_WITH_MULTI
  40#define AHASH_MAX(h)                    ((h)->ahash_max)
  41
  42static inline u8
  43tune_ahash_max(u8 curr, u32 multi)
  44{
  45        u32 n;
  46
  47        if (multi < curr)
  48                return curr;
  49
  50        n = curr + AHASH_INIT_SIZE;
  51        /* Currently, at listing one hash bucket must fit into a message.
  52         * Therefore we have a hard limit here.
  53         */
  54        return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
  55}
  56
  57#define TUNE_AHASH_MAX(h, multi)        \
  58        ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
  59#else
  60#define AHASH_MAX(h)                    AHASH_MAX_SIZE
  61#define TUNE_AHASH_MAX(h, multi)
  62#endif
  63
  64/* A hash bucket */
  65struct hbucket {
  66        struct rcu_head rcu;    /* for call_rcu */
  67        /* Which positions are used in the array */
  68        DECLARE_BITMAP(used, AHASH_MAX_TUNED);
  69        u8 size;                /* size of the array */
  70        u8 pos;                 /* position of the first free entry */
  71        unsigned char value[0]  /* the array of the values */
  72                __aligned(__alignof__(u64));
  73};
  74
  75/* The hash table: the table size stored here in order to make resizing easy */
  76struct htable {
  77        atomic_t ref;           /* References for resizing */
  78        atomic_t uref;          /* References for dumping */
  79        u8 htable_bits;         /* size of hash table == 2^htable_bits */
  80        struct hbucket __rcu *bucket[0]; /* hashtable buckets */
  81};
  82
  83#define hbucket(h, i)           ((h)->bucket[i])
  84#define ext_size(n, dsize)      \
  85        (sizeof(struct hbucket) + (n) * (dsize))
  86
  87#ifndef IPSET_NET_COUNT
  88#define IPSET_NET_COUNT         1
  89#endif
  90
  91/* Book-keeping of the prefixes added to the set */
  92struct net_prefixes {
  93        u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
  94        u8 cidr[IPSET_NET_COUNT];  /* the cidr value */
  95};
  96
  97/* Compute the hash table size */
  98static size_t
  99htable_size(u8 hbits)
 100{
 101        size_t hsize;
 102
 103        /* We must fit both into u32 in jhash and size_t */
 104        if (hbits > 31)
 105                return 0;
 106        hsize = jhash_size(hbits);
 107        if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
 108            < hsize)
 109                return 0;
 110
 111        return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
 112}
 113
 114/* Compute htable_bits from the user input parameter hashsize */
 115static u8
 116htable_bits(u32 hashsize)
 117{
 118        /* Assume that hashsize == 2^htable_bits */
 119        u8 bits = fls(hashsize - 1);
 120
 121        if (jhash_size(bits) != hashsize)
 122                /* Round up to the first 2^n value */
 123                bits = fls(hashsize);
 124
 125        return bits;
 126}
 127
 128#ifdef IP_SET_HASH_WITH_NETS
 129#if IPSET_NET_COUNT > 1
 130#define __CIDR(cidr, i)         (cidr[i])
 131#else
 132#define __CIDR(cidr, i)         (cidr)
 133#endif
 134
 135/* cidr + 1 is stored in net_prefixes to support /0 */
 136#define NCIDR_PUT(cidr)         ((cidr) + 1)
 137#define NCIDR_GET(cidr)         ((cidr) - 1)
 138
 139#ifdef IP_SET_HASH_WITH_NETS_PACKED
 140/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
 141#define DCIDR_PUT(cidr)         ((cidr) - 1)
 142#define DCIDR_GET(cidr, i)      (__CIDR(cidr, i) + 1)
 143#else
 144#define DCIDR_PUT(cidr)         (cidr)
 145#define DCIDR_GET(cidr, i)      __CIDR(cidr, i)
 146#endif
 147
 148#define INIT_CIDR(cidr, host_mask)      \
 149        DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
 150
 151#ifdef IP_SET_HASH_WITH_NET0
 152/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
 153#define NLEN                    (HOST_MASK + 1)
 154#define CIDR_POS(c)             ((c) - 1)
 155#else
 156/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
 157#define NLEN                    HOST_MASK
 158#define CIDR_POS(c)             ((c) - 2)
 159#endif
 160
 161#else
 162#define NLEN                    0
 163#endif /* IP_SET_HASH_WITH_NETS */
 164
 165#endif /* _IP_SET_HASH_GEN_H */
 166
 167#ifndef MTYPE
 168#error "MTYPE is not defined!"
 169#endif
 170
 171#ifndef HTYPE
 172#error "HTYPE is not defined!"
 173#endif
 174
 175#ifndef HOST_MASK
 176#error "HOST_MASK is not defined!"
 177#endif
 178
 179/* Family dependent templates */
 180
 181#undef ahash_data
 182#undef mtype_data_equal
 183#undef mtype_do_data_match
 184#undef mtype_data_set_flags
 185#undef mtype_data_reset_elem
 186#undef mtype_data_reset_flags
 187#undef mtype_data_netmask
 188#undef mtype_data_list
 189#undef mtype_data_next
 190#undef mtype_elem
 191
 192#undef mtype_ahash_destroy
 193#undef mtype_ext_cleanup
 194#undef mtype_add_cidr
 195#undef mtype_del_cidr
 196#undef mtype_ahash_memsize
 197#undef mtype_flush
 198#undef mtype_destroy
 199#undef mtype_same_set
 200#undef mtype_kadt
 201#undef mtype_uadt
 202
 203#undef mtype_add
 204#undef mtype_del
 205#undef mtype_test_cidrs
 206#undef mtype_test
 207#undef mtype_uref
 208#undef mtype_expire
 209#undef mtype_resize
 210#undef mtype_head
 211#undef mtype_list
 212#undef mtype_gc
 213#undef mtype_gc_init
 214#undef mtype_variant
 215#undef mtype_data_match
 216
 217#undef htype
 218#undef HKEY
 219
 220#define mtype_data_equal        IPSET_TOKEN(MTYPE, _data_equal)
 221#ifdef IP_SET_HASH_WITH_NETS
 222#define mtype_do_data_match     IPSET_TOKEN(MTYPE, _do_data_match)
 223#else
 224#define mtype_do_data_match(d)  1
 225#endif
 226#define mtype_data_set_flags    IPSET_TOKEN(MTYPE, _data_set_flags)
 227#define mtype_data_reset_elem   IPSET_TOKEN(MTYPE, _data_reset_elem)
 228#define mtype_data_reset_flags  IPSET_TOKEN(MTYPE, _data_reset_flags)
 229#define mtype_data_netmask      IPSET_TOKEN(MTYPE, _data_netmask)
 230#define mtype_data_list         IPSET_TOKEN(MTYPE, _data_list)
 231#define mtype_data_next         IPSET_TOKEN(MTYPE, _data_next)
 232#define mtype_elem              IPSET_TOKEN(MTYPE, _elem)
 233
 234#define mtype_ahash_destroy     IPSET_TOKEN(MTYPE, _ahash_destroy)
 235#define mtype_ext_cleanup       IPSET_TOKEN(MTYPE, _ext_cleanup)
 236#define mtype_add_cidr          IPSET_TOKEN(MTYPE, _add_cidr)
 237#define mtype_del_cidr          IPSET_TOKEN(MTYPE, _del_cidr)
 238#define mtype_ahash_memsize     IPSET_TOKEN(MTYPE, _ahash_memsize)
 239#define mtype_flush             IPSET_TOKEN(MTYPE, _flush)
 240#define mtype_destroy           IPSET_TOKEN(MTYPE, _destroy)
 241#define mtype_same_set          IPSET_TOKEN(MTYPE, _same_set)
 242#define mtype_kadt              IPSET_TOKEN(MTYPE, _kadt)
 243#define mtype_uadt              IPSET_TOKEN(MTYPE, _uadt)
 244
 245#define mtype_add               IPSET_TOKEN(MTYPE, _add)
 246#define mtype_del               IPSET_TOKEN(MTYPE, _del)
 247#define mtype_test_cidrs        IPSET_TOKEN(MTYPE, _test_cidrs)
 248#define mtype_test              IPSET_TOKEN(MTYPE, _test)
 249#define mtype_uref              IPSET_TOKEN(MTYPE, _uref)
 250#define mtype_expire            IPSET_TOKEN(MTYPE, _expire)
 251#define mtype_resize            IPSET_TOKEN(MTYPE, _resize)
 252#define mtype_head              IPSET_TOKEN(MTYPE, _head)
 253#define mtype_list              IPSET_TOKEN(MTYPE, _list)
 254#define mtype_gc                IPSET_TOKEN(MTYPE, _gc)
 255#define mtype_gc_init           IPSET_TOKEN(MTYPE, _gc_init)
 256#define mtype_variant           IPSET_TOKEN(MTYPE, _variant)
 257#define mtype_data_match        IPSET_TOKEN(MTYPE, _data_match)
 258
 259#ifndef HKEY_DATALEN
 260#define HKEY_DATALEN            sizeof(struct mtype_elem)
 261#endif
 262
 263#define htype                   MTYPE
 264
 265#define HKEY(data, initval, htable_bits)                        \
 266({                                                              \
 267        const u32 *__k = (const u32 *)data;                     \
 268        u32 __l = HKEY_DATALEN / sizeof(u32);                   \
 269                                                                \
 270        BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0);          \
 271                                                                \
 272        jhash2(__k, __l, initval) & jhash_mask(htable_bits);    \
 273})
 274
 275/* The generic hash structure */
 276struct htype {
 277        struct htable __rcu *table; /* the hash table */
 278        struct timer_list gc;   /* garbage collection when timeout enabled */
 279        struct ip_set *set;     /* attached to this ip_set */
 280        u32 maxelem;            /* max elements in the hash */
 281        u32 initval;            /* random jhash init value */
 282#ifdef IP_SET_HASH_WITH_MARKMASK
 283        u32 markmask;           /* markmask value for mark mask to store */
 284#endif
 285#ifdef IP_SET_HASH_WITH_MULTI
 286        u8 ahash_max;           /* max elements in an array block */
 287#endif
 288#ifdef IP_SET_HASH_WITH_NETMASK
 289        u8 netmask;             /* netmask value for subnets to store */
 290#endif
 291        struct mtype_elem next; /* temporary storage for uadd */
 292#ifdef IP_SET_HASH_WITH_NETS
 293        struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
 294#endif
 295};
 296
 297#ifdef IP_SET_HASH_WITH_NETS
 298/* Network cidr size book keeping when the hash stores different
 299 * sized networks. cidr == real cidr + 1 to support /0.
 300 */
 301static void
 302mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
 303{
 304        int i, j;
 305
 306        /* Add in increasing prefix order, so larger cidr first */
 307        for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
 308                if (j != -1) {
 309                        continue;
 310                } else if (h->nets[i].cidr[n] < cidr) {
 311                        j = i;
 312                } else if (h->nets[i].cidr[n] == cidr) {
 313                        h->nets[CIDR_POS(cidr)].nets[n]++;
 314                        return;
 315                }
 316        }
 317        if (j != -1) {
 318                for (; i > j; i--)
 319                        h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
 320        }
 321        h->nets[i].cidr[n] = cidr;
 322        h->nets[CIDR_POS(cidr)].nets[n] = 1;
 323}
 324
 325static void
 326mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
 327{
 328        u8 i, j, net_end = NLEN - 1;
 329
 330        for (i = 0; i < NLEN; i++) {
 331                if (h->nets[i].cidr[n] != cidr)
 332                        continue;
 333                h->nets[CIDR_POS(cidr)].nets[n]--;
 334                if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
 335                        return;
 336                for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
 337                        h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
 338                h->nets[j].cidr[n] = 0;
 339                return;
 340        }
 341}
 342#endif
 343
 344/* Calculate the actual memory size of the set data */
 345static size_t
 346mtype_ahash_memsize(const struct htype *h, const struct htable *t)
 347{
 348        return sizeof(*h) + sizeof(*t);
 349}
 350
 351/* Get the ith element from the array block n */
 352#define ahash_data(n, i, dsize) \
 353        ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
 354
 355static void
 356mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
 357{
 358        int i;
 359
 360        for (i = 0; i < n->pos; i++)
 361                if (test_bit(i, n->used))
 362                        ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
 363}
 364
 365/* Flush a hash type of set: destroy all elements */
 366static void
 367mtype_flush(struct ip_set *set)
 368{
 369        struct htype *h = set->data;
 370        struct htable *t;
 371        struct hbucket *n;
 372        u32 i;
 373
 374        t = ipset_dereference_protected(h->table, set);
 375        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 376                n = __ipset_dereference_protected(hbucket(t, i), 1);
 377                if (!n)
 378                        continue;
 379                if (set->extensions & IPSET_EXT_DESTROY)
 380                        mtype_ext_cleanup(set, n);
 381                /* FIXME: use slab cache */
 382                rcu_assign_pointer(hbucket(t, i), NULL);
 383                kfree_rcu(n, rcu);
 384        }
 385#ifdef IP_SET_HASH_WITH_NETS
 386        memset(h->nets, 0, sizeof(h->nets));
 387#endif
 388        set->elements = 0;
 389        set->ext_size = 0;
 390}
 391
 392/* Destroy the hashtable part of the set */
 393static void
 394mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
 395{
 396        struct hbucket *n;
 397        u32 i;
 398
 399        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 400                n = __ipset_dereference_protected(hbucket(t, i), 1);
 401                if (!n)
 402                        continue;
 403                if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
 404                        mtype_ext_cleanup(set, n);
 405                /* FIXME: use slab cache */
 406                kfree(n);
 407        }
 408
 409        ip_set_free(t);
 410}
 411
 412/* Destroy a hash type of set */
 413static void
 414mtype_destroy(struct ip_set *set)
 415{
 416        struct htype *h = set->data;
 417
 418        if (SET_WITH_TIMEOUT(set))
 419                del_timer_sync(&h->gc);
 420
 421        mtype_ahash_destroy(set,
 422                            __ipset_dereference_protected(h->table, 1), true);
 423        kfree(h);
 424
 425        set->data = NULL;
 426}
 427
 428static void
 429mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
 430{
 431        struct htype *h = set->data;
 432
 433        timer_setup(&h->gc, gc, 0);
 434        mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
 435        pr_debug("gc initialized, run in every %u\n",
 436                 IPSET_GC_PERIOD(set->timeout));
 437}
 438
 439static bool
 440mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 441{
 442        const struct htype *x = a->data;
 443        const struct htype *y = b->data;
 444
 445        /* Resizing changes htable_bits, so we ignore it */
 446        return x->maxelem == y->maxelem &&
 447               a->timeout == b->timeout &&
 448#ifdef IP_SET_HASH_WITH_NETMASK
 449               x->netmask == y->netmask &&
 450#endif
 451#ifdef IP_SET_HASH_WITH_MARKMASK
 452               x->markmask == y->markmask &&
 453#endif
 454               a->extensions == b->extensions;
 455}
 456
 457/* Delete expired elements from the hashtable */
 458static void
 459mtype_expire(struct ip_set *set, struct htype *h)
 460{
 461        struct htable *t;
 462        struct hbucket *n, *tmp;
 463        struct mtype_elem *data;
 464        u32 i, j, d;
 465        size_t dsize = set->dsize;
 466#ifdef IP_SET_HASH_WITH_NETS
 467        u8 k;
 468#endif
 469
 470        t = ipset_dereference_protected(h->table, set);
 471        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 472                n = __ipset_dereference_protected(hbucket(t, i), 1);
 473                if (!n)
 474                        continue;
 475                for (j = 0, d = 0; j < n->pos; j++) {
 476                        if (!test_bit(j, n->used)) {
 477                                d++;
 478                                continue;
 479                        }
 480                        data = ahash_data(n, j, dsize);
 481                        if (!ip_set_timeout_expired(ext_timeout(data, set)))
 482                                continue;
 483                        pr_debug("expired %u/%u\n", i, j);
 484                        clear_bit(j, n->used);
 485                        smp_mb__after_atomic();
 486#ifdef IP_SET_HASH_WITH_NETS
 487                        for (k = 0; k < IPSET_NET_COUNT; k++)
 488                                mtype_del_cidr(h,
 489                                        NCIDR_PUT(DCIDR_GET(data->cidr, k)),
 490                                        k);
 491#endif
 492                        ip_set_ext_destroy(set, data);
 493                        set->elements--;
 494                        d++;
 495                }
 496                if (d >= AHASH_INIT_SIZE) {
 497                        if (d >= n->size) {
 498                                rcu_assign_pointer(hbucket(t, i), NULL);
 499                                kfree_rcu(n, rcu);
 500                                continue;
 501                        }
 502                        tmp = kzalloc(sizeof(*tmp) +
 503                                      (n->size - AHASH_INIT_SIZE) * dsize,
 504                                      GFP_ATOMIC);
 505                        if (!tmp)
 506                                /* Still try to delete expired elements */
 507                                continue;
 508                        tmp->size = n->size - AHASH_INIT_SIZE;
 509                        for (j = 0, d = 0; j < n->pos; j++) {
 510                                if (!test_bit(j, n->used))
 511                                        continue;
 512                                data = ahash_data(n, j, dsize);
 513                                memcpy(tmp->value + d * dsize, data, dsize);
 514                                set_bit(d, tmp->used);
 515                                d++;
 516                        }
 517                        tmp->pos = d;
 518                        set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
 519                        rcu_assign_pointer(hbucket(t, i), tmp);
 520                        kfree_rcu(n, rcu);
 521                }
 522        }
 523}
 524
 525static void
 526mtype_gc(struct timer_list *t)
 527{
 528        struct htype *h = from_timer(h, t, gc);
 529        struct ip_set *set = h->set;
 530
 531        pr_debug("called\n");
 532        spin_lock_bh(&set->lock);
 533        mtype_expire(set, h);
 534        spin_unlock_bh(&set->lock);
 535
 536        h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 537        add_timer(&h->gc);
 538}
 539
 540/* Resize a hash: create a new hash table with doubling the hashsize
 541 * and inserting the elements to it. Repeat until we succeed or
 542 * fail due to memory pressures.
 543 */
 544static int
 545mtype_resize(struct ip_set *set, bool retried)
 546{
 547        struct htype *h = set->data;
 548        struct htable *t, *orig;
 549        u8 htable_bits;
 550        size_t extsize, dsize = set->dsize;
 551#ifdef IP_SET_HASH_WITH_NETS
 552        u8 flags;
 553        struct mtype_elem *tmp;
 554#endif
 555        struct mtype_elem *data;
 556        struct mtype_elem *d;
 557        struct hbucket *n, *m;
 558        u32 i, j, key;
 559        int ret;
 560
 561#ifdef IP_SET_HASH_WITH_NETS
 562        tmp = kmalloc(dsize, GFP_KERNEL);
 563        if (!tmp)
 564                return -ENOMEM;
 565#endif
 566        rcu_read_lock_bh();
 567        orig = rcu_dereference_bh_nfnl(h->table);
 568        htable_bits = orig->htable_bits;
 569        rcu_read_unlock_bh();
 570
 571retry:
 572        ret = 0;
 573        htable_bits++;
 574        if (!htable_bits) {
 575                /* In case we have plenty of memory :-) */
 576                pr_warn("Cannot increase the hashsize of set %s further\n",
 577                        set->name);
 578                ret = -IPSET_ERR_HASH_FULL;
 579                goto out;
 580        }
 581        t = ip_set_alloc(htable_size(htable_bits));
 582        if (!t) {
 583                ret = -ENOMEM;
 584                goto out;
 585        }
 586        t->htable_bits = htable_bits;
 587
 588        spin_lock_bh(&set->lock);
 589        orig = __ipset_dereference_protected(h->table, 1);
 590        /* There can't be another parallel resizing, but dumping is possible */
 591        atomic_set(&orig->ref, 1);
 592        atomic_inc(&orig->uref);
 593        extsize = 0;
 594        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
 595                 set->name, orig->htable_bits, htable_bits, orig);
 596        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
 597                n = __ipset_dereference_protected(hbucket(orig, i), 1);
 598                if (!n)
 599                        continue;
 600                for (j = 0; j < n->pos; j++) {
 601                        if (!test_bit(j, n->used))
 602                                continue;
 603                        data = ahash_data(n, j, dsize);
 604#ifdef IP_SET_HASH_WITH_NETS
 605                        /* We have readers running parallel with us,
 606                         * so the live data cannot be modified.
 607                         */
 608                        flags = 0;
 609                        memcpy(tmp, data, dsize);
 610                        data = tmp;
 611                        mtype_data_reset_flags(data, &flags);
 612#endif
 613                        key = HKEY(data, h->initval, htable_bits);
 614                        m = __ipset_dereference_protected(hbucket(t, key), 1);
 615                        if (!m) {
 616                                m = kzalloc(sizeof(*m) +
 617                                            AHASH_INIT_SIZE * dsize,
 618                                            GFP_ATOMIC);
 619                                if (!m) {
 620                                        ret = -ENOMEM;
 621                                        goto cleanup;
 622                                }
 623                                m->size = AHASH_INIT_SIZE;
 624                                extsize += ext_size(AHASH_INIT_SIZE, dsize);
 625                                RCU_INIT_POINTER(hbucket(t, key), m);
 626                        } else if (m->pos >= m->size) {
 627                                struct hbucket *ht;
 628
 629                                if (m->size >= AHASH_MAX(h)) {
 630                                        ret = -EAGAIN;
 631                                } else {
 632                                        ht = kzalloc(sizeof(*ht) +
 633                                                (m->size + AHASH_INIT_SIZE)
 634                                                * dsize,
 635                                                GFP_ATOMIC);
 636                                        if (!ht)
 637                                                ret = -ENOMEM;
 638                                }
 639                                if (ret < 0)
 640                                        goto cleanup;
 641                                memcpy(ht, m, sizeof(struct hbucket) +
 642                                              m->size * dsize);
 643                                ht->size = m->size + AHASH_INIT_SIZE;
 644                                extsize += ext_size(AHASH_INIT_SIZE, dsize);
 645                                kfree(m);
 646                                m = ht;
 647                                RCU_INIT_POINTER(hbucket(t, key), ht);
 648                        }
 649                        d = ahash_data(m, m->pos, dsize);
 650                        memcpy(d, data, dsize);
 651                        set_bit(m->pos++, m->used);
 652#ifdef IP_SET_HASH_WITH_NETS
 653                        mtype_data_reset_flags(d, &flags);
 654#endif
 655                }
 656        }
 657        rcu_assign_pointer(h->table, t);
 658        set->ext_size = extsize;
 659
 660        spin_unlock_bh(&set->lock);
 661
 662        /* Give time to other readers of the set */
 663        synchronize_rcu();
 664
 665        pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
 666                 orig->htable_bits, orig, t->htable_bits, t);
 667        /* If there's nobody else dumping the table, destroy it */
 668        if (atomic_dec_and_test(&orig->uref)) {
 669                pr_debug("Table destroy by resize %p\n", orig);
 670                mtype_ahash_destroy(set, orig, false);
 671        }
 672
 673out:
 674#ifdef IP_SET_HASH_WITH_NETS
 675        kfree(tmp);
 676#endif
 677        return ret;
 678
 679cleanup:
 680        atomic_set(&orig->ref, 0);
 681        atomic_dec(&orig->uref);
 682        spin_unlock_bh(&set->lock);
 683        mtype_ahash_destroy(set, t, false);
 684        if (ret == -EAGAIN)
 685                goto retry;
 686        goto out;
 687}
 688
 689/* Add an element to a hash and update the internal counters when succeeded,
 690 * otherwise report the proper error code.
 691 */
 692static int
 693mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 694          struct ip_set_ext *mext, u32 flags)
 695{
 696        struct htype *h = set->data;
 697        struct htable *t;
 698        const struct mtype_elem *d = value;
 699        struct mtype_elem *data;
 700        struct hbucket *n, *old = ERR_PTR(-ENOENT);
 701        int i, j = -1;
 702        bool flag_exist = flags & IPSET_FLAG_EXIST;
 703        bool deleted = false, forceadd = false, reuse = false;
 704        u32 key, multi = 0;
 705
 706        if (set->elements >= h->maxelem) {
 707                if (SET_WITH_TIMEOUT(set))
 708                        /* FIXME: when set is full, we slow down here */
 709                        mtype_expire(set, h);
 710                if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
 711                        forceadd = true;
 712        }
 713
 714        t = ipset_dereference_protected(h->table, set);
 715        key = HKEY(value, h->initval, t->htable_bits);
 716        n = __ipset_dereference_protected(hbucket(t, key), 1);
 717        if (!n) {
 718                if (forceadd || set->elements >= h->maxelem)
 719                        goto set_full;
 720                old = NULL;
 721                n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
 722                            GFP_ATOMIC);
 723                if (!n)
 724                        return -ENOMEM;
 725                n->size = AHASH_INIT_SIZE;
 726                set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
 727                goto copy_elem;
 728        }
 729        for (i = 0; i < n->pos; i++) {
 730                if (!test_bit(i, n->used)) {
 731                        /* Reuse first deleted entry */
 732                        if (j == -1) {
 733                                deleted = reuse = true;
 734                                j = i;
 735                        }
 736                        continue;
 737                }
 738                data = ahash_data(n, i, set->dsize);
 739                if (mtype_data_equal(data, d, &multi)) {
 740                        if (flag_exist ||
 741                            (SET_WITH_TIMEOUT(set) &&
 742                             ip_set_timeout_expired(ext_timeout(data, set)))) {
 743                                /* Just the extensions could be overwritten */
 744                                j = i;
 745                                goto overwrite_extensions;
 746                        }
 747                        return -IPSET_ERR_EXIST;
 748                }
 749                /* Reuse first timed out entry */
 750                if (SET_WITH_TIMEOUT(set) &&
 751                    ip_set_timeout_expired(ext_timeout(data, set)) &&
 752                    j == -1) {
 753                        j = i;
 754                        reuse = true;
 755                }
 756        }
 757        if (reuse || forceadd) {
 758                data = ahash_data(n, j, set->dsize);
 759                if (!deleted) {
 760#ifdef IP_SET_HASH_WITH_NETS
 761                        for (i = 0; i < IPSET_NET_COUNT; i++)
 762                                mtype_del_cidr(h,
 763                                        NCIDR_PUT(DCIDR_GET(data->cidr, i)),
 764                                        i);
 765#endif
 766                        ip_set_ext_destroy(set, data);
 767                        set->elements--;
 768                }
 769                goto copy_data;
 770        }
 771        if (set->elements >= h->maxelem)
 772                goto set_full;
 773        /* Create a new slot */
 774        if (n->pos >= n->size) {
 775                TUNE_AHASH_MAX(h, multi);
 776                if (n->size >= AHASH_MAX(h)) {
 777                        /* Trigger rehashing */
 778                        mtype_data_next(&h->next, d);
 779                        return -EAGAIN;
 780                }
 781                old = n;
 782                n = kzalloc(sizeof(*n) +
 783                            (old->size + AHASH_INIT_SIZE) * set->dsize,
 784                            GFP_ATOMIC);
 785                if (!n)
 786                        return -ENOMEM;
 787                memcpy(n, old, sizeof(struct hbucket) +
 788                       old->size * set->dsize);
 789                n->size = old->size + AHASH_INIT_SIZE;
 790                set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
 791        }
 792
 793copy_elem:
 794        j = n->pos++;
 795        data = ahash_data(n, j, set->dsize);
 796copy_data:
 797        set->elements++;
 798#ifdef IP_SET_HASH_WITH_NETS
 799        for (i = 0; i < IPSET_NET_COUNT; i++)
 800                mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
 801#endif
 802        memcpy(data, d, sizeof(struct mtype_elem));
 803overwrite_extensions:
 804#ifdef IP_SET_HASH_WITH_NETS
 805        mtype_data_set_flags(data, flags);
 806#endif
 807        if (SET_WITH_COUNTER(set))
 808                ip_set_init_counter(ext_counter(data, set), ext);
 809        if (SET_WITH_COMMENT(set))
 810                ip_set_init_comment(set, ext_comment(data, set), ext);
 811        if (SET_WITH_SKBINFO(set))
 812                ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
 813        /* Must come last for the case when timed out entry is reused */
 814        if (SET_WITH_TIMEOUT(set))
 815                ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
 816        smp_mb__before_atomic();
 817        set_bit(j, n->used);
 818        if (old != ERR_PTR(-ENOENT)) {
 819                rcu_assign_pointer(hbucket(t, key), n);
 820                if (old)
 821                        kfree_rcu(old, rcu);
 822        }
 823
 824        return 0;
 825set_full:
 826        if (net_ratelimit())
 827                pr_warn("Set %s is full, maxelem %u reached\n",
 828                        set->name, h->maxelem);
 829        return -IPSET_ERR_HASH_FULL;
 830}
 831
 832/* Delete an element from the hash and free up space if possible.
 833 */
 834static int
 835mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 836          struct ip_set_ext *mext, u32 flags)
 837{
 838        struct htype *h = set->data;
 839        struct htable *t;
 840        const struct mtype_elem *d = value;
 841        struct mtype_elem *data;
 842        struct hbucket *n;
 843        int i, j, k, ret = -IPSET_ERR_EXIST;
 844        u32 key, multi = 0;
 845        size_t dsize = set->dsize;
 846
 847        t = ipset_dereference_protected(h->table, set);
 848        key = HKEY(value, h->initval, t->htable_bits);
 849        n = __ipset_dereference_protected(hbucket(t, key), 1);
 850        if (!n)
 851                goto out;
 852        for (i = 0, k = 0; i < n->pos; i++) {
 853                if (!test_bit(i, n->used)) {
 854                        k++;
 855                        continue;
 856                }
 857                data = ahash_data(n, i, dsize);
 858                if (!mtype_data_equal(data, d, &multi))
 859                        continue;
 860                if (SET_WITH_TIMEOUT(set) &&
 861                    ip_set_timeout_expired(ext_timeout(data, set)))
 862                        goto out;
 863
 864                ret = 0;
 865                clear_bit(i, n->used);
 866                smp_mb__after_atomic();
 867                if (i + 1 == n->pos)
 868                        n->pos--;
 869                set->elements--;
 870#ifdef IP_SET_HASH_WITH_NETS
 871                for (j = 0; j < IPSET_NET_COUNT; j++)
 872                        mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
 873                                       j);
 874#endif
 875                ip_set_ext_destroy(set, data);
 876
 877                for (; i < n->pos; i++) {
 878                        if (!test_bit(i, n->used))
 879                                k++;
 880                }
 881                if (n->pos == 0 && k == 0) {
 882                        set->ext_size -= ext_size(n->size, dsize);
 883                        rcu_assign_pointer(hbucket(t, key), NULL);
 884                        kfree_rcu(n, rcu);
 885                } else if (k >= AHASH_INIT_SIZE) {
 886                        struct hbucket *tmp = kzalloc(sizeof(*tmp) +
 887                                        (n->size - AHASH_INIT_SIZE) * dsize,
 888                                        GFP_ATOMIC);
 889                        if (!tmp)
 890                                goto out;
 891                        tmp->size = n->size - AHASH_INIT_SIZE;
 892                        for (j = 0, k = 0; j < n->pos; j++) {
 893                                if (!test_bit(j, n->used))
 894                                        continue;
 895                                data = ahash_data(n, j, dsize);
 896                                memcpy(tmp->value + k * dsize, data, dsize);
 897                                set_bit(k, tmp->used);
 898                                k++;
 899                        }
 900                        tmp->pos = k;
 901                        set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
 902                        rcu_assign_pointer(hbucket(t, key), tmp);
 903                        kfree_rcu(n, rcu);
 904                }
 905                goto out;
 906        }
 907
 908out:
 909        return ret;
 910}
 911
 912static inline int
 913mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
 914                 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
 915{
 916        if (!ip_set_match_extensions(set, ext, mext, flags, data))
 917                return 0;
 918        /* nomatch entries return -ENOTEMPTY */
 919        return mtype_do_data_match(data);
 920}
 921
 922#ifdef IP_SET_HASH_WITH_NETS
 923/* Special test function which takes into account the different network
 924 * sizes added to the set
 925 */
 926static int
 927mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 928                 const struct ip_set_ext *ext,
 929                 struct ip_set_ext *mext, u32 flags)
 930{
 931        struct htype *h = set->data;
 932        struct htable *t = rcu_dereference_bh(h->table);
 933        struct hbucket *n;
 934        struct mtype_elem *data;
 935#if IPSET_NET_COUNT == 2
 936        struct mtype_elem orig = *d;
 937        int ret, i, j = 0, k;
 938#else
 939        int ret, i, j = 0;
 940#endif
 941        u32 key, multi = 0;
 942
 943        pr_debug("test by nets\n");
 944        for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
 945#if IPSET_NET_COUNT == 2
 946                mtype_data_reset_elem(d, &orig);
 947                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
 948                for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
 949                     k++) {
 950                        mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
 951                                           true);
 952#else
 953                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
 954#endif
 955                key = HKEY(d, h->initval, t->htable_bits);
 956                n =  rcu_dereference_bh(hbucket(t, key));
 957                if (!n)
 958                        continue;
 959                for (i = 0; i < n->pos; i++) {
 960                        if (!test_bit(i, n->used))
 961                                continue;
 962                        data = ahash_data(n, i, set->dsize);
 963                        if (!mtype_data_equal(data, d, &multi))
 964                                continue;
 965                        ret = mtype_data_match(data, ext, mext, set, flags);
 966                        if (ret != 0)
 967                                return ret;
 968#ifdef IP_SET_HASH_WITH_MULTI
 969                        /* No match, reset multiple match flag */
 970                        multi = 0;
 971#endif
 972                }
 973#if IPSET_NET_COUNT == 2
 974                }
 975#endif
 976        }
 977        return 0;
 978}
 979#endif
 980
 981/* Test whether the element is added to the set */
 982static int
 983mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 984           struct ip_set_ext *mext, u32 flags)
 985{
 986        struct htype *h = set->data;
 987        struct htable *t;
 988        struct mtype_elem *d = value;
 989        struct hbucket *n;
 990        struct mtype_elem *data;
 991        int i, ret = 0;
 992        u32 key, multi = 0;
 993
 994        t = rcu_dereference_bh(h->table);
 995#ifdef IP_SET_HASH_WITH_NETS
 996        /* If we test an IP address and not a network address,
 997         * try all possible network sizes
 998         */
 999        for (i = 0; i < IPSET_NET_COUNT; i++)
1000                if (DCIDR_GET(d->cidr, i) != HOST_MASK)
1001                        break;
1002        if (i == IPSET_NET_COUNT) {
1003                ret = mtype_test_cidrs(set, d, ext, mext, flags);
1004                goto out;
1005        }
1006#endif
1007
1008        key = HKEY(d, h->initval, t->htable_bits);
1009        n = rcu_dereference_bh(hbucket(t, key));
1010        if (!n) {
1011                ret = 0;
1012                goto out;
1013        }
1014        for (i = 0; i < n->pos; i++) {
1015                if (!test_bit(i, n->used))
1016                        continue;
1017                data = ahash_data(n, i, set->dsize);
1018                if (!mtype_data_equal(data, d, &multi))
1019                        continue;
1020                ret = mtype_data_match(data, ext, mext, set, flags);
1021                if (ret != 0)
1022                        goto out;
1023        }
1024out:
1025        return ret;
1026}
1027
1028/* Reply a HEADER request: fill out the header part of the set */
1029static int
1030mtype_head(struct ip_set *set, struct sk_buff *skb)
1031{
1032        struct htype *h = set->data;
1033        const struct htable *t;
1034        struct nlattr *nested;
1035        size_t memsize;
1036        u8 htable_bits;
1037
1038        /* If any members have expired, set->elements will be wrong
1039         * mytype_expire function will update it with the right count.
1040         * we do not hold set->lock here, so grab it first.
1041         * set->elements can still be incorrect in the case of a huge set,
1042         * because elements might time out during the listing.
1043         */
1044        if (SET_WITH_TIMEOUT(set)) {
1045                spin_lock_bh(&set->lock);
1046                mtype_expire(set, h);
1047                spin_unlock_bh(&set->lock);
1048        }
1049
1050        rcu_read_lock_bh();
1051        t = rcu_dereference_bh_nfnl(h->table);
1052        memsize = mtype_ahash_memsize(h, t) + set->ext_size;
1053        htable_bits = t->htable_bits;
1054        rcu_read_unlock_bh();
1055
1056        nested = nla_nest_start(skb, IPSET_ATTR_DATA);
1057        if (!nested)
1058                goto nla_put_failure;
1059        if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
1060                          htonl(jhash_size(htable_bits))) ||
1061            nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
1062                goto nla_put_failure;
1063#ifdef IP_SET_HASH_WITH_NETMASK
1064        if (h->netmask != HOST_MASK &&
1065            nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
1066                goto nla_put_failure;
1067#endif
1068#ifdef IP_SET_HASH_WITH_MARKMASK
1069        if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
1070                goto nla_put_failure;
1071#endif
1072        if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
1073            nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
1074            nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
1075                goto nla_put_failure;
1076        if (unlikely(ip_set_put_flags(skb, set)))
1077                goto nla_put_failure;
1078        nla_nest_end(skb, nested);
1079
1080        return 0;
1081nla_put_failure:
1082        return -EMSGSIZE;
1083}
1084
1085/* Make possible to run dumping parallel with resizing */
1086static void
1087mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
1088{
1089        struct htype *h = set->data;
1090        struct htable *t;
1091
1092        if (start) {
1093                rcu_read_lock_bh();
1094                t = rcu_dereference_bh_nfnl(h->table);
1095                atomic_inc(&t->uref);
1096                cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
1097                rcu_read_unlock_bh();
1098        } else if (cb->args[IPSET_CB_PRIVATE]) {
1099                t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
1100                if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
1101                        /* Resizing didn't destroy the hash table */
1102                        pr_debug("Table destroy by dump: %p\n", t);
1103                        mtype_ahash_destroy(set, t, false);
1104                }
1105                cb->args[IPSET_CB_PRIVATE] = 0;
1106        }
1107}
1108
1109/* Reply a LIST/SAVE request: dump the elements of the specified set */
1110static int
1111mtype_list(const struct ip_set *set,
1112           struct sk_buff *skb, struct netlink_callback *cb)
1113{
1114        const struct htable *t;
1115        struct nlattr *atd, *nested;
1116        const struct hbucket *n;
1117        const struct mtype_elem *e;
1118        u32 first = cb->args[IPSET_CB_ARG0];
1119        /* We assume that one hash bucket fills into one page */
1120        void *incomplete;
1121        int i, ret = 0;
1122
1123        atd = nla_nest_start(skb, IPSET_ATTR_ADT);
1124        if (!atd)
1125                return -EMSGSIZE;
1126
1127        pr_debug("list hash set %s\n", set->name);
1128        t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
1129        /* Expire may replace a hbucket with another one */
1130        rcu_read_lock();
1131        for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
1132             cb->args[IPSET_CB_ARG0]++) {
1133                cond_resched_rcu();
1134                incomplete = skb_tail_pointer(skb);
1135                n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
1136                pr_debug("cb->arg bucket: %lu, t %p n %p\n",
1137                         cb->args[IPSET_CB_ARG0], t, n);
1138                if (!n)
1139                        continue;
1140                for (i = 0; i < n->pos; i++) {
1141                        if (!test_bit(i, n->used))
1142                                continue;
1143                        e = ahash_data(n, i, set->dsize);
1144                        if (SET_WITH_TIMEOUT(set) &&
1145                            ip_set_timeout_expired(ext_timeout(e, set)))
1146                                continue;
1147                        pr_debug("list hash %lu hbucket %p i %u, data %p\n",
1148                                 cb->args[IPSET_CB_ARG0], n, i, e);
1149                        nested = nla_nest_start(skb, IPSET_ATTR_DATA);
1150                        if (!nested) {
1151                                if (cb->args[IPSET_CB_ARG0] == first) {
1152                                        nla_nest_cancel(skb, atd);
1153                                        ret = -EMSGSIZE;
1154                                        goto out;
1155                                }
1156                                goto nla_put_failure;
1157                        }
1158                        if (mtype_data_list(skb, e))
1159                                goto nla_put_failure;
1160                        if (ip_set_put_extensions(skb, set, e, true))
1161                                goto nla_put_failure;
1162                        nla_nest_end(skb, nested);
1163                }
1164        }
1165        nla_nest_end(skb, atd);
1166        /* Set listing finished */
1167        cb->args[IPSET_CB_ARG0] = 0;
1168
1169        goto out;
1170
1171nla_put_failure:
1172        nlmsg_trim(skb, incomplete);
1173        if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
1174                pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
1175                        set->name);
1176                cb->args[IPSET_CB_ARG0] = 0;
1177                ret = -EMSGSIZE;
1178        } else {
1179                nla_nest_end(skb, atd);
1180        }
1181out:
1182        rcu_read_unlock();
1183        return ret;
1184}
1185
1186static int
1187IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
1188                          const struct xt_action_param *par,
1189                          enum ipset_adt adt, struct ip_set_adt_opt *opt);
1190
1191static int
1192IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
1193                          enum ipset_adt adt, u32 *lineno, u32 flags,
1194                          bool retried);
1195
1196static const struct ip_set_type_variant mtype_variant = {
1197        .kadt   = mtype_kadt,
1198        .uadt   = mtype_uadt,
1199        .adt    = {
1200                [IPSET_ADD] = mtype_add,
1201                [IPSET_DEL] = mtype_del,
1202                [IPSET_TEST] = mtype_test,
1203        },
1204        .destroy = mtype_destroy,
1205        .flush  = mtype_flush,
1206        .head   = mtype_head,
1207        .list   = mtype_list,
1208        .uref   = mtype_uref,
1209        .resize = mtype_resize,
1210        .same_set = mtype_same_set,
1211};
1212
#ifdef IP_SET_EMIT_CREATE
/* Create the type specific part of a new hash set.
 *
 * The attributes in @tb are validated, the htype structure and the
 * initial hash table are allocated and attached to @set, then the
 * variant, the element size and the timeout (together with the garbage
 * collector when a timeout attribute is present) are set up according
 * to the family of the set.
 *
 * Returns 0 on success, -IPSET_ERR_INVALID_FAMILY, -IPSET_ERR_PROTOCOL,
 * -IPSET_ERR_INVALID_MARKMASK or -IPSET_ERR_INVALID_NETMASK for invalid
 * input, -ENOMEM when the hash size is not usable or an allocation
 * fails.
 */
static int
IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
                            struct nlattr *tb[], u32 flags)
{
        u32 hashsize = IPSET_DEFAULT_HASHSIZE;
        u32 maxelem = IPSET_DEFAULT_MAXELEM;
#ifdef IP_SET_HASH_WITH_MARKMASK
        u32 markmask = 0xffffffff;
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
        u8 netmask;
#endif
        struct htable *t = NULL;
        struct htype *h;
        size_t hsize;
        u8 hbits;

        pr_debug("Create set %s with family %s\n",
                 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");

#ifdef IP_SET_PROTO_UNDEF
        if (set->family != NFPROTO_UNSPEC)
                return -IPSET_ERR_INVALID_FAMILY;
#else
        if (set->family != NFPROTO_IPV4 && set->family != NFPROTO_IPV6)
                return -IPSET_ERR_INVALID_FAMILY;
#endif

        if (unlikely(!(ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) &&
                       ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) &&
                       ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) &&
                       ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))))
                return -IPSET_ERR_PROTOCOL;

#ifdef IP_SET_HASH_WITH_MARKMASK
        /* Separated condition in order to avoid directive in argument list */
        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
                return -IPSET_ERR_PROTOCOL;

        /* Without the attribute all the bits of the mark are kept */
        if (tb[IPSET_ATTR_MARKMASK]) {
                markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
                if (!markmask)
                        return -IPSET_ERR_INVALID_MARKMASK;
        }
#endif

#ifdef IP_SET_HASH_WITH_NETMASK
        /* The default is the full address length of the family */
        if (set->family == NFPROTO_IPV4)
                netmask = 32;
        else
                netmask = 128;
        if (tb[IPSET_ATTR_NETMASK]) {
                netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

                if (netmask == 0 ||
                    (set->family == NFPROTO_IPV4 && netmask > 32) ||
                    (set->family == NFPROTO_IPV6 && netmask > 128))
                        return -IPSET_ERR_INVALID_NETMASK;
        }
#endif

        if (tb[IPSET_ATTR_HASHSIZE]) {
                hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
                /* Too small values are raised to the minimum */
                if (hashsize < IPSET_MIMINAL_HASHSIZE)
                        hashsize = IPSET_MIMINAL_HASHSIZE;
        }

        if (tb[IPSET_ATTR_MAXELEM])
                maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

        h = kzalloc(sizeof(*h), GFP_KERNEL);
        if (!h)
                return -ENOMEM;

        /* A zero table size is handled like a failed allocation */
        hbits = htable_bits(hashsize);
        hsize = htable_size(hbits);
        if (hsize != 0)
                t = ip_set_alloc(hsize);
        if (!t) {
                kfree(h);
                return -ENOMEM;
        }

        h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
        h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
        h->markmask = markmask;
#endif
        get_random_bytes(&h->initval, sizeof(h->initval));

        t->htable_bits = hbits;
        RCU_INIT_POINTER(h->table, t);

        h->set = set;
        set->data = h;

        /* Select the variant and the element size by family */
#ifndef IP_SET_PROTO_UNDEF
        if (set->family == NFPROTO_IPV4) {
#endif
                set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
                set->dsize = ip_set_elem_len(set, tb,
                        sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
                        __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
        } else {
                set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
                set->dsize = ip_set_elem_len(set, tb,
                        sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
                        __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
        }
#endif

        /* The garbage collector is set up for sets with timeout only */
        if (tb[IPSET_ATTR_TIMEOUT]) {
                set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
#ifndef IP_SET_PROTO_UNDEF
                if (set->family == NFPROTO_IPV4)
#endif
                        IPSET_TOKEN(HTYPE, 4_gc_init)(set,
                                IPSET_TOKEN(HTYPE, 4_gc));
#ifndef IP_SET_PROTO_UNDEF
                else
                        IPSET_TOKEN(HTYPE, 6_gc_init)(set,
                                IPSET_TOKEN(HTYPE, 6_gc));
#endif
        } else {
                set->timeout = IPSET_NO_TIMEOUT;
        }
        pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
                 set->name, jhash_size(t->htable_bits),
                 t->htable_bits, h->maxelem, set->data, t);

        return 0;
}
#endif /* IP_SET_EMIT_CREATE */
1347
1348#undef HKEY_DATALEN
1349