linux/net/netfilter/ipset/ip_set_hash_gen.h
<<
>>
Prefs
   1/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
   2 *
   3 * This program is free software; you can redistribute it and/or modify
   4 * it under the terms of the GNU General Public License version 2 as
   5 * published by the Free Software Foundation.
   6 */
   7
   8#ifndef _IP_SET_HASH_GEN_H
   9#define _IP_SET_HASH_GEN_H
  10
  11#include <linux/rcupdate.h>
  12#include <linux/jhash.h>
  13#include <linux/types.h>
  14#include <linux/netfilter/ipset/ip_set_timeout.h>
  15
  /* RCU pointer access helpers.
   *
   * __ipset_dereference_protected() hands the pointer and the caller supplied
   * condition straight to rcu_dereference_protected().
   * ipset_dereference_protected() uses spin_is_locked() on the lock of the
   * set as that condition.
   * NOTE(review): spin_is_locked() does not show that the current context is
   * the lock owner; a lockdep based condition may be preferable - confirm.
   */
  #define __ipset_dereference_protected(p, c) \
          rcu_dereference_protected(p, c)

  #define ipset_dereference_protected(p, set) \
          __ipset_dereference_protected(p, spin_is_locked(&(set)->lock))

  /* rcu_dereference_bh_check() with the extra condition hard-wired to 1 */
  #define rcu_dereference_bh_nfnl(p) \
          rcu_dereference_bh_check(p, 1)
  21
  22/* Hashing which uses arrays to resolve clashing. The hash table is resized
  23 * (doubled) when searching becomes too long.
  24 * Internally jhash is used with the assumption that the size of the
  25 * stored data is a multiple of sizeof(u32).
  26 *
  27 * Readers and resizing
  28 *
  29 * Resizing can be triggered by userspace command only, and those
  30 * are serialized by the nfnl mutex. During resizing the set is
  31 * read-locked, so the only possible concurrent operations are
  32 * the kernel side readers. Those must be protected by proper RCU locking.
  33 */
  34
  /* Bucket array sizing:
   * AHASH_INIT_SIZE - number of elements to store in an initial array block
   * AHASH_MAX_SIZE  - max number of elements to store in an array block
   * AHASH_MAX_TUNED - max number of elements in the array block when tuned
   */
  #define AHASH_INIT_SIZE                 4
  #define AHASH_MAX_SIZE                  (3 * AHASH_INIT_SIZE)
  #define AHASH_MAX_TUNED                 64

  /* The per-bucket maximum is tunable only with IP_SET_HASH_WITH_MULTI */
  #ifdef IP_SET_HASH_WITH_MULTI
  #define AHASH_MAX(h)                    ((h)->ahash_max)

  /* Compute the next value of the per-bucket element limit.
   * @curr:  the limit currently in force
   * @multi: value compared against @curr; while multi < curr the limit
   *         is left unchanged
   *
   * Returns curr + AHASH_INIT_SIZE when that is larger than @curr and does
   * not exceed AHASH_MAX_TUNED, otherwise @curr.
   */
  static inline u8
  tune_ahash_max(u8 curr, u32 multi)
  {
          u32 grown;

          if (multi < curr)
                  return curr;

          grown = curr + AHASH_INIT_SIZE;
          /* Currently, at listing one hash bucket must fit into a message.
           * Therefore we have a hard limit here.
           */
          if (grown <= curr || grown > AHASH_MAX_TUNED)
                  return curr;

          return grown;
  }

  /* Store the re-tuned limit back into the hash */
  #define TUNE_AHASH_MAX(h, multi)        \
          ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
  #else
  /* Fixed limit: tuning expands to nothing */
  #define AHASH_MAX(h)                    AHASH_MAX_SIZE
  #define TUNE_AHASH_MAX(h, multi)
  #endif
  67
  68/* A hash bucket */
  69struct hbucket {
  70        struct rcu_head rcu;    /* for call_rcu_bh */
  71        /* Which positions are used in the array */
  72        DECLARE_BITMAP(used, AHASH_MAX_TUNED);
  73        u8 size;                /* size of the array */
  74        u8 pos;                 /* position of the first free entry */
  75        unsigned char value[0]  /* the array of the values */
  76                __aligned(__alignof__(u64));
  77};
  78
  79/* The hash table: the table size stored here in order to make resizing easy */
  80struct htable {
  81        atomic_t ref;           /* References for resizing */
  82        atomic_t uref;          /* References for dumping */
  83        u8 htable_bits;         /* size of hash table == 2^htable_bits */
  84        struct hbucket __rcu *bucket[0]; /* hashtable buckets */
  85};
  86
  87#define hbucket(h, i)           ((h)->bucket[i])
  88#define ext_size(n, dsize)      \
  89        (sizeof(struct hbucket) + (n) * (dsize))
  90
  91#ifndef IPSET_NET_COUNT
  92#define IPSET_NET_COUNT         1
  93#endif
  94
  95/* Book-keeping of the prefixes added to the set */
  96struct net_prefixes {
  97        u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
  98        u8 cidr[IPSET_NET_COUNT];  /* the cidr value */
  99};
 100
 101/* Compute the hash table size */
 102static size_t
 103htable_size(u8 hbits)
 104{
 105        size_t hsize;
 106
 107        /* We must fit both into u32 in jhash and size_t */
 108        if (hbits > 31)
 109                return 0;
 110        hsize = jhash_size(hbits);
 111        if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
 112            < hsize)
 113                return 0;
 114
 115        return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
 116}
 117
 118/* Compute htable_bits from the user input parameter hashsize */
 119static u8
 120htable_bits(u32 hashsize)
 121{
 122        /* Assume that hashsize == 2^htable_bits */
 123        u8 bits = fls(hashsize - 1);
 124
 125        if (jhash_size(bits) != hashsize)
 126                /* Round up to the first 2^n value */
 127                bits = fls(hashsize);
 128
 129        return bits;
 130}
 131
 #ifdef IP_SET_HASH_WITH_NETS

 /* Select the i-th cidr of an element: indexed when IPSET_NET_COUNT > 1,
  * otherwise the cidr itself (the index is ignored).
  */
 #if IPSET_NET_COUNT > 1
 #define __CIDR(cidr, i)         (cidr[i])
 #else
 #define __CIDR(cidr, i)         (cidr)
 #endif

 /* cidr + 1 is stored in net_prefixes to support /0 */
 #define NCIDR_PUT(cidr)         ((cidr) + 1)
 #define NCIDR_GET(cidr)         ((cidr) - 1)

 /* Conversion between the real cidr and the value kept in the data entry */
 #ifdef IP_SET_HASH_WITH_NETS_PACKED
 /* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
 #define DCIDR_PUT(cidr)         ((cidr) - 1)
 #define DCIDR_GET(cidr, i)      (__CIDR(cidr, i) + 1)
 #else
 /* Not packed: the data entry holds the cidr unchanged */
 #define DCIDR_PUT(cidr)         (cidr)
 #define DCIDR_GET(cidr, i)      __CIDR(cidr, i)
 #endif

 /* Data entry form of a net_prefixes style cidr; a zero cidr selects
  * host_mask instead.
  */
 #define INIT_CIDR(cidr, host_mask)      \
         DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))

 /* NLEN is the number of book-keeping slots, CIDR_POS() maps the stored
  * value c = cidr + 1 to its slot index.
  */
 #ifdef IP_SET_HASH_WITH_NET0
 /* cidr from 0 to HOST_MASK value and c = cidr + 1 */
 #define NLEN                    (HOST_MASK + 1)
 #define CIDR_POS(c)             ((c) - 1)
 #else
 /* cidr from 1 to HOST_MASK value and c = cidr + 1 */
 #define NLEN                    HOST_MASK
 #define CIDR_POS(c)             ((c) - 2)
 #endif

 #else /* !IP_SET_HASH_WITH_NETS */
 #define NLEN                    0
 #endif /* IP_SET_HASH_WITH_NETS */
 168
 169#endif /* _IP_SET_HASH_GEN_H */
 170
 171#ifndef MTYPE
 172#error "MTYPE is not defined!"
 173#endif
 174
 175#ifndef HTYPE
 176#error "HTYPE is not defined!"
 177#endif
 178
 179#ifndef HOST_MASK
 180#error "HOST_MASK is not defined!"
 181#endif
 182
 /* Family dependent templates: drop the previous set of names so that
  * they can be defined again below.
  */

 /* Element data helpers */
 #undef ahash_data
 #undef mtype_elem
 #undef mtype_data_equal
 #undef mtype_do_data_match
 #undef mtype_data_match
 #undef mtype_data_set_flags
 #undef mtype_data_reset_elem
 #undef mtype_data_reset_flags
 #undef mtype_data_netmask
 #undef mtype_data_list
 #undef mtype_data_next

 /* Set level helpers */
 #undef mtype_ahash_destroy
 #undef mtype_ahash_memsize
 #undef mtype_ext_cleanup
 #undef mtype_add_cidr
 #undef mtype_del_cidr
 #undef mtype_flush
 #undef mtype_destroy
 #undef mtype_same_set
 #undef mtype_kadt
 #undef mtype_uadt

 /* Operations */
 #undef mtype_add
 #undef mtype_del
 #undef mtype_test_cidrs
 #undef mtype_test
 #undef mtype_uref
 #undef mtype_expire
 #undef mtype_resize
 #undef mtype_head
 #undef mtype_list
 #undef mtype_gc
 #undef mtype_gc_init
 #undef mtype_variant

 /* Hash structure name and key macro */
 #undef htype
 #undef HKEY
 223
 /* Every mtype_* name is MTYPE with a suffix pasted on by IPSET_TOKEN() */

 /* Element data helpers */
 #define mtype_elem              IPSET_TOKEN(MTYPE, _elem)
 #define mtype_data_equal        IPSET_TOKEN(MTYPE, _data_equal)
 #define mtype_data_match        IPSET_TOKEN(MTYPE, _data_match)
 #define mtype_data_set_flags    IPSET_TOKEN(MTYPE, _data_set_flags)
 #define mtype_data_reset_elem   IPSET_TOKEN(MTYPE, _data_reset_elem)
 #define mtype_data_reset_flags  IPSET_TOKEN(MTYPE, _data_reset_flags)
 #define mtype_data_netmask      IPSET_TOKEN(MTYPE, _data_netmask)
 #define mtype_data_list         IPSET_TOKEN(MTYPE, _data_list)
 #define mtype_data_next         IPSET_TOKEN(MTYPE, _data_next)
 /* Without IP_SET_HASH_WITH_NETS the match check is the constant 1 */
 #ifdef IP_SET_HASH_WITH_NETS
 #define mtype_do_data_match     IPSET_TOKEN(MTYPE, _do_data_match)
 #else
 #define mtype_do_data_match(d)  1
 #endif

 /* Set level helpers */
 #define mtype_ahash_destroy     IPSET_TOKEN(MTYPE, _ahash_destroy)
 #define mtype_ahash_memsize     IPSET_TOKEN(MTYPE, _ahash_memsize)
 #define mtype_ext_cleanup       IPSET_TOKEN(MTYPE, _ext_cleanup)
 #define mtype_add_cidr          IPSET_TOKEN(MTYPE, _add_cidr)
 #define mtype_del_cidr          IPSET_TOKEN(MTYPE, _del_cidr)
 #define mtype_flush             IPSET_TOKEN(MTYPE, _flush)
 #define mtype_destroy           IPSET_TOKEN(MTYPE, _destroy)
 #define mtype_same_set          IPSET_TOKEN(MTYPE, _same_set)
 #define mtype_kadt              IPSET_TOKEN(MTYPE, _kadt)
 #define mtype_uadt              IPSET_TOKEN(MTYPE, _uadt)

 /* Operations */
 #define mtype_add               IPSET_TOKEN(MTYPE, _add)
 #define mtype_del               IPSET_TOKEN(MTYPE, _del)
 #define mtype_test_cidrs        IPSET_TOKEN(MTYPE, _test_cidrs)
 #define mtype_test              IPSET_TOKEN(MTYPE, _test)
 #define mtype_uref              IPSET_TOKEN(MTYPE, _uref)
 #define mtype_expire            IPSET_TOKEN(MTYPE, _expire)
 #define mtype_resize            IPSET_TOKEN(MTYPE, _resize)
 #define mtype_head              IPSET_TOKEN(MTYPE, _head)
 #define mtype_list              IPSET_TOKEN(MTYPE, _list)
 #define mtype_gc                IPSET_TOKEN(MTYPE, _gc)
 #define mtype_gc_init           IPSET_TOKEN(MTYPE, _gc_init)
 #define mtype_variant           IPSET_TOKEN(MTYPE, _variant)
 262
 /* Number of bytes of an element fed to the hash; defaults to the whole
  * element unless already defined.
  */
 #ifndef HKEY_DATALEN
 #define HKEY_DATALEN            sizeof(struct mtype_elem)
 #endif

 /* The generic hash structure takes the name MTYPE */
 #define htype                   MTYPE

 /* Bucket index of data: jhash2() over HKEY_DATALEN / sizeof(u32) words,
  * seeded with initval and masked to htable_bits bits. The build fails
  * when HKEY_DATALEN is not a multiple of sizeof(u32).
  */
 #define HKEY(data, initval, htable_bits)                        \
 ({                                                              \
         const u32 *__words = (const u32 *)data;                 \
         u32 __nwords = HKEY_DATALEN / sizeof(u32);              \
                                                                 \
         BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0);          \
                                                                 \
         jhash2(__words, __nwords, initval) &                    \
                 jhash_mask(htable_bits);                        \
 })
 278
 279/* The generic hash structure */
 280struct htype {
 281        struct htable __rcu *table; /* the hash table */
 282        struct timer_list gc;   /* garbage collection when timeout enabled */
 283        u32 maxelem;            /* max elements in the hash */
 284        u32 initval;            /* random jhash init value */
 285#ifdef IP_SET_HASH_WITH_MARKMASK
 286        u32 markmask;           /* markmask value for mark mask to store */
 287#endif
 288#ifdef IP_SET_HASH_WITH_MULTI
 289        u8 ahash_max;           /* max elements in an array block */
 290#endif
 291#ifdef IP_SET_HASH_WITH_NETMASK
 292        u8 netmask;             /* netmask value for subnets to store */
 293#endif
 294        struct mtype_elem next; /* temporary storage for uadd */
 295#ifdef IP_SET_HASH_WITH_NETS
 296        struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
 297#endif
 298};
 299
 300#ifdef IP_SET_HASH_WITH_NETS
 301/* Network cidr size book keeping when the hash stores different
 302 * sized networks. cidr == real cidr + 1 to support /0.
 303 */
 304static void
 305mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
 306{
 307        int i, j;
 308
 309        /* Add in increasing prefix order, so larger cidr first */
 310        for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
 311                if (j != -1) {
 312                        continue;
 313                } else if (h->nets[i].cidr[n] < cidr) {
 314                        j = i;
 315                } else if (h->nets[i].cidr[n] == cidr) {
 316                        h->nets[CIDR_POS(cidr)].nets[n]++;
 317                        return;
 318                }
 319        }
 320        if (j != -1) {
 321                for (; i > j; i--)
 322                        h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
 323        }
 324        h->nets[i].cidr[n] = cidr;
 325        h->nets[CIDR_POS(cidr)].nets[n] = 1;
 326}
 327
 328static void
 329mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
 330{
 331        u8 i, j, net_end = NLEN - 1;
 332
 333        for (i = 0; i < NLEN; i++) {
 334                if (h->nets[i].cidr[n] != cidr)
 335                        continue;
 336                h->nets[CIDR_POS(cidr)].nets[n]--;
 337                if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
 338                        return;
 339                for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
 340                        h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
 341                h->nets[j].cidr[n] = 0;
 342                return;
 343        }
 344}
 345#endif
 346
 347/* Calculate the actual memory size of the set data */
 348static size_t
 349mtype_ahash_memsize(const struct htype *h, const struct htable *t)
 350{
 351        return sizeof(*h) + sizeof(*t);
 352}
 353
 354/* Get the ith element from the array block n */
 355#define ahash_data(n, i, dsize) \
 356        ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
 357
 358static void
 359mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
 360{
 361        int i;
 362
 363        for (i = 0; i < n->pos; i++)
 364                if (test_bit(i, n->used))
 365                        ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
 366}
 367
 368/* Flush a hash type of set: destroy all elements */
 369static void
 370mtype_flush(struct ip_set *set)
 371{
 372        struct htype *h = set->data;
 373        struct htable *t;
 374        struct hbucket *n;
 375        u32 i;
 376
 377        t = ipset_dereference_protected(h->table, set);
 378        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 379                n = __ipset_dereference_protected(hbucket(t, i), 1);
 380                if (!n)
 381                        continue;
 382                if (set->extensions & IPSET_EXT_DESTROY)
 383                        mtype_ext_cleanup(set, n);
 384                /* FIXME: use slab cache */
 385                rcu_assign_pointer(hbucket(t, i), NULL);
 386                kfree_rcu(n, rcu);
 387        }
 388#ifdef IP_SET_HASH_WITH_NETS
 389        memset(h->nets, 0, sizeof(h->nets));
 390#endif
 391        set->elements = 0;
 392        set->ext_size = 0;
 393}
 394
 395/* Destroy the hashtable part of the set */
 396static void
 397mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
 398{
 399        struct hbucket *n;
 400        u32 i;
 401
 402        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 403                n = __ipset_dereference_protected(hbucket(t, i), 1);
 404                if (!n)
 405                        continue;
 406                if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
 407                        mtype_ext_cleanup(set, n);
 408                /* FIXME: use slab cache */
 409                kfree(n);
 410        }
 411
 412        ip_set_free(t);
 413}
 414
 415/* Destroy a hash type of set */
 416static void
 417mtype_destroy(struct ip_set *set)
 418{
 419        struct htype *h = set->data;
 420
 421        if (SET_WITH_TIMEOUT(set))
 422                del_timer_sync(&h->gc);
 423
 424        mtype_ahash_destroy(set,
 425                            __ipset_dereference_protected(h->table, 1), true);
 426        kfree(h);
 427
 428        set->data = NULL;
 429}
 430
 431static void
 432mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 433{
 434        struct htype *h = set->data;
 435
 436        setup_timer(&h->gc, gc, (unsigned long)set);
 437        mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
 438        pr_debug("gc initialized, run in every %u\n",
 439                 IPSET_GC_PERIOD(set->timeout));
 440}
 441
 442static bool
 443mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 444{
 445        const struct htype *x = a->data;
 446        const struct htype *y = b->data;
 447
 448        /* Resizing changes htable_bits, so we ignore it */
 449        return x->maxelem == y->maxelem &&
 450               a->timeout == b->timeout &&
 451#ifdef IP_SET_HASH_WITH_NETMASK
 452               x->netmask == y->netmask &&
 453#endif
 454#ifdef IP_SET_HASH_WITH_MARKMASK
 455               x->markmask == y->markmask &&
 456#endif
 457               a->extensions == b->extensions;
 458}
 459
 460/* Delete expired elements from the hashtable */
 461static void
 462mtype_expire(struct ip_set *set, struct htype *h)
 463{
 464        struct htable *t;
 465        struct hbucket *n, *tmp;
 466        struct mtype_elem *data;
 467        u32 i, j, d;
 468        size_t dsize = set->dsize;
 469#ifdef IP_SET_HASH_WITH_NETS
 470        u8 k;
 471#endif
 472
 473        t = ipset_dereference_protected(h->table, set);
 474        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 475                n = __ipset_dereference_protected(hbucket(t, i), 1);
 476                if (!n)
 477                        continue;
 478                for (j = 0, d = 0; j < n->pos; j++) {
 479                        if (!test_bit(j, n->used)) {
 480                                d++;
 481                                continue;
 482                        }
 483                        data = ahash_data(n, j, dsize);
 484                        if (!ip_set_timeout_expired(ext_timeout(data, set)))
 485                                continue;
 486                        pr_debug("expired %u/%u\n", i, j);
 487                        clear_bit(j, n->used);
 488                        smp_mb__after_atomic();
 489#ifdef IP_SET_HASH_WITH_NETS
 490                        for (k = 0; k < IPSET_NET_COUNT; k++)
 491                                mtype_del_cidr(h,
 492                                        NCIDR_PUT(DCIDR_GET(data->cidr, k)),
 493                                        k);
 494#endif
 495                        ip_set_ext_destroy(set, data);
 496                        set->elements--;
 497                        d++;
 498                }
 499                if (d >= AHASH_INIT_SIZE) {
 500                        if (d >= n->size) {
 501                                rcu_assign_pointer(hbucket(t, i), NULL);
 502                                kfree_rcu(n, rcu);
 503                                continue;
 504                        }
 505                        tmp = kzalloc(sizeof(*tmp) +
 506                                      (n->size - AHASH_INIT_SIZE) * dsize,
 507                                      GFP_ATOMIC);
 508                        if (!tmp)
 509                                /* Still try to delete expired elements */
 510                                continue;
 511                        tmp->size = n->size - AHASH_INIT_SIZE;
 512                        for (j = 0, d = 0; j < n->pos; j++) {
 513                                if (!test_bit(j, n->used))
 514                                        continue;
 515                                data = ahash_data(n, j, dsize);
 516                                memcpy(tmp->value + d * dsize, data, dsize);
 517                                set_bit(d, tmp->used);
 518                                d++;
 519                        }
 520                        tmp->pos = d;
 521                        set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
 522                        rcu_assign_pointer(hbucket(t, i), tmp);
 523                        kfree_rcu(n, rcu);
 524                }
 525        }
 526}
 527
 528static void
 529mtype_gc(unsigned long ul_set)
 530{
 531        struct ip_set *set = (struct ip_set *)ul_set;
 532        struct htype *h = set->data;
 533
 534        pr_debug("called\n");
 535        spin_lock_bh(&set->lock);
 536        mtype_expire(set, h);
 537        spin_unlock_bh(&set->lock);
 538
 539        h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 540        add_timer(&h->gc);
 541}
 542
 543/* Resize a hash: create a new hash table with doubling the hashsize
 544 * and inserting the elements to it. Repeat until we succeed or
 545 * fail due to memory pressures.
 546 */
 547static int
 548mtype_resize(struct ip_set *set, bool retried)
 549{
 550        struct htype *h = set->data;
 551        struct htable *t, *orig;
 552        u8 htable_bits;
 553        size_t extsize, dsize = set->dsize;
 554#ifdef IP_SET_HASH_WITH_NETS
 555        u8 flags;
 556        struct mtype_elem *tmp;
 557#endif
 558        struct mtype_elem *data;
 559        struct mtype_elem *d;
 560        struct hbucket *n, *m;
 561        u32 i, j, key;
 562        int ret;
 563
 564#ifdef IP_SET_HASH_WITH_NETS
 565        tmp = kmalloc(dsize, GFP_KERNEL);
 566        if (!tmp)
 567                return -ENOMEM;
 568#endif
 569        rcu_read_lock_bh();
 570        orig = rcu_dereference_bh_nfnl(h->table);
 571        htable_bits = orig->htable_bits;
 572        rcu_read_unlock_bh();
 573
 574retry:
 575        ret = 0;
 576        htable_bits++;
 577        if (!htable_bits) {
 578                /* In case we have plenty of memory :-) */
 579                pr_warn("Cannot increase the hashsize of set %s further\n",
 580                        set->name);
 581                ret = -IPSET_ERR_HASH_FULL;
 582                goto out;
 583        }
 584        t = ip_set_alloc(htable_size(htable_bits));
 585        if (!t) {
 586                ret = -ENOMEM;
 587                goto out;
 588        }
 589        t->htable_bits = htable_bits;
 590
 591        spin_lock_bh(&set->lock);
 592        orig = __ipset_dereference_protected(h->table, 1);
 593        /* There can't be another parallel resizing, but dumping is possible */
 594        atomic_set(&orig->ref, 1);
 595        atomic_inc(&orig->uref);
 596        extsize = 0;
 597        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
 598                 set->name, orig->htable_bits, htable_bits, orig);
 599        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
 600                n = __ipset_dereference_protected(hbucket(orig, i), 1);
 601                if (!n)
 602                        continue;
 603                for (j = 0; j < n->pos; j++) {
 604                        if (!test_bit(j, n->used))
 605                                continue;
 606                        data = ahash_data(n, j, dsize);
 607#ifdef IP_SET_HASH_WITH_NETS
 608                        /* We have readers running parallel with us,
 609                         * so the live data cannot be modified.
 610                         */
 611                        flags = 0;
 612                        memcpy(tmp, data, dsize);
 613                        data = tmp;
 614                        mtype_data_reset_flags(data, &flags);
 615#endif
 616                        key = HKEY(data, h->initval, htable_bits);
 617                        m = __ipset_dereference_protected(hbucket(t, key), 1);
 618                        if (!m) {
 619                                m = kzalloc(sizeof(*m) +
 620                                            AHASH_INIT_SIZE * dsize,
 621                                            GFP_ATOMIC);
 622                                if (!m) {
 623                                        ret = -ENOMEM;
 624                                        goto cleanup;
 625                                }
 626                                m->size = AHASH_INIT_SIZE;
 627                                extsize = ext_size(AHASH_INIT_SIZE, dsize);
 628                                RCU_INIT_POINTER(hbucket(t, key), m);
 629                        } else if (m->pos >= m->size) {
 630                                struct hbucket *ht;
 631
 632                                if (m->size >= AHASH_MAX(h)) {
 633                                        ret = -EAGAIN;
 634                                } else {
 635                                        ht = kzalloc(sizeof(*ht) +
 636                                                (m->size + AHASH_INIT_SIZE)
 637                                                * dsize,
 638                                                GFP_ATOMIC);
 639                                        if (!ht)
 640                                                ret = -ENOMEM;
 641                                }
 642                                if (ret < 0)
 643                                        goto cleanup;
 644                                memcpy(ht, m, sizeof(struct hbucket) +
 645                                              m->size * dsize);
 646                                ht->size = m->size + AHASH_INIT_SIZE;
 647                                extsize += ext_size(AHASH_INIT_SIZE, dsize);
 648                                kfree(m);
 649                                m = ht;
 650                                RCU_INIT_POINTER(hbucket(t, key), ht);
 651                        }
 652                        d = ahash_data(m, m->pos, dsize);
 653                        memcpy(d, data, dsize);
 654                        set_bit(m->pos++, m->used);
 655#ifdef IP_SET_HASH_WITH_NETS
 656                        mtype_data_reset_flags(d, &flags);
 657#endif
 658                }
 659        }
 660        rcu_assign_pointer(h->table, t);
 661        set->ext_size = extsize;
 662
 663        spin_unlock_bh(&set->lock);
 664
 665        /* Give time to other readers of the set */
 666        synchronize_rcu_bh();
 667
 668        pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
 669                 orig->htable_bits, orig, t->htable_bits, t);
 670        /* If there's nobody else dumping the table, destroy it */
 671        if (atomic_dec_and_test(&orig->uref)) {
 672                pr_debug("Table destroy by resize %p\n", orig);
 673                mtype_ahash_destroy(set, orig, false);
 674        }
 675
 676out:
 677#ifdef IP_SET_HASH_WITH_NETS
 678        kfree(tmp);
 679#endif
 680        return ret;
 681
 682cleanup:
 683        atomic_set(&orig->ref, 0);
 684        atomic_dec(&orig->uref);
 685        spin_unlock_bh(&set->lock);
 686        mtype_ahash_destroy(set, t, false);
 687        if (ret == -EAGAIN)
 688                goto retry;
 689        goto out;
 690}
 691
 692/* Add an element to a hash and update the internal counters when succeeded,
 693 * otherwise report the proper error code.
 694 */
 695static int
 696mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 697          struct ip_set_ext *mext, u32 flags)
 698{
 699        struct htype *h = set->data;
 700        struct htable *t;
 701        const struct mtype_elem *d = value;
 702        struct mtype_elem *data;
 703        struct hbucket *n, *old = ERR_PTR(-ENOENT);
 704        int i, j = -1;
 705        bool flag_exist = flags & IPSET_FLAG_EXIST;
 706        bool deleted = false, forceadd = false, reuse = false;
 707        u32 key, multi = 0;
 708
 709        if (set->elements >= h->maxelem) {
 710                if (SET_WITH_TIMEOUT(set))
 711                        /* FIXME: when set is full, we slow down here */
 712                        mtype_expire(set, h);
 713                if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
 714                        forceadd = true;
 715        }
 716
 717        t = ipset_dereference_protected(h->table, set);
 718        key = HKEY(value, h->initval, t->htable_bits);
 719        n = __ipset_dereference_protected(hbucket(t, key), 1);
 720        if (!n) {
 721                if (forceadd || set->elements >= h->maxelem)
 722                        goto set_full;
 723                old = NULL;
 724                n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
 725                            GFP_ATOMIC);
 726                if (!n)
 727                        return -ENOMEM;
 728                n->size = AHASH_INIT_SIZE;
 729                set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
 730                goto copy_elem;
 731        }
 732        for (i = 0; i < n->pos; i++) {
 733                if (!test_bit(i, n->used)) {
 734                        /* Reuse first deleted entry */
 735                        if (j == -1) {
 736                                deleted = reuse = true;
 737                                j = i;
 738                        }
 739                        continue;
 740                }
 741                data = ahash_data(n, i, set->dsize);
 742                if (mtype_data_equal(data, d, &multi)) {
 743                        if (flag_exist ||
 744                            (SET_WITH_TIMEOUT(set) &&
 745                             ip_set_timeout_expired(ext_timeout(data, set)))) {
 746                                /* Just the extensions could be overwritten */
 747                                j = i;
 748                                goto overwrite_extensions;
 749                        }
 750                        return -IPSET_ERR_EXIST;
 751                }
 752                /* Reuse first timed out entry */
 753                if (SET_WITH_TIMEOUT(set) &&
 754                    ip_set_timeout_expired(ext_timeout(data, set)) &&
 755                    j == -1) {
 756                        j = i;
 757                        reuse = true;
 758                }
 759        }
 760        if (reuse || forceadd) {
 761                data = ahash_data(n, j, set->dsize);
 762                if (!deleted) {
 763#ifdef IP_SET_HASH_WITH_NETS
 764                        for (i = 0; i < IPSET_NET_COUNT; i++)
 765                                mtype_del_cidr(h,
 766                                        NCIDR_PUT(DCIDR_GET(data->cidr, i)),
 767                                        i);
 768#endif
 769                        ip_set_ext_destroy(set, data);
 770                        set->elements--;
 771                }
 772                goto copy_data;
 773        }
 774        if (set->elements >= h->maxelem)
 775                goto set_full;
 776        /* Create a new slot */
 777        if (n->pos >= n->size) {
 778                TUNE_AHASH_MAX(h, multi);
 779                if (n->size >= AHASH_MAX(h)) {
 780                        /* Trigger rehashing */
 781                        mtype_data_next(&h->next, d);
 782                        return -EAGAIN;
 783                }
 784                old = n;
 785                n = kzalloc(sizeof(*n) +
 786                            (old->size + AHASH_INIT_SIZE) * set->dsize,
 787                            GFP_ATOMIC);
 788                if (!n)
 789                        return -ENOMEM;
 790                memcpy(n, old, sizeof(struct hbucket) +
 791                       old->size * set->dsize);
 792                n->size = old->size + AHASH_INIT_SIZE;
 793                set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
 794        }
 795
 796copy_elem:
 797        j = n->pos++;
 798        data = ahash_data(n, j, set->dsize);
 799copy_data:
 800        set->elements++;
 801#ifdef IP_SET_HASH_WITH_NETS
 802        for (i = 0; i < IPSET_NET_COUNT; i++)
 803                mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
 804#endif
 805        memcpy(data, d, sizeof(struct mtype_elem));
 806overwrite_extensions:
 807#ifdef IP_SET_HASH_WITH_NETS
 808        mtype_data_set_flags(data, flags);
 809#endif
 810        if (SET_WITH_COUNTER(set))
 811                ip_set_init_counter(ext_counter(data, set), ext);
 812        if (SET_WITH_COMMENT(set))
 813                ip_set_init_comment(set, ext_comment(data, set), ext);
 814        if (SET_WITH_SKBINFO(set))
 815                ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
 816        /* Must come last for the case when timed out entry is reused */
 817        if (SET_WITH_TIMEOUT(set))
 818                ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
 819        smp_mb__before_atomic();
 820        set_bit(j, n->used);
 821        if (old != ERR_PTR(-ENOENT)) {
 822                rcu_assign_pointer(hbucket(t, key), n);
 823                if (old)
 824                        kfree_rcu(old, rcu);
 825        }
 826
 827        return 0;
 828set_full:
 829        if (net_ratelimit())
 830                pr_warn("Set %s is full, maxelem %u reached\n",
 831                        set->name, h->maxelem);
 832        return -IPSET_ERR_HASH_FULL;
 833}
 834
 835/* Delete an element from the hash and free up space if possible.
 836 */
 837static int
 838mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 839          struct ip_set_ext *mext, u32 flags)
 840{
 841        struct htype *h = set->data;
 842        struct htable *t;
 843        const struct mtype_elem *d = value;
 844        struct mtype_elem *data;
 845        struct hbucket *n;
 846        int i, j, k, ret = -IPSET_ERR_EXIST;
 847        u32 key, multi = 0;
 848        size_t dsize = set->dsize;
 849
 850        t = ipset_dereference_protected(h->table, set);
 851        key = HKEY(value, h->initval, t->htable_bits);
 852        n = __ipset_dereference_protected(hbucket(t, key), 1);
 853        if (!n)
 854                goto out;
 855        for (i = 0, k = 0; i < n->pos; i++) {
 856                if (!test_bit(i, n->used)) {
 857                        k++;
 858                        continue;
 859                }
 860                data = ahash_data(n, i, dsize);
 861                if (!mtype_data_equal(data, d, &multi))
 862                        continue;
 863                if (SET_WITH_TIMEOUT(set) &&
 864                    ip_set_timeout_expired(ext_timeout(data, set)))
 865                        goto out;
 866
 867                ret = 0;
 868                clear_bit(i, n->used);
 869                smp_mb__after_atomic();
 870                if (i + 1 == n->pos)
 871                        n->pos--;
 872                set->elements--;
 873#ifdef IP_SET_HASH_WITH_NETS
 874                for (j = 0; j < IPSET_NET_COUNT; j++)
 875                        mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
 876                                       j);
 877#endif
 878                ip_set_ext_destroy(set, data);
 879
 880                for (; i < n->pos; i++) {
 881                        if (!test_bit(i, n->used))
 882                                k++;
 883                }
 884                if (n->pos == 0 && k == 0) {
 885                        set->ext_size -= ext_size(n->size, dsize);
 886                        rcu_assign_pointer(hbucket(t, key), NULL);
 887                        kfree_rcu(n, rcu);
 888                } else if (k >= AHASH_INIT_SIZE) {
 889                        struct hbucket *tmp = kzalloc(sizeof(*tmp) +
 890                                        (n->size - AHASH_INIT_SIZE) * dsize,
 891                                        GFP_ATOMIC);
 892                        if (!tmp)
 893                                goto out;
 894                        tmp->size = n->size - AHASH_INIT_SIZE;
 895                        for (j = 0, k = 0; j < n->pos; j++) {
 896                                if (!test_bit(j, n->used))
 897                                        continue;
 898                                data = ahash_data(n, j, dsize);
 899                                memcpy(tmp->value + k * dsize, data, dsize);
 900                                set_bit(k, tmp->used);
 901                                k++;
 902                        }
 903                        tmp->pos = k;
 904                        set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
 905                        rcu_assign_pointer(hbucket(t, key), tmp);
 906                        kfree_rcu(n, rcu);
 907                }
 908                goto out;
 909        }
 910
 911out:
 912        return ret;
 913}
 914
 915static inline int
 916mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
 917                 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
 918{
 919        if (SET_WITH_COUNTER(set))
 920                ip_set_update_counter(ext_counter(data, set),
 921                                      ext, mext, flags);
 922        if (SET_WITH_SKBINFO(set))
 923                ip_set_get_skbinfo(ext_skbinfo(data, set),
 924                                   ext, mext, flags);
 925        return mtype_do_data_match(data);
 926}
 927
 928#ifdef IP_SET_HASH_WITH_NETS
 929/* Special test function which takes into account the different network
 930 * sizes added to the set
 931 */
 932static int
 933mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 934                 const struct ip_set_ext *ext,
 935                 struct ip_set_ext *mext, u32 flags)
 936{
 937        struct htype *h = set->data;
 938        struct htable *t = rcu_dereference_bh(h->table);
 939        struct hbucket *n;
 940        struct mtype_elem *data;
 941#if IPSET_NET_COUNT == 2
 942        struct mtype_elem orig = *d;
 943        int i, j = 0, k;
 944#else
 945        int i, j = 0;
 946#endif
 947        u32 key, multi = 0;
 948
 949        pr_debug("test by nets\n");
 950        for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
 951#if IPSET_NET_COUNT == 2
 952                mtype_data_reset_elem(d, &orig);
 953                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
 954                for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
 955                     k++) {
 956                        mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
 957                                           true);
 958#else
 959                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
 960#endif
 961                key = HKEY(d, h->initval, t->htable_bits);
 962                n =  rcu_dereference_bh(hbucket(t, key));
 963                if (!n)
 964                        continue;
 965                for (i = 0; i < n->pos; i++) {
 966                        if (!test_bit(i, n->used))
 967                                continue;
 968                        data = ahash_data(n, i, set->dsize);
 969                        if (!mtype_data_equal(data, d, &multi))
 970                                continue;
 971                        if (SET_WITH_TIMEOUT(set)) {
 972                                if (!ip_set_timeout_expired(
 973                                                ext_timeout(data, set)))
 974                                        return mtype_data_match(data, ext,
 975                                                                mext, set,
 976                                                                flags);
 977#ifdef IP_SET_HASH_WITH_MULTI
 978                                multi = 0;
 979#endif
 980                        } else
 981                                return mtype_data_match(data, ext,
 982                                                        mext, set, flags);
 983                }
 984#if IPSET_NET_COUNT == 2
 985                }
 986#endif
 987        }
 988        return 0;
 989}
 990#endif
 991
 992/* Test whether the element is added to the set */
 993static int
 994mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 995           struct ip_set_ext *mext, u32 flags)
 996{
 997        struct htype *h = set->data;
 998        struct htable *t;
 999        struct mtype_elem *d = value;
1000        struct hbucket *n;
1001        struct mtype_elem *data;
1002        int i, ret = 0;
1003        u32 key, multi = 0;
1004
1005        t = rcu_dereference_bh(h->table);
1006#ifdef IP_SET_HASH_WITH_NETS
1007        /* If we test an IP address and not a network address,
1008         * try all possible network sizes
1009         */
1010        for (i = 0; i < IPSET_NET_COUNT; i++)
1011                if (DCIDR_GET(d->cidr, i) != HOST_MASK)
1012                        break;
1013        if (i == IPSET_NET_COUNT) {
1014                ret = mtype_test_cidrs(set, d, ext, mext, flags);
1015                goto out;
1016        }
1017#endif
1018
1019        key = HKEY(d, h->initval, t->htable_bits);
1020        n = rcu_dereference_bh(hbucket(t, key));
1021        if (!n) {
1022                ret = 0;
1023                goto out;
1024        }
1025        for (i = 0; i < n->pos; i++) {
1026                if (!test_bit(i, n->used))
1027                        continue;
1028                data = ahash_data(n, i, set->dsize);
1029                if (mtype_data_equal(data, d, &multi) &&
1030                    !(SET_WITH_TIMEOUT(set) &&
1031                      ip_set_timeout_expired(ext_timeout(data, set)))) {
1032                        ret = mtype_data_match(data, ext, mext, set, flags);
1033                        goto out;
1034                }
1035        }
1036out:
1037        return ret;
1038}
1039
1040/* Reply a HEADER request: fill out the header part of the set */
1041static int
1042mtype_head(struct ip_set *set, struct sk_buff *skb)
1043{
1044        const struct htype *h = set->data;
1045        const struct htable *t;
1046        struct nlattr *nested;
1047        size_t memsize;
1048        u8 htable_bits;
1049
1050        rcu_read_lock_bh();
1051        t = rcu_dereference_bh_nfnl(h->table);
1052        memsize = mtype_ahash_memsize(h, t) + set->ext_size;
1053        htable_bits = t->htable_bits;
1054        rcu_read_unlock_bh();
1055
1056        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
1057        if (!nested)
1058                goto nla_put_failure;
1059        if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
1060                          htonl(jhash_size(htable_bits))) ||
1061            nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
1062                goto nla_put_failure;
1063#ifdef IP_SET_HASH_WITH_NETMASK
1064        if (h->netmask != HOST_MASK &&
1065            nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
1066                goto nla_put_failure;
1067#endif
1068#ifdef IP_SET_HASH_WITH_MARKMASK
1069        if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
1070                goto nla_put_failure;
1071#endif
1072        if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
1073            nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
1074            nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
1075                goto nla_put_failure;
1076        if (unlikely(ip_set_put_flags(skb, set)))
1077                goto nla_put_failure;
1078        ipset_nest_end(skb, nested);
1079
1080        return 0;
1081nla_put_failure:
1082        return -EMSGSIZE;
1083}
1084
1085/* Make possible to run dumping parallel with resizing */
1086static void
1087mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
1088{
1089        struct htype *h = set->data;
1090        struct htable *t;
1091
1092        if (start) {
1093                rcu_read_lock_bh();
1094                t = rcu_dereference_bh_nfnl(h->table);
1095                atomic_inc(&t->uref);
1096                cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
1097                rcu_read_unlock_bh();
1098        } else if (cb->args[IPSET_CB_PRIVATE]) {
1099                t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
1100                if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
1101                        /* Resizing didn't destroy the hash table */
1102                        pr_debug("Table destroy by dump: %p\n", t);
1103                        mtype_ahash_destroy(set, t, false);
1104                }
1105                cb->args[IPSET_CB_PRIVATE] = 0;
1106        }
1107}
1108
1109/* Reply a LIST/SAVE request: dump the elements of the specified set */
1110static int
1111mtype_list(const struct ip_set *set,
1112           struct sk_buff *skb, struct netlink_callback *cb)
1113{
1114        const struct htable *t;
1115        struct nlattr *atd, *nested;
1116        const struct hbucket *n;
1117        const struct mtype_elem *e;
1118        u32 first = cb->args[IPSET_CB_ARG0];
1119        /* We assume that one hash bucket fills into one page */
1120        void *incomplete;
1121        int i, ret = 0;
1122
1123        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
1124        if (!atd)
1125                return -EMSGSIZE;
1126
1127        pr_debug("list hash set %s\n", set->name);
1128        t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
1129        /* Expire may replace a hbucket with another one */
1130        rcu_read_lock();
1131        for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
1132             cb->args[IPSET_CB_ARG0]++) {
1133                incomplete = skb_tail_pointer(skb);
1134                n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
1135                pr_debug("cb->arg bucket: %lu, t %p n %p\n",
1136                         cb->args[IPSET_CB_ARG0], t, n);
1137                if (!n)
1138                        continue;
1139                for (i = 0; i < n->pos; i++) {
1140                        if (!test_bit(i, n->used))
1141                                continue;
1142                        e = ahash_data(n, i, set->dsize);
1143                        if (SET_WITH_TIMEOUT(set) &&
1144                            ip_set_timeout_expired(ext_timeout(e, set)))
1145                                continue;
1146                        pr_debug("list hash %lu hbucket %p i %u, data %p\n",
1147                                 cb->args[IPSET_CB_ARG0], n, i, e);
1148                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
1149                        if (!nested) {
1150                                if (cb->args[IPSET_CB_ARG0] == first) {
1151                                        nla_nest_cancel(skb, atd);
1152                                        ret = -EMSGSIZE;
1153                                        goto out;
1154                                }
1155                                goto nla_put_failure;
1156                        }
1157                        if (mtype_data_list(skb, e))
1158                                goto nla_put_failure;
1159                        if (ip_set_put_extensions(skb, set, e, true))
1160                                goto nla_put_failure;
1161                        ipset_nest_end(skb, nested);
1162                }
1163        }
1164        ipset_nest_end(skb, atd);
1165        /* Set listing finished */
1166        cb->args[IPSET_CB_ARG0] = 0;
1167
1168        goto out;
1169
1170nla_put_failure:
1171        nlmsg_trim(skb, incomplete);
1172        if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
1173                pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
1174                        set->name);
1175                cb->args[IPSET_CB_ARG0] = 0;
1176                ret = -EMSGSIZE;
1177        } else {
1178                ipset_nest_end(skb, atd);
1179        }
1180out:
1181        rcu_read_unlock();
1182        return ret;
1183}
1184
1185static int
1186IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
1187                          const struct xt_action_param *par,
1188                          enum ipset_adt adt, struct ip_set_adt_opt *opt);
1189
1190static int
1191IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
1192                          enum ipset_adt adt, u32 *lineno, u32 flags,
1193                          bool retried);
1194
1195static const struct ip_set_type_variant mtype_variant = {
1196        .kadt   = mtype_kadt,
1197        .uadt   = mtype_uadt,
1198        .adt    = {
1199                [IPSET_ADD] = mtype_add,
1200                [IPSET_DEL] = mtype_del,
1201                [IPSET_TEST] = mtype_test,
1202        },
1203        .destroy = mtype_destroy,
1204        .flush  = mtype_flush,
1205        .head   = mtype_head,
1206        .list   = mtype_list,
1207        .uref   = mtype_uref,
1208        .resize = mtype_resize,
1209        .same_set = mtype_same_set,
1210};
1211
#ifdef IP_SET_EMIT_CREATE
/* Create a hash type of set from the netlink attributes in @tb.
 *
 * The optional attributes are validated and parsed, then the type private
 * structure and the initial hash table are allocated and attached to @set
 * together with the variant matching the family, the element size and the
 * timeout settings.
 *
 * Returns 0 on success, or one of
 *   -IPSET_ERR_INVALID_FAMILY    family is neither IPv4 nor IPv6
 *   -IPSET_ERR_PROTOCOL          an attribute is not in network byte order
 *   -IPSET_ERR_INVALID_MARKMASK  markmask attribute is zero
 *   -IPSET_ERR_INVALID_NETMASK   netmask is zero or too big for the family
 *   -ENOMEM                      allocation failed or the table size is 0
 */
static int
IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
                            struct nlattr *tb[], u32 flags)
{
        u32 hashsize = IPSET_DEFAULT_HASHSIZE;
        u32 maxelem = IPSET_DEFAULT_MAXELEM;
#ifdef IP_SET_HASH_WITH_MARKMASK
        u32 markmask = 0xffffffff;
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
        u8 netmask;
#endif
        struct htable *t;
        struct htype *h;
        size_t hsize;
        u8 hbits;

        pr_debug("Create set %s with family %s\n",
                 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");

#ifndef IP_SET_PROTO_UNDEF
        if (set->family != NFPROTO_IPV4 && set->family != NFPROTO_IPV6)
                return -IPSET_ERR_INVALID_FAMILY;
#endif

        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
                     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
                     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
                return -IPSET_ERR_PROTOCOL;

#ifdef IP_SET_HASH_WITH_MARKMASK
        /* Separated condition in order to avoid directive in argument list */
        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
                return -IPSET_ERR_PROTOCOL;

        if (tb[IPSET_ATTR_MARKMASK]) {
                markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
                if (markmask == 0)
                        return -IPSET_ERR_INVALID_MARKMASK;
        }
#endif

#ifdef IP_SET_HASH_WITH_NETMASK
        /* Without the attribute the netmask is the full address length */
        if (set->family == NFPROTO_IPV4)
                netmask = 32;
        else
                netmask = 128;
        if (tb[IPSET_ATTR_NETMASK]) {
                netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

                if (netmask == 0 ||
                    (set->family == NFPROTO_IPV4 && netmask > 32) ||
                    (set->family == NFPROTO_IPV6 && netmask > 128))
                        return -IPSET_ERR_INVALID_NETMASK;
        }
#endif

        if (tb[IPSET_ATTR_HASHSIZE]) {
                hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
                /* Too small requests are silently raised to the minimum */
                if (hashsize < IPSET_MIMINAL_HASHSIZE)
                        hashsize = IPSET_MIMINAL_HASHSIZE;
        }

        if (tb[IPSET_ATTR_MAXELEM])
                maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

        h = kzalloc(sizeof(*h), GFP_KERNEL);
        if (!h)
                return -ENOMEM;

        hbits = htable_bits(hashsize);
        hsize = htable_size(hbits);
        if (hsize == 0)
                goto free_h;
        t = ip_set_alloc(hsize);
        if (!t)
                goto free_h;

        h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
        h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
        h->markmask = markmask;
#endif
        get_random_bytes(&h->initval, sizeof(h->initval));

        t->htable_bits = hbits;
        RCU_INIT_POINTER(h->table, t);

        set->data = h;
        /* Select the variant and the element size by the family */
#ifndef IP_SET_PROTO_UNDEF
        if (set->family == NFPROTO_IPV4) {
#endif
                set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
                set->dsize = ip_set_elem_len(set, tb,
                        sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
                        __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
        } else {
                set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
                set->dsize = ip_set_elem_len(set, tb,
                        sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
                        __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
        }
#endif

        set->timeout = IPSET_NO_TIMEOUT;
        if (tb[IPSET_ATTR_TIMEOUT]) {
                set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
                /* Sets with timeout get the family specific gc set up */
#ifndef IP_SET_PROTO_UNDEF
                if (set->family == NFPROTO_IPV4)
#endif
                        IPSET_TOKEN(HTYPE, 4_gc_init)(set,
                                IPSET_TOKEN(HTYPE, 4_gc));
#ifndef IP_SET_PROTO_UNDEF
                else
                        IPSET_TOKEN(HTYPE, 6_gc_init)(set,
                                IPSET_TOKEN(HTYPE, 6_gc));
#endif
        }

        pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
                 set->name, jhash_size(t->htable_bits),
                 t->htable_bits, h->maxelem, set->data, t);

        return 0;

free_h:
        kfree(h);
        return -ENOMEM;
}
#endif /* IP_SET_EMIT_CREATE */
1342
1343#undef HKEY_DATALEN
1344