linux/net/netfilter/ipset/ip_set_hash_gen.h
   1/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
   2 *
   3 * This program is free software; you can redistribute it and/or modify
   4 * it under the terms of the GNU General Public License version 2 as
   5 * published by the Free Software Foundation.
   6 */
   7
   8#ifndef _IP_SET_HASH_GEN_H
   9#define _IP_SET_HASH_GEN_H
  10
  11#include <linux/rcupdate.h>
  12#include <linux/jhash.h>
  13#include <linux/types.h>
  14#include <linux/netfilter/ipset/ip_set_timeout.h>
  15
  16#define __ipset_dereference_protected(p, c)     rcu_dereference_protected(p, c)
  17#define ipset_dereference_protected(p, set) \
  18        __ipset_dereference_protected(p, spin_is_locked(&(set)->lock))
  19
  20#define rcu_dereference_bh_nfnl(p)      rcu_dereference_bh_check(p, 1)
  21
   22/* Hashing which uses arrays to resolve clashes. The hash table is resized
  23 * (doubled) when searching becomes too long.
  24 * Internally jhash is used with the assumption that the size of the
  25 * stored data is a multiple of sizeof(u32).
  26 *
  27 * Readers and resizing
  28 *
  29 * Resizing can be triggered by userspace command only, and those
  30 * are serialized by the nfnl mutex. During resizing the set is
  31 * read-locked, so the only possible concurrent operations are
  32 * the kernel side readers. Those must be protected by proper RCU locking.
  33 */
  34
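/* Illustrative sketch (not compiled; example_reader is a hypothetical
 * name, not part of this header): the reader-side pattern the lookup
 * paths below follow.  Readers never take set->lock; they rely on the
 * RCU BH read-side section (in the real kadt/uadt call paths it is
 * assumed to be entered by the ip_set core before calling in here), so a
 * concurrent resize may swap h->table and free the old table only after
 * a grace period.  HKEY, hbucket(), struct htype and struct htable are
 * all defined further down in this header.
 */
#if 0
static int example_reader(struct ip_set *set, struct mtype_elem *d)
{
        struct htype *h = set->data;
        struct htable *t;
        struct hbucket *n;
        u32 key;

        rcu_read_lock_bh();
        t = rcu_dereference_bh(h->table);
        key = HKEY(d, h->initval, t->htable_bits);
        n = rcu_dereference_bh(hbucket(t, key));
        /* ... walk the used slots of n as mtype_test() does ... */
        rcu_read_unlock_bh();
        return 0;
}
#endif
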
  35/* Number of elements to store in an initial array block */
  36#define AHASH_INIT_SIZE                 4
  37/* Max number of elements to store in an array block */
  38#define AHASH_MAX_SIZE                  (3 * AHASH_INIT_SIZE)
   39/* Max number of elements in the array block when tuned */
  40#define AHASH_MAX_TUNED                 64
  41
  42/* Max number of elements can be tuned */
  43#ifdef IP_SET_HASH_WITH_MULTI
  44#define AHASH_MAX(h)                    ((h)->ahash_max)
  45
  46static inline u8
  47tune_ahash_max(u8 curr, u32 multi)
  48{
  49        u32 n;
  50
  51        if (multi < curr)
  52                return curr;
  53
  54        n = curr + AHASH_INIT_SIZE;
   55        /* Currently, when listing, one hash bucket must fit into a
   56         * single message. Therefore we have a hard limit here.
  57         */
  58        return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
  59}
  60
  61#define TUNE_AHASH_MAX(h, multi)        \
  62        ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
  63#else
  64#define AHASH_MAX(h)                    AHASH_MAX_SIZE
  65#define TUNE_AHASH_MAX(h, multi)
  66#endif
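
/* Worked example (illustrative): with IP_SET_HASH_WITH_MULTI the bucket
 * limit starts at AHASH_MAX_SIZE (12) and grows by AHASH_INIT_SIZE (4)
 * each time the accumulated clash counter reaches the current limit,
 * until AHASH_MAX_TUNED (64):
 *
 *      tune_ahash_max(12, 12) -> 16
 *      tune_ahash_max(16, 20) -> 20
 *      ...
 *      tune_ahash_max(64, 70) -> 64   (hard limit, no further growth)
 */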
  67
  68/* A hash bucket */
  69struct hbucket {
  70        struct rcu_head rcu;    /* for call_rcu_bh */
  71        /* Which positions are used in the array */
  72        DECLARE_BITMAP(used, AHASH_MAX_TUNED);
  73        u8 size;                /* size of the array */
  74        u8 pos;                 /* position of the first free entry */
  75        unsigned char value[0]  /* the array of the values */
  76                __aligned(__alignof__(u64));
  77};
  78
  79/* The hash table: the table size stored here in order to make resizing easy */
  80struct htable {
  81        atomic_t ref;           /* References for resizing */
  82        atomic_t uref;          /* References for dumping */
  83        u8 htable_bits;         /* size of hash table == 2^htable_bits */
  84        struct hbucket __rcu *bucket[0]; /* hashtable buckets */
  85};
  86
  87#define hbucket(h, i)           ((h)->bucket[i])
  88
  89#ifndef IPSET_NET_COUNT
  90#define IPSET_NET_COUNT         1
  91#endif
  92
  93/* Book-keeping of the prefixes added to the set */
  94struct net_prefixes {
  95        u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
  96        u8 cidr[IPSET_NET_COUNT];  /* the cidr value */
  97};
  98
  99/* Compute the hash table size */
 100static size_t
 101htable_size(u8 hbits)
 102{
 103        size_t hsize;
 104
  105        /* The hash size must fit both into u32 (for jhash) and into size_t */
 106        if (hbits > 31)
 107                return 0;
 108        hsize = jhash_size(hbits);
 109        if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
 110            < hsize)
 111                return 0;
 112
 113        return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
 114}
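
/* Worked example (illustrative, assuming 8-byte pointers): for
 * hbits == 10 the table holds jhash_size(10) == 1024 bucket pointers,
 * so htable_size(10) returns 1024 * sizeof(struct hbucket *) +
 * sizeof(struct htable) == 8192 bytes plus the small htable header.
 */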
 115
 116/* Compute htable_bits from the user input parameter hashsize */
 117static u8
 118htable_bits(u32 hashsize)
 119{
 120        /* Assume that hashsize == 2^htable_bits */
 121        u8 bits = fls(hashsize - 1);
 122
 123        if (jhash_size(bits) != hashsize)
 124                /* Round up to the first 2^n value */
 125                bits = fls(hashsize);
 126
 127        return bits;
 128}
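
/* Worked examples (illustrative): htable_bits(1024) == 10, because
 * fls(1023) == 10 and jhash_size(10) == 1024.  htable_bits(1000) == 10
 * as well: jhash_size(fls(999)) == 1024 != 1000, so the value is rounded
 * up to the next power of two via fls(1000) == 10.
 */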
 129
 130#ifdef IP_SET_HASH_WITH_NETS
 131#if IPSET_NET_COUNT > 1
 132#define __CIDR(cidr, i)         (cidr[i])
 133#else
 134#define __CIDR(cidr, i)         (cidr)
 135#endif
 136
 137/* cidr + 1 is stored in net_prefixes to support /0 */
 138#define NCIDR_PUT(cidr)         ((cidr) + 1)
 139#define NCIDR_GET(cidr)         ((cidr) - 1)
 140
 141#ifdef IP_SET_HASH_WITH_NETS_PACKED
 142/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
 143#define DCIDR_PUT(cidr)         ((cidr) - 1)
 144#define DCIDR_GET(cidr, i)      (__CIDR(cidr, i) + 1)
 145#else
 146#define DCIDR_PUT(cidr)         (cidr)
 147#define DCIDR_GET(cidr, i)      __CIDR(cidr, i)
 148#endif
 149
 150#define INIT_CIDR(cidr, host_mask)      \
 151        DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
 152
 153#define SET_HOST_MASK(family)   (family == AF_INET ? 32 : 128)
 154
 155#ifdef IP_SET_HASH_WITH_NET0
 156/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */
 157#define NLEN(family)            (SET_HOST_MASK(family) + 1)
 158#define CIDR_POS(c)             ((c) - 1)
 159#else
 160/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */
 161#define NLEN(family)            SET_HOST_MASK(family)
 162#define CIDR_POS(c)             ((c) - 2)
 163#endif
 164
 165#else
 166#define NLEN(family)            0
 167#endif /* IP_SET_HASH_WITH_NETS */
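
/* Worked example (illustrative): for a /24 entry the book-keeping value
 * handled via NCIDR_PUT()/NCIDR_GET() is 24 + 1 == 25, so a /0 entry can
 * be stored as 1 and the value 0 can mean "unused slot".  In the data
 * element itself DCIDR_PUT() stores 24 as-is, or 23 when the cidr is
 * packed together with the nomatch flag (IP_SET_HASH_WITH_NETS_PACKED).
 */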
 168
 169#endif /* _IP_SET_HASH_GEN_H */
 170
 171/* Family dependent templates */
 172
 173#undef ahash_data
 174#undef mtype_data_equal
 175#undef mtype_do_data_match
 176#undef mtype_data_set_flags
 177#undef mtype_data_reset_elem
 178#undef mtype_data_reset_flags
 179#undef mtype_data_netmask
 180#undef mtype_data_list
 181#undef mtype_data_next
 182#undef mtype_elem
 183
 184#undef mtype_ahash_destroy
 185#undef mtype_ext_cleanup
 186#undef mtype_add_cidr
 187#undef mtype_del_cidr
 188#undef mtype_ahash_memsize
 189#undef mtype_flush
 190#undef mtype_destroy
 191#undef mtype_same_set
 192#undef mtype_kadt
 193#undef mtype_uadt
 194#undef mtype
 195
 196#undef mtype_add
 197#undef mtype_del
 198#undef mtype_test_cidrs
 199#undef mtype_test
 200#undef mtype_uref
 201#undef mtype_expire
 202#undef mtype_resize
 203#undef mtype_head
 204#undef mtype_list
 205#undef mtype_gc
 206#undef mtype_gc_init
 207#undef mtype_variant
 208#undef mtype_data_match
 209
 210#undef HKEY
 211
 212#define mtype_data_equal        IPSET_TOKEN(MTYPE, _data_equal)
 213#ifdef IP_SET_HASH_WITH_NETS
 214#define mtype_do_data_match     IPSET_TOKEN(MTYPE, _do_data_match)
 215#else
 216#define mtype_do_data_match(d)  1
 217#endif
 218#define mtype_data_set_flags    IPSET_TOKEN(MTYPE, _data_set_flags)
 219#define mtype_data_reset_elem   IPSET_TOKEN(MTYPE, _data_reset_elem)
 220#define mtype_data_reset_flags  IPSET_TOKEN(MTYPE, _data_reset_flags)
 221#define mtype_data_netmask      IPSET_TOKEN(MTYPE, _data_netmask)
 222#define mtype_data_list         IPSET_TOKEN(MTYPE, _data_list)
 223#define mtype_data_next         IPSET_TOKEN(MTYPE, _data_next)
 224#define mtype_elem              IPSET_TOKEN(MTYPE, _elem)
 225
 226#define mtype_ahash_destroy     IPSET_TOKEN(MTYPE, _ahash_destroy)
 227#define mtype_ext_cleanup       IPSET_TOKEN(MTYPE, _ext_cleanup)
 228#define mtype_add_cidr          IPSET_TOKEN(MTYPE, _add_cidr)
 229#define mtype_del_cidr          IPSET_TOKEN(MTYPE, _del_cidr)
 230#define mtype_ahash_memsize     IPSET_TOKEN(MTYPE, _ahash_memsize)
 231#define mtype_flush             IPSET_TOKEN(MTYPE, _flush)
 232#define mtype_destroy           IPSET_TOKEN(MTYPE, _destroy)
 233#define mtype_same_set          IPSET_TOKEN(MTYPE, _same_set)
 234#define mtype_kadt              IPSET_TOKEN(MTYPE, _kadt)
 235#define mtype_uadt              IPSET_TOKEN(MTYPE, _uadt)
 236#define mtype                   MTYPE
 237
 238#define mtype_add               IPSET_TOKEN(MTYPE, _add)
 239#define mtype_del               IPSET_TOKEN(MTYPE, _del)
 240#define mtype_test_cidrs        IPSET_TOKEN(MTYPE, _test_cidrs)
 241#define mtype_test              IPSET_TOKEN(MTYPE, _test)
 242#define mtype_uref              IPSET_TOKEN(MTYPE, _uref)
 243#define mtype_expire            IPSET_TOKEN(MTYPE, _expire)
 244#define mtype_resize            IPSET_TOKEN(MTYPE, _resize)
 245#define mtype_head              IPSET_TOKEN(MTYPE, _head)
 246#define mtype_list              IPSET_TOKEN(MTYPE, _list)
 247#define mtype_gc                IPSET_TOKEN(MTYPE, _gc)
 248#define mtype_gc_init           IPSET_TOKEN(MTYPE, _gc_init)
 249#define mtype_variant           IPSET_TOKEN(MTYPE, _variant)
 250#define mtype_data_match        IPSET_TOKEN(MTYPE, _data_match)
 251
 252#ifndef MTYPE
 253#error "MTYPE is not defined!"
 254#endif
 255
 256#ifndef HOST_MASK
 257#error "HOST_MASK is not defined!"
 258#endif
 259
 260#ifndef HKEY_DATALEN
 261#define HKEY_DATALEN            sizeof(struct mtype_elem)
 262#endif
 263
 264#define HKEY(data, initval, htable_bits)                        \
 265(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval)     \
 266        & jhash_mask(htable_bits))
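/* HKEY hashes the element as an array of u32 words, which is why the
 * comment at the top of this file requires the stored data size to be a
 * multiple of sizeof(u32); the result is masked down to htable_bits bits
 * to index the bucket array.
 */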
 267
 268#ifndef htype
 269#ifndef HTYPE
 270#error "HTYPE is not defined!"
 271#endif /* HTYPE */
 272#define htype                   HTYPE
 273
 274/* The generic hash structure */
 275struct htype {
 276        struct htable __rcu *table; /* the hash table */
 277        u32 maxelem;            /* max elements in the hash */
  278        u32 elements;           /* current number of elements (vs timeout) */
 279        u32 initval;            /* random jhash init value */
 280#ifdef IP_SET_HASH_WITH_MARKMASK
  281        u32 markmask;           /* mask applied to the mark value before storing */
 282#endif
 283        struct timer_list gc;   /* garbage collection when timeout enabled */
 284        struct mtype_elem next; /* temporary storage for uadd */
 285#ifdef IP_SET_HASH_WITH_MULTI
 286        u8 ahash_max;           /* max elements in an array block */
 287#endif
 288#ifdef IP_SET_HASH_WITH_NETMASK
  289        u8 netmask;             /* prefix length applied to addresses before storing */
 290#endif
 291#ifdef IP_SET_HASH_WITH_NETS
 292        struct net_prefixes nets[0]; /* book-keeping of prefixes */
 293#endif
 294};
 295#endif /* htype */
 296
 297#ifdef IP_SET_HASH_WITH_NETS
  298/* Network cidr size bookkeeping when the hash stores different-sized
  299 * networks. cidr == real cidr + 1 to support /0.
 300 */
 301static void
 302mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 303{
 304        int i, j;
 305
  306        /* Keep the array sorted by decreasing cidr value: longest prefix first */
 307        for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) {
 308                if (j != -1) {
 309                        continue;
 310                } else if (h->nets[i].cidr[n] < cidr) {
 311                        j = i;
 312                } else if (h->nets[i].cidr[n] == cidr) {
 313                        h->nets[CIDR_POS(cidr)].nets[n]++;
 314                        return;
 315                }
 316        }
 317        if (j != -1) {
 318                for (; i > j; i--)
 319                        h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
 320        }
 321        h->nets[i].cidr[n] = cidr;
 322        h->nets[CIDR_POS(cidr)].nets[n] = 1;
 323}
 324
 325static void
 326mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 327{
 328        u8 i, j, net_end = nets_length - 1;
 329
 330        for (i = 0; i < nets_length; i++) {
 331                if (h->nets[i].cidr[n] != cidr)
 332                        continue;
 333                h->nets[CIDR_POS(cidr)].nets[n]--;
 334                if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
 335                        return;
 336                for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
 337                        h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
 338                h->nets[j].cidr[n] = 0;
 339                return;
 340        }
 341}
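
/* Worked example (illustrative): after adding a /24, a /16 and a second
 * /24 network to an IPv4 set, the book-keeping above (stored as
 * cidr + 1) looks like
 *
 *      h->nets[0].cidr[0] == 25, h->nets[CIDR_POS(25)].nets[0] == 2
 *      h->nets[1].cidr[0] == 17, h->nets[CIDR_POS(17)].nets[0] == 1
 *
 * i.e. the cidr[] values are kept sorted with the longest prefix first,
 * while the element counts are indexed by CIDR_POS() of the cidr value.
 */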
 342#endif
 343
 344/* Calculate the actual memory size of the set data */
 345static size_t
 346mtype_ahash_memsize(const struct htype *h, const struct htable *t,
 347                    u8 nets_length, size_t dsize)
 348{
 349        u32 i;
 350        struct hbucket *n;
 351        size_t memsize = sizeof(*h) + sizeof(*t);
 352
 353#ifdef IP_SET_HASH_WITH_NETS
 354        memsize += sizeof(struct net_prefixes) * nets_length;
 355#endif
 356        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 357                n = rcu_dereference_bh(hbucket(t, i));
 358                if (!n)
 359                        continue;
 360                memsize += sizeof(struct hbucket) + n->size * dsize;
 361        }
 362
 363        return memsize;
 364}
 365
 366/* Get the ith element from the array block n */
 367#define ahash_data(n, i, dsize) \
 368        ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
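/* The elements of a bucket are stored back to back in the u64-aligned
 * value[] flexible array, each set->dsize bytes long, so element i
 * starts at byte offset i * dsize; the used bitmap records which of the
 * first 'pos' slots currently hold a live element.
 */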
 369
 370static void
 371mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
 372{
 373        int i;
 374
 375        for (i = 0; i < n->pos; i++)
 376                if (test_bit(i, n->used))
 377                        ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
 378}
 379
 380/* Flush a hash type of set: destroy all elements */
 381static void
 382mtype_flush(struct ip_set *set)
 383{
 384        struct htype *h = set->data;
 385        struct htable *t;
 386        struct hbucket *n;
 387        u32 i;
 388
 389        t = ipset_dereference_protected(h->table, set);
 390        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 391                n = __ipset_dereference_protected(hbucket(t, i), 1);
 392                if (!n)
 393                        continue;
 394                if (set->extensions & IPSET_EXT_DESTROY)
 395                        mtype_ext_cleanup(set, n);
 396                /* FIXME: use slab cache */
 397                rcu_assign_pointer(hbucket(t, i), NULL);
 398                kfree_rcu(n, rcu);
 399        }
 400#ifdef IP_SET_HASH_WITH_NETS
 401        memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
 402#endif
 403        h->elements = 0;
 404}
 405
 406/* Destroy the hashtable part of the set */
 407static void
 408mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
 409{
 410        struct hbucket *n;
 411        u32 i;
 412
 413        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 414                n = __ipset_dereference_protected(hbucket(t, i), 1);
 415                if (!n)
 416                        continue;
 417                if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
 418                        mtype_ext_cleanup(set, n);
 419                /* FIXME: use slab cache */
 420                kfree(n);
 421        }
 422
 423        ip_set_free(t);
 424}
 425
 426/* Destroy a hash type of set */
 427static void
 428mtype_destroy(struct ip_set *set)
 429{
 430        struct htype *h = set->data;
 431
 432        if (SET_WITH_TIMEOUT(set))
 433                del_timer_sync(&h->gc);
 434
 435        mtype_ahash_destroy(set,
 436                            __ipset_dereference_protected(h->table, 1), true);
 437        kfree(h);
 438
 439        set->data = NULL;
 440}
 441
 442static void
 443mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 444{
 445        struct htype *h = set->data;
 446
 447        init_timer(&h->gc);
 448        h->gc.data = (unsigned long)set;
 449        h->gc.function = gc;
 450        h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 451        add_timer(&h->gc);
  452        pr_debug("gc initialized, runs every %u seconds\n",
 453                 IPSET_GC_PERIOD(set->timeout));
 454}
 455
 456static bool
 457mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 458{
 459        const struct htype *x = a->data;
 460        const struct htype *y = b->data;
 461
 462        /* Resizing changes htable_bits, so we ignore it */
 463        return x->maxelem == y->maxelem &&
 464               a->timeout == b->timeout &&
 465#ifdef IP_SET_HASH_WITH_NETMASK
 466               x->netmask == y->netmask &&
 467#endif
 468#ifdef IP_SET_HASH_WITH_MARKMASK
 469               x->markmask == y->markmask &&
 470#endif
 471               a->extensions == b->extensions;
 472}
 473
 474/* Delete expired elements from the hashtable */
 475static void
 476mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 477{
 478        struct htable *t;
 479        struct hbucket *n, *tmp;
 480        struct mtype_elem *data;
 481        u32 i, j, d;
 482#ifdef IP_SET_HASH_WITH_NETS
 483        u8 k;
 484#endif
 485
 486        t = ipset_dereference_protected(h->table, set);
 487        for (i = 0; i < jhash_size(t->htable_bits); i++) {
 488                n = __ipset_dereference_protected(hbucket(t, i), 1);
 489                if (!n)
 490                        continue;
 491                for (j = 0, d = 0; j < n->pos; j++) {
 492                        if (!test_bit(j, n->used)) {
 493                                d++;
 494                                continue;
 495                        }
 496                        data = ahash_data(n, j, dsize);
 497                        if (ip_set_timeout_expired(ext_timeout(data, set))) {
 498                                pr_debug("expired %u/%u\n", i, j);
 499                                clear_bit(j, n->used);
 500                                smp_mb__after_atomic();
 501#ifdef IP_SET_HASH_WITH_NETS
 502                                for (k = 0; k < IPSET_NET_COUNT; k++)
 503                                        mtype_del_cidr(h,
 504                                                NCIDR_PUT(DCIDR_GET(data->cidr,
 505                                                                    k)),
 506                                                nets_length, k);
 507#endif
 508                                ip_set_ext_destroy(set, data);
 509                                h->elements--;
 510                                d++;
 511                        }
 512                }
 513                if (d >= AHASH_INIT_SIZE) {
 514                        if (d >= n->size) {
 515                                rcu_assign_pointer(hbucket(t, i), NULL);
 516                                kfree_rcu(n, rcu);
 517                                continue;
 518                        }
 519                        tmp = kzalloc(sizeof(*tmp) +
 520                                      (n->size - AHASH_INIT_SIZE) * dsize,
 521                                      GFP_ATOMIC);
 522                        if (!tmp)
 523                                /* Still try to delete expired elements */
 524                                continue;
 525                        tmp->size = n->size - AHASH_INIT_SIZE;
 526                        for (j = 0, d = 0; j < n->pos; j++) {
 527                                if (!test_bit(j, n->used))
 528                                        continue;
 529                                data = ahash_data(n, j, dsize);
 530                                memcpy(tmp->value + d * dsize, data, dsize);
 531                                set_bit(d, tmp->used);
 532                                d++;
 533                        }
 534                        tmp->pos = d;
 535                        rcu_assign_pointer(hbucket(t, i), tmp);
 536                        kfree_rcu(n, rcu);
 537                }
 538        }
 539}
 540
 541static void
 542mtype_gc(unsigned long ul_set)
 543{
 544        struct ip_set *set = (struct ip_set *)ul_set;
 545        struct htype *h = set->data;
 546
 547        pr_debug("called\n");
 548        spin_lock_bh(&set->lock);
 549        mtype_expire(set, h, NLEN(set->family), set->dsize);
 550        spin_unlock_bh(&set->lock);
 551
 552        h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 553        add_timer(&h->gc);
 554}
 555
  556/* Resize a hash: create a new hash table, doubling the hashsize,
  557 * and insert the elements into it. Repeat until we succeed or
  558 * fail due to memory pressure.
 559 */
 560static int
 561mtype_resize(struct ip_set *set, bool retried)
 562{
 563        struct htype *h = set->data;
 564        struct htable *t, *orig;
 565        u8 htable_bits;
 566        size_t dsize = set->dsize;
 567#ifdef IP_SET_HASH_WITH_NETS
 568        u8 flags;
 569        struct mtype_elem *tmp;
 570#endif
 571        struct mtype_elem *data;
 572        struct mtype_elem *d;
 573        struct hbucket *n, *m;
 574        u32 i, j, key;
 575        int ret;
 576
 577#ifdef IP_SET_HASH_WITH_NETS
 578        tmp = kmalloc(dsize, GFP_KERNEL);
 579        if (!tmp)
 580                return -ENOMEM;
 581#endif
 582        rcu_read_lock_bh();
 583        orig = rcu_dereference_bh_nfnl(h->table);
 584        htable_bits = orig->htable_bits;
 585        rcu_read_unlock_bh();
 586
 587retry:
 588        ret = 0;
 589        htable_bits++;
 590        if (!htable_bits) {
 591                /* In case we have plenty of memory :-) */
 592                pr_warn("Cannot increase the hashsize of set %s further\n",
 593                        set->name);
 594                ret = -IPSET_ERR_HASH_FULL;
 595                goto out;
 596        }
 597        t = ip_set_alloc(htable_size(htable_bits));
 598        if (!t) {
 599                ret = -ENOMEM;
 600                goto out;
 601        }
 602        t->htable_bits = htable_bits;
 603
 604        spin_lock_bh(&set->lock);
 605        orig = __ipset_dereference_protected(h->table, 1);
 606        /* There can't be another parallel resizing, but dumping is possible */
 607        atomic_set(&orig->ref, 1);
 608        atomic_inc(&orig->uref);
 609        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
 610                 set->name, orig->htable_bits, htable_bits, orig);
 611        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
 612                n = __ipset_dereference_protected(hbucket(orig, i), 1);
 613                if (!n)
 614                        continue;
 615                for (j = 0; j < n->pos; j++) {
 616                        if (!test_bit(j, n->used))
 617                                continue;
 618                        data = ahash_data(n, j, dsize);
 619#ifdef IP_SET_HASH_WITH_NETS
  620                        /* We have readers running in parallel with us,
 621                         * so the live data cannot be modified.
 622                         */
 623                        flags = 0;
 624                        memcpy(tmp, data, dsize);
 625                        data = tmp;
 626                        mtype_data_reset_flags(data, &flags);
 627#endif
 628                        key = HKEY(data, h->initval, htable_bits);
 629                        m = __ipset_dereference_protected(hbucket(t, key), 1);
 630                        if (!m) {
 631                                m = kzalloc(sizeof(*m) +
 632                                            AHASH_INIT_SIZE * dsize,
 633                                            GFP_ATOMIC);
 634                                if (!m) {
 635                                        ret = -ENOMEM;
 636                                        goto cleanup;
 637                                }
 638                                m->size = AHASH_INIT_SIZE;
 639                                RCU_INIT_POINTER(hbucket(t, key), m);
 640                        } else if (m->pos >= m->size) {
 641                                struct hbucket *ht;
 642
 643                                if (m->size >= AHASH_MAX(h)) {
 644                                        ret = -EAGAIN;
 645                                } else {
 646                                        ht = kzalloc(sizeof(*ht) +
 647                                                (m->size + AHASH_INIT_SIZE)
 648                                                * dsize,
 649                                                GFP_ATOMIC);
 650                                        if (!ht)
 651                                                ret = -ENOMEM;
 652                                }
 653                                if (ret < 0)
 654                                        goto cleanup;
 655                                memcpy(ht, m, sizeof(struct hbucket) +
 656                                              m->size * dsize);
 657                                ht->size = m->size + AHASH_INIT_SIZE;
 658                                kfree(m);
 659                                m = ht;
 660                                RCU_INIT_POINTER(hbucket(t, key), ht);
 661                        }
 662                        d = ahash_data(m, m->pos, dsize);
 663                        memcpy(d, data, dsize);
 664                        set_bit(m->pos++, m->used);
 665#ifdef IP_SET_HASH_WITH_NETS
 666                        mtype_data_reset_flags(d, &flags);
 667#endif
 668                }
 669        }
 670        rcu_assign_pointer(h->table, t);
 671
 672        spin_unlock_bh(&set->lock);
 673
 674        /* Give time to other readers of the set */
 675        synchronize_rcu_bh();
 676
 677        pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
 678                 orig->htable_bits, orig, t->htable_bits, t);
 679        /* If there's nobody else dumping the table, destroy it */
 680        if (atomic_dec_and_test(&orig->uref)) {
 681                pr_debug("Table destroy by resize %p\n", orig);
 682                mtype_ahash_destroy(set, orig, false);
 683        }
 684
 685out:
 686#ifdef IP_SET_HASH_WITH_NETS
 687        kfree(tmp);
 688#endif
 689        return ret;
 690
 691cleanup:
 692        atomic_set(&orig->ref, 0);
 693        atomic_dec(&orig->uref);
 694        spin_unlock_bh(&set->lock);
 695        mtype_ahash_destroy(set, t, false);
 696        if (ret == -EAGAIN)
 697                goto retry;
 698        goto out;
 699}
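
/* Note on the old table above: before publishing the new table,
 * mtype_resize() sets orig->ref and takes a uref, so a dump that started
 * on the old table (see mtype_uref() below) keeps it alive; whichever
 * side drops the last uref destroys it.
 */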
 700
  701/* Add an element to a hash and update the internal counters on success,
 702 * otherwise report the proper error code.
 703 */
 704static int
 705mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 706          struct ip_set_ext *mext, u32 flags)
 707{
 708        struct htype *h = set->data;
 709        struct htable *t;
 710        const struct mtype_elem *d = value;
 711        struct mtype_elem *data;
 712        struct hbucket *n, *old = ERR_PTR(-ENOENT);
 713        int i, j = -1;
 714        bool flag_exist = flags & IPSET_FLAG_EXIST;
 715        bool deleted = false, forceadd = false, reuse = false;
 716        u32 key, multi = 0;
 717
 718        if (h->elements >= h->maxelem) {
 719                if (SET_WITH_TIMEOUT(set))
 720                        /* FIXME: when set is full, we slow down here */
 721                        mtype_expire(set, h, NLEN(set->family), set->dsize);
 722                if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set))
 723                        forceadd = true;
 724        }
 725
 726        t = ipset_dereference_protected(h->table, set);
 727        key = HKEY(value, h->initval, t->htable_bits);
 728        n = __ipset_dereference_protected(hbucket(t, key), 1);
 729        if (!n) {
 730                if (forceadd) {
 731                        if (net_ratelimit())
 732                                pr_warn("Set %s is full, maxelem %u reached\n",
 733                                        set->name, h->maxelem);
 734                        return -IPSET_ERR_HASH_FULL;
 735                } else if (h->elements >= h->maxelem) {
 736                        goto set_full;
 737                }
 738                old = NULL;
 739                n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
 740                            GFP_ATOMIC);
 741                if (!n)
 742                        return -ENOMEM;
 743                n->size = AHASH_INIT_SIZE;
 744                goto copy_elem;
 745        }
 746        for (i = 0; i < n->pos; i++) {
 747                if (!test_bit(i, n->used)) {
 748                        /* Reuse first deleted entry */
 749                        if (j == -1) {
 750                                deleted = reuse = true;
 751                                j = i;
 752                        }
 753                        continue;
 754                }
 755                data = ahash_data(n, i, set->dsize);
 756                if (mtype_data_equal(data, d, &multi)) {
 757                        if (flag_exist ||
 758                            (SET_WITH_TIMEOUT(set) &&
 759                             ip_set_timeout_expired(ext_timeout(data, set)))) {
  760                                /* Only the extensions need to be overwritten */
 761                                j = i;
 762                                goto overwrite_extensions;
 763                        }
 764                        return -IPSET_ERR_EXIST;
 765                }
 766                /* Reuse first timed out entry */
 767                if (SET_WITH_TIMEOUT(set) &&
 768                    ip_set_timeout_expired(ext_timeout(data, set)) &&
 769                    j == -1) {
 770                        j = i;
 771                        reuse = true;
 772                }
 773        }
 774        if (reuse || forceadd) {
 775                data = ahash_data(n, j, set->dsize);
 776                if (!deleted) {
 777#ifdef IP_SET_HASH_WITH_NETS
 778                        for (i = 0; i < IPSET_NET_COUNT; i++)
 779                                mtype_del_cidr(h,
 780                                        NCIDR_PUT(DCIDR_GET(data->cidr, i)),
 781                                        NLEN(set->family), i);
 782#endif
 783                        ip_set_ext_destroy(set, data);
 784                        h->elements--;
 785                }
 786                goto copy_data;
 787        }
 788        if (h->elements >= h->maxelem)
 789                goto set_full;
 790        /* Create a new slot */
 791        if (n->pos >= n->size) {
 792                TUNE_AHASH_MAX(h, multi);
 793                if (n->size >= AHASH_MAX(h)) {
 794                        /* Trigger rehashing */
 795                        mtype_data_next(&h->next, d);
 796                        return -EAGAIN;
 797                }
 798                old = n;
 799                n = kzalloc(sizeof(*n) +
 800                            (old->size + AHASH_INIT_SIZE) * set->dsize,
 801                            GFP_ATOMIC);
 802                if (!n)
 803                        return -ENOMEM;
 804                memcpy(n, old, sizeof(struct hbucket) +
 805                       old->size * set->dsize);
 806                n->size = old->size + AHASH_INIT_SIZE;
 807        }
 808
 809copy_elem:
 810        j = n->pos++;
 811        data = ahash_data(n, j, set->dsize);
 812copy_data:
 813        h->elements++;
 814#ifdef IP_SET_HASH_WITH_NETS
 815        for (i = 0; i < IPSET_NET_COUNT; i++)
 816                mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)),
 817                               NLEN(set->family), i);
 818#endif
 819        memcpy(data, d, sizeof(struct mtype_elem));
 820overwrite_extensions:
 821#ifdef IP_SET_HASH_WITH_NETS
 822        mtype_data_set_flags(data, flags);
 823#endif
 824        if (SET_WITH_COUNTER(set))
 825                ip_set_init_counter(ext_counter(data, set), ext);
 826        if (SET_WITH_COMMENT(set))
 827                ip_set_init_comment(ext_comment(data, set), ext);
 828        if (SET_WITH_SKBINFO(set))
 829                ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
  830        /* Must come last for the case when a timed-out entry is reused */
 831        if (SET_WITH_TIMEOUT(set))
 832                ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
 833        smp_mb__before_atomic();
 834        set_bit(j, n->used);
 835        if (old != ERR_PTR(-ENOENT)) {
 836                rcu_assign_pointer(hbucket(t, key), n);
 837                if (old)
 838                        kfree_rcu(old, rcu);
 839        }
 840
 841        return 0;
 842set_full:
 843        if (net_ratelimit())
 844                pr_warn("Set %s is full, maxelem %u reached\n",
 845                        set->name, h->maxelem);
 846        return -IPSET_ERR_HASH_FULL;
 847}
 848
 849/* Delete an element from the hash and free up space if possible.
 850 */
 851static int
 852mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 853          struct ip_set_ext *mext, u32 flags)
 854{
 855        struct htype *h = set->data;
 856        struct htable *t;
 857        const struct mtype_elem *d = value;
 858        struct mtype_elem *data;
 859        struct hbucket *n;
 860        int i, j, k, ret = -IPSET_ERR_EXIST;
 861        u32 key, multi = 0;
 862        size_t dsize = set->dsize;
 863
 864        t = ipset_dereference_protected(h->table, set);
 865        key = HKEY(value, h->initval, t->htable_bits);
 866        n = __ipset_dereference_protected(hbucket(t, key), 1);
 867        if (!n)
 868                goto out;
 869        for (i = 0, k = 0; i < n->pos; i++) {
 870                if (!test_bit(i, n->used)) {
 871                        k++;
 872                        continue;
 873                }
 874                data = ahash_data(n, i, dsize);
 875                if (!mtype_data_equal(data, d, &multi))
 876                        continue;
 877                if (SET_WITH_TIMEOUT(set) &&
 878                    ip_set_timeout_expired(ext_timeout(data, set)))
 879                        goto out;
 880
 881                ret = 0;
 882                clear_bit(i, n->used);
 883                smp_mb__after_atomic();
 884                if (i + 1 == n->pos)
 885                        n->pos--;
 886                h->elements--;
 887#ifdef IP_SET_HASH_WITH_NETS
 888                for (j = 0; j < IPSET_NET_COUNT; j++)
 889                        mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
 890                                       NLEN(set->family), j);
 891#endif
 892                ip_set_ext_destroy(set, data);
 893
 894                for (; i < n->pos; i++) {
 895                        if (!test_bit(i, n->used))
 896                                k++;
 897                }
 898                if (n->pos == 0 && k == 0) {
 899                        rcu_assign_pointer(hbucket(t, key), NULL);
 900                        kfree_rcu(n, rcu);
 901                } else if (k >= AHASH_INIT_SIZE) {
 902                        struct hbucket *tmp = kzalloc(sizeof(*tmp) +
 903                                        (n->size - AHASH_INIT_SIZE) * dsize,
 904                                        GFP_ATOMIC);
 905                        if (!tmp)
 906                                goto out;
 907                        tmp->size = n->size - AHASH_INIT_SIZE;
 908                        for (j = 0, k = 0; j < n->pos; j++) {
 909                                if (!test_bit(j, n->used))
 910                                        continue;
 911                                data = ahash_data(n, j, dsize);
 912                                memcpy(tmp->value + k * dsize, data, dsize);
  913                                set_bit(k, tmp->used);
 914                                k++;
 915                        }
 916                        tmp->pos = k;
 917                        rcu_assign_pointer(hbucket(t, key), tmp);
 918                        kfree_rcu(n, rcu);
 919                }
 920                goto out;
 921        }
 922
 923out:
 924        return ret;
 925}
 926
 927static inline int
 928mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
 929                 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
 930{
 931        if (SET_WITH_COUNTER(set))
 932                ip_set_update_counter(ext_counter(data, set),
 933                                      ext, mext, flags);
 934        if (SET_WITH_SKBINFO(set))
 935                ip_set_get_skbinfo(ext_skbinfo(data, set),
 936                                   ext, mext, flags);
 937        return mtype_do_data_match(data);
 938}
 939
 940#ifdef IP_SET_HASH_WITH_NETS
 941/* Special test function which takes into account the different network
 942 * sizes added to the set
 943 */
 944static int
 945mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 946                 const struct ip_set_ext *ext,
 947                 struct ip_set_ext *mext, u32 flags)
 948{
 949        struct htype *h = set->data;
 950        struct htable *t = rcu_dereference_bh(h->table);
 951        struct hbucket *n;
 952        struct mtype_elem *data;
 953#if IPSET_NET_COUNT == 2
 954        struct mtype_elem orig = *d;
 955        int i, j = 0, k;
 956#else
 957        int i, j = 0;
 958#endif
 959        u32 key, multi = 0;
 960        u8 nets_length = NLEN(set->family);
 961
 962        pr_debug("test by nets\n");
 963        for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) {
 964#if IPSET_NET_COUNT == 2
 965                mtype_data_reset_elem(d, &orig);
 966                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
 967                for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi;
 968                     k++) {
 969                        mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
 970                                           true);
 971#else
 972                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
 973#endif
 974                key = HKEY(d, h->initval, t->htable_bits);
 975                n =  rcu_dereference_bh(hbucket(t, key));
 976                if (!n)
 977                        continue;
 978                for (i = 0; i < n->pos; i++) {
 979                        if (!test_bit(i, n->used))
 980                                continue;
 981                        data = ahash_data(n, i, set->dsize);
 982                        if (!mtype_data_equal(data, d, &multi))
 983                                continue;
 984                        if (SET_WITH_TIMEOUT(set)) {
 985                                if (!ip_set_timeout_expired(
 986                                                ext_timeout(data, set)))
 987                                        return mtype_data_match(data, ext,
 988                                                                mext, set,
 989                                                                flags);
 990#ifdef IP_SET_HASH_WITH_MULTI
 991                                multi = 0;
 992#endif
 993                        } else
 994                                return mtype_data_match(data, ext,
 995                                                        mext, set, flags);
 996                }
 997#if IPSET_NET_COUNT == 2
 998                }
 999#endif
1000        }
1001        return 0;
1002}
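
/* Example (illustrative): when a plain address is tested against a set
 * holding a /24 and a /16 network, the loop above masks the address
 * first to /24, then to /16 (stored longest prefix first) and performs a
 * normal hash lookup for each candidate until one matches.
 */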
1003#endif
1004
 1005/* Test whether the element is present in the set */
1006static int
1007mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
1008           struct ip_set_ext *mext, u32 flags)
1009{
1010        struct htype *h = set->data;
1011        struct htable *t;
1012        struct mtype_elem *d = value;
1013        struct hbucket *n;
1014        struct mtype_elem *data;
1015        int i, ret = 0;
1016        u32 key, multi = 0;
1017
1018        t = rcu_dereference_bh(h->table);
1019#ifdef IP_SET_HASH_WITH_NETS
1020        /* If we test an IP address and not a network address,
1021         * try all possible network sizes
1022         */
1023        for (i = 0; i < IPSET_NET_COUNT; i++)
1024                if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family))
1025                        break;
1026        if (i == IPSET_NET_COUNT) {
1027                ret = mtype_test_cidrs(set, d, ext, mext, flags);
1028                goto out;
1029        }
1030#endif
1031
1032        key = HKEY(d, h->initval, t->htable_bits);
1033        n = rcu_dereference_bh(hbucket(t, key));
1034        if (!n) {
1035                ret = 0;
1036                goto out;
1037        }
1038        for (i = 0; i < n->pos; i++) {
1039                if (!test_bit(i, n->used))
1040                        continue;
1041                data = ahash_data(n, i, set->dsize);
1042                if (mtype_data_equal(data, d, &multi) &&
1043                    !(SET_WITH_TIMEOUT(set) &&
1044                      ip_set_timeout_expired(ext_timeout(data, set)))) {
1045                        ret = mtype_data_match(data, ext, mext, set, flags);
1046                        goto out;
1047                }
1048        }
1049out:
1050        return ret;
1051}
1052
 1053/* Reply to a HEADER request: fill out the header part of the set */
1054static int
1055mtype_head(struct ip_set *set, struct sk_buff *skb)
1056{
1057        const struct htype *h = set->data;
1058        const struct htable *t;
1059        struct nlattr *nested;
1060        size_t memsize;
1061        u8 htable_bits;
1062
1063        rcu_read_lock_bh();
1064        t = rcu_dereference_bh_nfnl(h->table);
1065        memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
1066        htable_bits = t->htable_bits;
1067        rcu_read_unlock_bh();
1068
1069        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
1070        if (!nested)
1071                goto nla_put_failure;
1072        if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
1073                          htonl(jhash_size(htable_bits))) ||
1074            nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
1075                goto nla_put_failure;
1076#ifdef IP_SET_HASH_WITH_NETMASK
1077        if (h->netmask != HOST_MASK &&
1078            nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
1079                goto nla_put_failure;
1080#endif
1081#ifdef IP_SET_HASH_WITH_MARKMASK
1082        if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
1083                goto nla_put_failure;
1084#endif
1085        if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
1086            nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
1087                goto nla_put_failure;
1088        if (unlikely(ip_set_put_flags(skb, set)))
1089                goto nla_put_failure;
1090        ipset_nest_end(skb, nested);
1091
1092        return 0;
1093nla_put_failure:
1094        return -EMSGSIZE;
1095}
1096
 1097/* Make it possible to run dumping in parallel with resizing */
1098static void
1099mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
1100{
1101        struct htype *h = set->data;
1102        struct htable *t;
1103
1104        if (start) {
1105                rcu_read_lock_bh();
1106                t = rcu_dereference_bh_nfnl(h->table);
1107                atomic_inc(&t->uref);
1108                cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
1109                rcu_read_unlock_bh();
1110        } else if (cb->args[IPSET_CB_PRIVATE]) {
1111                t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
1112                if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
1113                        /* Resizing didn't destroy the hash table */
1114                        pr_debug("Table destroy by dump: %p\n", t);
1115                        mtype_ahash_destroy(set, t, false);
1116                }
1117                cb->args[IPSET_CB_PRIVATE] = 0;
1118        }
1119}
1120
 1121/* Reply to a LIST/SAVE request: dump the elements of the specified set */
1122static int
1123mtype_list(const struct ip_set *set,
1124           struct sk_buff *skb, struct netlink_callback *cb)
1125{
1126        const struct htable *t;
1127        struct nlattr *atd, *nested;
1128        const struct hbucket *n;
1129        const struct mtype_elem *e;
1130        u32 first = cb->args[IPSET_CB_ARG0];
 1131        /* We assume that one hash bucket fits into one page */
1132        void *incomplete;
1133        int i, ret = 0;
1134
1135        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
1136        if (!atd)
1137                return -EMSGSIZE;
1138
1139        pr_debug("list hash set %s\n", set->name);
1140        t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
1141        /* Expire may replace a hbucket with another one */
1142        rcu_read_lock();
1143        for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
1144             cb->args[IPSET_CB_ARG0]++) {
1145                incomplete = skb_tail_pointer(skb);
1146                n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
1147                pr_debug("cb->arg bucket: %lu, t %p n %p\n",
1148                         cb->args[IPSET_CB_ARG0], t, n);
1149                if (!n)
1150                        continue;
1151                for (i = 0; i < n->pos; i++) {
1152                        if (!test_bit(i, n->used))
1153                                continue;
1154                        e = ahash_data(n, i, set->dsize);
1155                        if (SET_WITH_TIMEOUT(set) &&
1156                            ip_set_timeout_expired(ext_timeout(e, set)))
1157                                continue;
1158                        pr_debug("list hash %lu hbucket %p i %u, data %p\n",
1159                                 cb->args[IPSET_CB_ARG0], n, i, e);
1160                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
1161                        if (!nested) {
1162                                if (cb->args[IPSET_CB_ARG0] == first) {
1163                                        nla_nest_cancel(skb, atd);
1164                                        ret = -EMSGSIZE;
1165                                        goto out;
1166                                }
1167                                goto nla_put_failure;
1168                        }
1169                        if (mtype_data_list(skb, e))
1170                                goto nla_put_failure;
1171                        if (ip_set_put_extensions(skb, set, e, true))
1172                                goto nla_put_failure;
1173                        ipset_nest_end(skb, nested);
1174                }
1175        }
1176        ipset_nest_end(skb, atd);
1177        /* Set listing finished */
1178        cb->args[IPSET_CB_ARG0] = 0;
1179
1180        goto out;
1181
1182nla_put_failure:
1183        nlmsg_trim(skb, incomplete);
1184        if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
1185                pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
1186                        set->name);
1187                cb->args[IPSET_CB_ARG0] = 0;
1188                ret = -EMSGSIZE;
1189        } else {
1190                ipset_nest_end(skb, atd);
1191        }
1192out:
1193        rcu_read_unlock();
1194        return ret;
1195}
1196
1197static int
1198IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
1199                          const struct xt_action_param *par,
1200                          enum ipset_adt adt, struct ip_set_adt_opt *opt);
1201
1202static int
1203IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
1204                          enum ipset_adt adt, u32 *lineno, u32 flags,
1205                          bool retried);
1206
1207static const struct ip_set_type_variant mtype_variant = {
1208        .kadt   = mtype_kadt,
1209        .uadt   = mtype_uadt,
1210        .adt    = {
1211                [IPSET_ADD] = mtype_add,
1212                [IPSET_DEL] = mtype_del,
1213                [IPSET_TEST] = mtype_test,
1214        },
1215        .destroy = mtype_destroy,
1216        .flush  = mtype_flush,
1217        .head   = mtype_head,
1218        .list   = mtype_list,
1219        .uref   = mtype_uref,
1220        .resize = mtype_resize,
1221        .same_set = mtype_same_set,
1222};
1223
1224#ifdef IP_SET_EMIT_CREATE
1225static int
1226IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1227                            struct nlattr *tb[], u32 flags)
1228{
1229        u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
1230#ifdef IP_SET_HASH_WITH_MARKMASK
1231        u32 markmask;
1232#endif
1233        u8 hbits;
1234#ifdef IP_SET_HASH_WITH_NETMASK
1235        u8 netmask;
1236#endif
1237        size_t hsize;
1238        struct htype *h;
1239        struct htable *t;
1240
1241#ifndef IP_SET_PROTO_UNDEF
1242        if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
1243                return -IPSET_ERR_INVALID_FAMILY;
1244#endif
1245
1246#ifdef IP_SET_HASH_WITH_MARKMASK
1247        markmask = 0xffffffff;
1248#endif
1249#ifdef IP_SET_HASH_WITH_NETMASK
1250        netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
1251        pr_debug("Create set %s with family %s\n",
1252                 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
1253#endif
1254
1255        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
1256                     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
1257                     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
1258                     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
1259                return -IPSET_ERR_PROTOCOL;
1260#ifdef IP_SET_HASH_WITH_MARKMASK
1261        /* Separated condition in order to avoid directive in argument list */
1262        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
1263                return -IPSET_ERR_PROTOCOL;
1264#endif
1265
1266        if (tb[IPSET_ATTR_HASHSIZE]) {
1267                hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
1268                if (hashsize < IPSET_MIMINAL_HASHSIZE)
1269                        hashsize = IPSET_MIMINAL_HASHSIZE;
1270        }
1271
1272        if (tb[IPSET_ATTR_MAXELEM])
1273                maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
1274
1275#ifdef IP_SET_HASH_WITH_NETMASK
1276        if (tb[IPSET_ATTR_NETMASK]) {
1277                netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
1278
1279                if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
1280                    (set->family == NFPROTO_IPV6 && netmask > 128) ||
1281                    netmask == 0)
1282                        return -IPSET_ERR_INVALID_NETMASK;
1283        }
1284#endif
1285#ifdef IP_SET_HASH_WITH_MARKMASK
1286        if (tb[IPSET_ATTR_MARKMASK]) {
1287                markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
1288
1289                if (markmask == 0)
1290                        return -IPSET_ERR_INVALID_MARKMASK;
1291        }
1292#endif
1293
1294        hsize = sizeof(*h);
1295#ifdef IP_SET_HASH_WITH_NETS
1296        hsize += sizeof(struct net_prefixes) * NLEN(set->family);
1297#endif
1298        h = kzalloc(hsize, GFP_KERNEL);
1299        if (!h)
1300                return -ENOMEM;
1301
1302        h->maxelem = maxelem;
1303#ifdef IP_SET_HASH_WITH_NETMASK
1304        h->netmask = netmask;
1305#endif
1306#ifdef IP_SET_HASH_WITH_MARKMASK
1307        h->markmask = markmask;
1308#endif
1309        get_random_bytes(&h->initval, sizeof(h->initval));
1310        set->timeout = IPSET_NO_TIMEOUT;
1311
1312        hbits = htable_bits(hashsize);
1313        hsize = htable_size(hbits);
1314        if (hsize == 0) {
1315                kfree(h);
1316                return -ENOMEM;
1317        }
1318        t = ip_set_alloc(hsize);
1319        if (!t) {
1320                kfree(h);
1321                return -ENOMEM;
1322        }
1323        t->htable_bits = hbits;
1324        rcu_assign_pointer(h->table, t);
1325
1326        set->data = h;
1327#ifndef IP_SET_PROTO_UNDEF
1328        if (set->family == NFPROTO_IPV4) {
1329#endif
1330                set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
1331                set->dsize = ip_set_elem_len(set, tb,
1332                        sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
1333                        __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
1334#ifndef IP_SET_PROTO_UNDEF
1335        } else {
1336                set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
1337                set->dsize = ip_set_elem_len(set, tb,
1338                        sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
1339                        __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
1340        }
1341#endif
1342        if (tb[IPSET_ATTR_TIMEOUT]) {
1343                set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
1344#ifndef IP_SET_PROTO_UNDEF
1345                if (set->family == NFPROTO_IPV4)
1346#endif
1347                        IPSET_TOKEN(HTYPE, 4_gc_init)(set,
1348                                IPSET_TOKEN(HTYPE, 4_gc));
1349#ifndef IP_SET_PROTO_UNDEF
1350                else
1351                        IPSET_TOKEN(HTYPE, 6_gc_init)(set,
1352                                IPSET_TOKEN(HTYPE, 6_gc));
1353#endif
1354        }
1355        pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
1356                 set->name, jhash_size(t->htable_bits),
1357                 t->htable_bits, h->maxelem, set->data, t);
1358
1359        return 0;
1360}
1361#endif /* IP_SET_EMIT_CREATE */
1362
1363#undef HKEY_DATALEN
1364