linux/net/netfilter/ipset/ip_set_core.c
<<
>>
Prefs
   1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
   2 *                         Patrick Schaaf <bof@bof.de>
   3 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License version 2 as
   7 * published by the Free Software Foundation.
   8 */
   9
  10/* Kernel module for IP set management */
  11
  12#include <linux/init.h>
  13#include <linux/module.h>
  14#include <linux/moduleparam.h>
  15#include <linux/ip.h>
  16#include <linux/skbuff.h>
  17#include <linux/spinlock.h>
  18#include <linux/rculist.h>
  19#include <net/netlink.h>
  20
  21#include <linux/netfilter.h>
  22#include <linux/netfilter/x_tables.h>
  23#include <linux/netfilter/nfnetlink.h>
  24#include <linux/netfilter/ipset/ip_set.h>
  25
  26static LIST_HEAD(ip_set_type_list);             /* all registered set types */
  27static DEFINE_MUTEX(ip_set_type_mutex);         /* protects ip_set_type_list */
  28static DEFINE_RWLOCK(ip_set_ref_lock);          /* protects the set refs */
  29
  30static struct ip_set * __rcu *ip_set_list;      /* all individual sets */
  31static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
  32
  33#define IP_SET_INC      64
  34#define STREQ(a, b)     (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
  35
  36static unsigned int max_sets;
  37
  38module_param(max_sets, int, 0600);
  39MODULE_PARM_DESC(max_sets, "maximal number of sets");
  40MODULE_LICENSE("GPL");
  41MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
  42MODULE_DESCRIPTION("core IP set support");
  43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
  44
  45/* When the nfnl mutex is held: */
  46#define nfnl_dereference(p)             \
  47        rcu_dereference_protected(p, 1)
  48#define nfnl_set(id)                    \
  49        nfnl_dereference(ip_set_list)[id]
  50
  51/*
  52 * The set types are implemented in modules and registered set types
  53 * can be found in ip_set_type_list. Adding/deleting types is
  54 * serialized by ip_set_type_mutex.
  55 */
  56
  57static inline void
  58ip_set_type_lock(void)
  59{
  60        mutex_lock(&ip_set_type_mutex);
  61}
  62
  63static inline void
  64ip_set_type_unlock(void)
  65{
  66        mutex_unlock(&ip_set_type_mutex);
  67}
  68
  69/* Register and deregister settype */
  70
  71static struct ip_set_type *
  72find_set_type(const char *name, u8 family, u8 revision)
  73{
  74        struct ip_set_type *type;
  75
  76        list_for_each_entry_rcu(type, &ip_set_type_list, list)
  77                if (STREQ(type->name, name) &&
  78                    (type->family == family ||
  79                     type->family == NFPROTO_UNSPEC) &&
  80                    revision >= type->revision_min &&
  81                    revision <= type->revision_max)
  82                        return type;
  83        return NULL;
  84}
  85
  86/* Unlock, try to load a set type module and lock again */
  87static bool
  88load_settype(const char *name)
  89{
  90        nfnl_unlock(NFNL_SUBSYS_IPSET);
  91        pr_debug("try to load ip_set_%s\n", name);
  92        if (request_module("ip_set_%s", name) < 0) {
  93                pr_warning("Can't find ip_set type %s\n", name);
  94                nfnl_lock(NFNL_SUBSYS_IPSET);
  95                return false;
  96        }
  97        nfnl_lock(NFNL_SUBSYS_IPSET);
  98        return true;
  99}
 100
 101/* Find a set type and reference it */
 102#define find_set_type_get(name, family, revision, found)        \
 103        __find_set_type_get(name, family, revision, found, false)
 104
 105static int
 106__find_set_type_get(const char *name, u8 family, u8 revision,
 107                    struct ip_set_type **found, bool retry)
 108{
 109        struct ip_set_type *type;
 110        int err;
 111
 112        if (retry && !load_settype(name))
 113                return -IPSET_ERR_FIND_TYPE;
 114
 115        rcu_read_lock();
 116        *found = find_set_type(name, family, revision);
 117        if (*found) {
 118                err = !try_module_get((*found)->me) ? -EFAULT : 0;
 119                goto unlock;
 120        }
 121        /* Make sure the type is already loaded
 122         * but we don't support the revision */
 123        list_for_each_entry_rcu(type, &ip_set_type_list, list)
 124                if (STREQ(type->name, name)) {
 125                        err = -IPSET_ERR_FIND_TYPE;
 126                        goto unlock;
 127                }
 128        rcu_read_unlock();
 129
 130        return retry ? -IPSET_ERR_FIND_TYPE :
 131                __find_set_type_get(name, family, revision, found, true);
 132
 133unlock:
 134        rcu_read_unlock();
 135        return err;
 136}
 137
 138/* Find a given set type by name and family.
 139 * If we succeeded, the supported minimal and maximum revisions are
 140 * filled out.
 141 */
 142#define find_set_type_minmax(name, family, min, max) \
 143        __find_set_type_minmax(name, family, min, max, false)
 144
 145static int
 146__find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
 147                       bool retry)
 148{
 149        struct ip_set_type *type;
 150        bool found = false;
 151
 152        if (retry && !load_settype(name))
 153                return -IPSET_ERR_FIND_TYPE;
 154
 155        *min = 255; *max = 0;
 156        rcu_read_lock();
 157        list_for_each_entry_rcu(type, &ip_set_type_list, list)
 158                if (STREQ(type->name, name) &&
 159                    (type->family == family ||
 160                     type->family == NFPROTO_UNSPEC)) {
 161                        found = true;
 162                        if (type->revision_min < *min)
 163                                *min = type->revision_min;
 164                        if (type->revision_max > *max)
 165                                *max = type->revision_max;
 166                }
 167        rcu_read_unlock();
 168        if (found)
 169                return 0;
 170
 171        return retry ? -IPSET_ERR_FIND_TYPE :
 172                __find_set_type_minmax(name, family, min, max, true);
 173}
 174
 175#define family_name(f)  ((f) == NFPROTO_IPV4 ? "inet" : \
 176                         (f) == NFPROTO_IPV6 ? "inet6" : "any")
 177
 178/* Register a set type structure. The type is identified by
 179 * the unique triple of name, family and revision.
 180 */
 181int
 182ip_set_type_register(struct ip_set_type *type)
 183{
 184        int ret = 0;
 185
 186        if (type->protocol != IPSET_PROTOCOL) {
 187                pr_warning("ip_set type %s, family %s, revision %u:%u uses "
 188                           "wrong protocol version %u (want %u)\n",
 189                           type->name, family_name(type->family),
 190                           type->revision_min, type->revision_max,
 191                           type->protocol, IPSET_PROTOCOL);
 192                return -EINVAL;
 193        }
 194
 195        ip_set_type_lock();
 196        if (find_set_type(type->name, type->family, type->revision_min)) {
 197                /* Duplicate! */
 198                pr_warning("ip_set type %s, family %s with revision min %u "
 199                           "already registered!\n", type->name,
 200                           family_name(type->family), type->revision_min);
 201                ret = -EINVAL;
 202                goto unlock;
 203        }
 204        list_add_rcu(&type->list, &ip_set_type_list);
 205        pr_debug("type %s, family %s, revision %u:%u registered.\n",
 206                 type->name, family_name(type->family),
 207                 type->revision_min, type->revision_max);
 208unlock:
 209        ip_set_type_unlock();
 210        return ret;
 211}
 212EXPORT_SYMBOL_GPL(ip_set_type_register);
 213
 214/* Unregister a set type. There's a small race with ip_set_create */
 215void
 216ip_set_type_unregister(struct ip_set_type *type)
 217{
 218        ip_set_type_lock();
 219        if (!find_set_type(type->name, type->family, type->revision_min)) {
 220                pr_warning("ip_set type %s, family %s with revision min %u "
 221                           "not registered\n", type->name,
 222                           family_name(type->family), type->revision_min);
 223                goto unlock;
 224        }
 225        list_del_rcu(&type->list);
 226        pr_debug("type %s, family %s with revision min %u unregistered.\n",
 227                 type->name, family_name(type->family), type->revision_min);
 228unlock:
 229        ip_set_type_unlock();
 230
 231        synchronize_rcu();
 232}
 233EXPORT_SYMBOL_GPL(ip_set_type_unregister);
 234
 235/* Utility functions */
 236void *
 237ip_set_alloc(size_t size)
 238{
 239        void *members = NULL;
 240
 241        if (size < KMALLOC_MAX_SIZE)
 242                members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
 243
 244        if (members) {
 245                pr_debug("%p: allocated with kmalloc\n", members);
 246                return members;
 247        }
 248
 249        members = vzalloc(size);
 250        if (!members)
 251                return NULL;
 252        pr_debug("%p: allocated with vmalloc\n", members);
 253
 254        return members;
 255}
 256EXPORT_SYMBOL_GPL(ip_set_alloc);
 257
 258void
 259ip_set_free(void *members)
 260{
 261        pr_debug("%p: free with %s\n", members,
 262                 is_vmalloc_addr(members) ? "vfree" : "kfree");
 263        if (is_vmalloc_addr(members))
 264                vfree(members);
 265        else
 266                kfree(members);
 267}
 268EXPORT_SYMBOL_GPL(ip_set_free);
 269
 270static inline bool
 271flag_nested(const struct nlattr *nla)
 272{
 273        return nla->nla_type & NLA_F_NESTED;
 274}
 275
 276static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
 277        [IPSET_ATTR_IPADDR_IPV4]        = { .type = NLA_U32 },
 278        [IPSET_ATTR_IPADDR_IPV6]        = { .type = NLA_BINARY,
 279                                            .len = sizeof(struct in6_addr) },
 280};
 281
 282int
 283ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
 284{
 285        struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
 286
 287        if (unlikely(!flag_nested(nla)))
 288                return -IPSET_ERR_PROTOCOL;
 289        if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
 290                return -IPSET_ERR_PROTOCOL;
 291        if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
 292                return -IPSET_ERR_PROTOCOL;
 293
 294        *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
 295        return 0;
 296}
 297EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
 298
 299int
 300ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 301{
 302        struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
 303
 304        if (unlikely(!flag_nested(nla)))
 305                return -IPSET_ERR_PROTOCOL;
 306
 307        if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
 308                return -IPSET_ERR_PROTOCOL;
 309        if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
 310                return -IPSET_ERR_PROTOCOL;
 311
 312        memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
 313                sizeof(struct in6_addr));
 314        return 0;
 315}
 316EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 317
 318int
 319ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 320                      struct ip_set_ext *ext)
 321{
 322        if (tb[IPSET_ATTR_TIMEOUT]) {
 323                if (!(set->extensions & IPSET_EXT_TIMEOUT))
 324                        return -IPSET_ERR_TIMEOUT;
 325                ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 326        }
 327        if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
 328                if (!(set->extensions & IPSET_EXT_COUNTER))
 329                        return -IPSET_ERR_COUNTER;
 330                if (tb[IPSET_ATTR_BYTES])
 331                        ext->bytes = be64_to_cpu(nla_get_be64(
 332                                                 tb[IPSET_ATTR_BYTES]));
 333                if (tb[IPSET_ATTR_PACKETS])
 334                        ext->packets = be64_to_cpu(nla_get_be64(
 335                                                   tb[IPSET_ATTR_PACKETS]));
 336        }
 337        return 0;
 338}
 339EXPORT_SYMBOL_GPL(ip_set_get_extensions);
 340
 341/*
 342 * Creating/destroying/renaming/swapping affect the existence and
 343 * the properties of a set. All of these can be executed from userspace
 344 * only and serialized by the nfnl mutex indirectly from nfnetlink.
 345 *
 346 * Sets are identified by their index in ip_set_list and the index
 347 * is used by the external references (set/SET netfilter modules).
 348 *
 349 * The set behind an index may change by swapping only, from userspace.
 350 */
 351
 352static inline void
 353__ip_set_get(struct ip_set *set)
 354{
 355        write_lock_bh(&ip_set_ref_lock);
 356        set->ref++;
 357        write_unlock_bh(&ip_set_ref_lock);
 358}
 359
 360static inline void
 361__ip_set_put(struct ip_set *set)
 362{
 363        write_lock_bh(&ip_set_ref_lock);
 364        BUG_ON(set->ref == 0);
 365        set->ref--;
 366        write_unlock_bh(&ip_set_ref_lock);
 367}
 368
 369/*
 370 * Add, del and test set entries from kernel.
 371 *
 372 * The set behind the index must exist and must be referenced
 373 * so it can't be destroyed (or changed) under our foot.
 374 */
 375
 376static inline struct ip_set *
 377ip_set_rcu_get(ip_set_id_t index)
 378{
 379        struct ip_set *set;
 380
 381        rcu_read_lock();
 382        /* ip_set_list itself needs to be protected */
 383        set = rcu_dereference(ip_set_list)[index];
 384        rcu_read_unlock();
 385
 386        return set;
 387}
 388
 389int
 390ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 391            const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 392{
 393        struct ip_set *set = ip_set_rcu_get(index);
 394        int ret = 0;
 395
 396        BUG_ON(set == NULL);
 397        pr_debug("set %s, index %u\n", set->name, index);
 398
 399        if (opt->dim < set->type->dimension ||
 400            !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 401                return 0;
 402
 403        read_lock_bh(&set->lock);
 404        ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
 405        read_unlock_bh(&set->lock);
 406
 407        if (ret == -EAGAIN) {
 408                /* Type requests element to be completed */
 409                pr_debug("element must be competed, ADD is triggered\n");
 410                write_lock_bh(&set->lock);
 411                set->variant->kadt(set, skb, par, IPSET_ADD, opt);
 412                write_unlock_bh(&set->lock);
 413                ret = 1;
 414        } else {
 415                /* --return-nomatch: invert matched element */
 416                if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
 417                    (set->type->features & IPSET_TYPE_NOMATCH) &&
 418                    (ret > 0 || ret == -ENOTEMPTY))
 419                        ret = -ret;
 420        }
 421
 422        /* Convert error codes to nomatch */
 423        return (ret < 0 ? 0 : ret);
 424}
 425EXPORT_SYMBOL_GPL(ip_set_test);
 426
 427int
 428ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 429           const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 430{
 431        struct ip_set *set = ip_set_rcu_get(index);
 432        int ret;
 433
 434        BUG_ON(set == NULL);
 435        pr_debug("set %s, index %u\n", set->name, index);
 436
 437        if (opt->dim < set->type->dimension ||
 438            !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 439                return 0;
 440
 441        write_lock_bh(&set->lock);
 442        ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
 443        write_unlock_bh(&set->lock);
 444
 445        return ret;
 446}
 447EXPORT_SYMBOL_GPL(ip_set_add);
 448
 449int
 450ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 451           const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 452{
 453        struct ip_set *set = ip_set_rcu_get(index);
 454        int ret = 0;
 455
 456        BUG_ON(set == NULL);
 457        pr_debug("set %s, index %u\n", set->name, index);
 458
 459        if (opt->dim < set->type->dimension ||
 460            !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
 461                return 0;
 462
 463        write_lock_bh(&set->lock);
 464        ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
 465        write_unlock_bh(&set->lock);
 466
 467        return ret;
 468}
 469EXPORT_SYMBOL_GPL(ip_set_del);
 470
 471/*
 472 * Find set by name, reference it once. The reference makes sure the
 473 * thing pointed to, does not go away under our feet.
 474 *
 475 */
 476ip_set_id_t
 477ip_set_get_byname(const char *name, struct ip_set **set)
 478{
 479        ip_set_id_t i, index = IPSET_INVALID_ID;
 480        struct ip_set *s;
 481
 482        rcu_read_lock();
 483        for (i = 0; i < ip_set_max; i++) {
 484                s = rcu_dereference(ip_set_list)[i];
 485                if (s != NULL && STREQ(s->name, name)) {
 486                        __ip_set_get(s);
 487                        index = i;
 488                        *set = s;
 489                        break;
 490                }
 491        }
 492        rcu_read_unlock();
 493
 494        return index;
 495}
 496EXPORT_SYMBOL_GPL(ip_set_get_byname);
 497
 498/*
 499 * If the given set pointer points to a valid set, decrement
 500 * reference count by 1. The caller shall not assume the index
 501 * to be valid, after calling this function.
 502 *
 503 */
 504void
 505ip_set_put_byindex(ip_set_id_t index)
 506{
 507        struct ip_set *set;
 508
 509        rcu_read_lock();
 510        set = rcu_dereference(ip_set_list)[index];
 511        if (set != NULL)
 512                __ip_set_put(set);
 513        rcu_read_unlock();
 514}
 515EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 516
 517/*
 518 * Get the name of a set behind a set index.
 519 * We assume the set is referenced, so it does exist and
 520 * can't be destroyed. The set cannot be renamed due to
 521 * the referencing either.
 522 *
 523 */
 524const char *
 525ip_set_name_byindex(ip_set_id_t index)
 526{
 527        const struct ip_set *set = ip_set_rcu_get(index);
 528
 529        BUG_ON(set == NULL);
 530        BUG_ON(set->ref == 0);
 531
 532        /* Referenced, so it's safe */
 533        return set->name;
 534}
 535EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 536
 537/*
 538 * Routines to call by external subsystems, which do not
 539 * call nfnl_lock for us.
 540 */
 541
 542/*
 543 * Find set by name, reference it once. The reference makes sure the
 544 * thing pointed to, does not go away under our feet.
 545 *
 546 * The nfnl mutex is used in the function.
 547 */
 548ip_set_id_t
 549ip_set_nfnl_get(const char *name)
 550{
 551        ip_set_id_t i, index = IPSET_INVALID_ID;
 552        struct ip_set *s;
 553
 554        nfnl_lock(NFNL_SUBSYS_IPSET);
 555        for (i = 0; i < ip_set_max; i++) {
 556                s = nfnl_set(i);
 557                if (s != NULL && STREQ(s->name, name)) {
 558                        __ip_set_get(s);
 559                        index = i;
 560                        break;
 561                }
 562        }
 563        nfnl_unlock(NFNL_SUBSYS_IPSET);
 564
 565        return index;
 566}
 567EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
 568
 569/*
 570 * Find set by index, reference it once. The reference makes sure the
 571 * thing pointed to, does not go away under our feet.
 572 *
 573 * The nfnl mutex is used in the function.
 574 */
 575ip_set_id_t
 576ip_set_nfnl_get_byindex(ip_set_id_t index)
 577{
 578        struct ip_set *set;
 579
 580        if (index > ip_set_max)
 581                return IPSET_INVALID_ID;
 582
 583        nfnl_lock(NFNL_SUBSYS_IPSET);
 584        set = nfnl_set(index);
 585        if (set)
 586                __ip_set_get(set);
 587        else
 588                index = IPSET_INVALID_ID;
 589        nfnl_unlock(NFNL_SUBSYS_IPSET);
 590
 591        return index;
 592}
 593EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
 594
 595/*
 596 * If the given set pointer points to a valid set, decrement
 597 * reference count by 1. The caller shall not assume the index
 598 * to be valid, after calling this function.
 599 *
 600 * The nfnl mutex is used in the function.
 601 */
 602void
 603ip_set_nfnl_put(ip_set_id_t index)
 604{
 605        struct ip_set *set;
 606        nfnl_lock(NFNL_SUBSYS_IPSET);
 607        set = nfnl_set(index);
 608        if (set != NULL)
 609                __ip_set_put(set);
 610        nfnl_unlock(NFNL_SUBSYS_IPSET);
 611}
 612EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
 613
 614/*
 615 * Communication protocol with userspace over netlink.
 616 *
 617 * The commands are serialized by the nfnl mutex.
 618 */
 619
 620static inline bool
 621protocol_failed(const struct nlattr * const tb[])
 622{
 623        return !tb[IPSET_ATTR_PROTOCOL] ||
 624               nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
 625}
 626
 627static inline u32
 628flag_exist(const struct nlmsghdr *nlh)
 629{
 630        return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
 631}
 632
 633static struct nlmsghdr *
 634start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
 635          enum ipset_cmd cmd)
 636{
 637        struct nlmsghdr *nlh;
 638        struct nfgenmsg *nfmsg;
 639
 640        nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
 641                        sizeof(*nfmsg), flags);
 642        if (nlh == NULL)
 643                return NULL;
 644
 645        nfmsg = nlmsg_data(nlh);
 646        nfmsg->nfgen_family = NFPROTO_IPV4;
 647        nfmsg->version = NFNETLINK_V0;
 648        nfmsg->res_id = 0;
 649
 650        return nlh;
 651}
 652
 653/* Create a set */
 654
 655static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
 656        [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
 657        [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
 658                                    .len = IPSET_MAXNAMELEN - 1 },
 659        [IPSET_ATTR_TYPENAME]   = { .type = NLA_NUL_STRING,
 660                                    .len = IPSET_MAXNAMELEN - 1},
 661        [IPSET_ATTR_REVISION]   = { .type = NLA_U8 },
 662        [IPSET_ATTR_FAMILY]     = { .type = NLA_U8 },
 663        [IPSET_ATTR_DATA]       = { .type = NLA_NESTED },
 664};
 665
 666static struct ip_set *
 667find_set_and_id(const char *name, ip_set_id_t *id)
 668{
 669        struct ip_set *set = NULL;
 670        ip_set_id_t i;
 671
 672        *id = IPSET_INVALID_ID;
 673        for (i = 0; i < ip_set_max; i++) {
 674                set = nfnl_set(i);
 675                if (set != NULL && STREQ(set->name, name)) {
 676                        *id = i;
 677                        break;
 678                }
 679        }
 680        return (*id == IPSET_INVALID_ID ? NULL : set);
 681}
 682
 683static inline struct ip_set *
 684find_set(const char *name)
 685{
 686        ip_set_id_t id;
 687
 688        return find_set_and_id(name, &id);
 689}
 690
 691static int
 692find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
 693{
 694        struct ip_set *s;
 695        ip_set_id_t i;
 696
 697        *index = IPSET_INVALID_ID;
 698        for (i = 0;  i < ip_set_max; i++) {
 699                s = nfnl_set(i);
 700                if (s == NULL) {
 701                        if (*index == IPSET_INVALID_ID)
 702                                *index = i;
 703                } else if (STREQ(name, s->name)) {
 704                        /* Name clash */
 705                        *set = s;
 706                        return -EEXIST;
 707                }
 708        }
 709        if (*index == IPSET_INVALID_ID)
 710                /* No free slot remained */
 711                return -IPSET_ERR_MAX_SETS;
 712        return 0;
 713}
 714
 715static int
 716ip_set_none(struct sock *ctnl, struct sk_buff *skb,
 717            const struct nlmsghdr *nlh,
 718            const struct nlattr * const attr[])
 719{
 720        return -EOPNOTSUPP;
 721}
 722
 723static int
 724ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 725              const struct nlmsghdr *nlh,
 726              const struct nlattr * const attr[])
 727{
 728        struct ip_set *set, *clash = NULL;
 729        ip_set_id_t index = IPSET_INVALID_ID;
 730        struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
 731        const char *name, *typename;
 732        u8 family, revision;
 733        u32 flags = flag_exist(nlh);
 734        int ret = 0;
 735
 736        if (unlikely(protocol_failed(attr) ||
 737                     attr[IPSET_ATTR_SETNAME] == NULL ||
 738                     attr[IPSET_ATTR_TYPENAME] == NULL ||
 739                     attr[IPSET_ATTR_REVISION] == NULL ||
 740                     attr[IPSET_ATTR_FAMILY] == NULL ||
 741                     (attr[IPSET_ATTR_DATA] != NULL &&
 742                      !flag_nested(attr[IPSET_ATTR_DATA]))))
 743                return -IPSET_ERR_PROTOCOL;
 744
 745        name = nla_data(attr[IPSET_ATTR_SETNAME]);
 746        typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
 747        family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
 748        revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
 749        pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
 750                 name, typename, family_name(family), revision);
 751
 752        /*
 753         * First, and without any locks, allocate and initialize
 754         * a normal base set structure.
 755         */
 756        set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
 757        if (!set)
 758                return -ENOMEM;
 759        rwlock_init(&set->lock);
 760        strlcpy(set->name, name, IPSET_MAXNAMELEN);
 761        set->family = family;
 762        set->revision = revision;
 763
 764        /*
 765         * Next, check that we know the type, and take
 766         * a reference on the type, to make sure it stays available
 767         * while constructing our new set.
 768         *
 769         * After referencing the type, we try to create the type
 770         * specific part of the set without holding any locks.
 771         */
 772        ret = find_set_type_get(typename, family, revision, &(set->type));
 773        if (ret)
 774                goto out;
 775
 776        /*
 777         * Without holding any locks, create private part.
 778         */
 779        if (attr[IPSET_ATTR_DATA] &&
 780            nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
 781                             set->type->create_policy)) {
 782                ret = -IPSET_ERR_PROTOCOL;
 783                goto put_out;
 784        }
 785
 786        ret = set->type->create(set, tb, flags);
 787        if (ret != 0)
 788                goto put_out;
 789
 790        /* BTW, ret==0 here. */
 791
 792        /*
 793         * Here, we have a valid, constructed set and we are protected
 794         * by the nfnl mutex. Find the first free index in ip_set_list
 795         * and check clashing.
 796         */
 797        ret = find_free_id(set->name, &index, &clash);
 798        if (ret == -EEXIST) {
 799                /* If this is the same set and requested, ignore error */
 800                if ((flags & IPSET_FLAG_EXIST) &&
 801                    STREQ(set->type->name, clash->type->name) &&
 802                    set->type->family == clash->type->family &&
 803                    set->type->revision_min == clash->type->revision_min &&
 804                    set->type->revision_max == clash->type->revision_max &&
 805                    set->variant->same_set(set, clash))
 806                        ret = 0;
 807                goto cleanup;
 808        } else if (ret == -IPSET_ERR_MAX_SETS) {
 809                struct ip_set **list, **tmp;
 810                ip_set_id_t i = ip_set_max + IP_SET_INC;
 811
 812                if (i < ip_set_max || i == IPSET_INVALID_ID)
 813                        /* Wraparound */
 814                        goto cleanup;
 815
 816                list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
 817                if (!list)
 818                        goto cleanup;
 819                /* nfnl mutex is held, both lists are valid */
 820                tmp = nfnl_dereference(ip_set_list);
 821                memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
 822                rcu_assign_pointer(ip_set_list, list);
 823                /* Make sure all current packets have passed through */
 824                synchronize_net();
 825                /* Use new list */
 826                index = ip_set_max;
 827                ip_set_max = i;
 828                kfree(tmp);
 829                ret = 0;
 830        } else if (ret)
 831                goto cleanup;
 832
 833        /*
 834         * Finally! Add our shiny new set to the list, and be done.
 835         */
 836        pr_debug("create: '%s' created with index %u!\n", set->name, index);
 837        nfnl_set(index) = set;
 838
 839        return ret;
 840
 841cleanup:
 842        set->variant->destroy(set);
 843put_out:
 844        module_put(set->type->me);
 845out:
 846        kfree(set);
 847        return ret;
 848}
 849
 850/* Destroy sets */
 851
 852static const struct nla_policy
 853ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
 854        [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
 855        [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
 856                                    .len = IPSET_MAXNAMELEN - 1 },
 857};
 858
 859static void
 860ip_set_destroy_set(ip_set_id_t index)
 861{
 862        struct ip_set *set = nfnl_set(index);
 863
 864        pr_debug("set: %s\n",  set->name);
 865        nfnl_set(index) = NULL;
 866
 867        /* Must call it without holding any lock */
 868        set->variant->destroy(set);
 869        module_put(set->type->me);
 870        kfree(set);
 871}
 872
 873static int
 874ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 875               const struct nlmsghdr *nlh,
 876               const struct nlattr * const attr[])
 877{
 878        struct ip_set *s;
 879        ip_set_id_t i;
 880        int ret = 0;
 881
 882        if (unlikely(protocol_failed(attr)))
 883                return -IPSET_ERR_PROTOCOL;
 884
 885        /* Commands are serialized and references are
 886         * protected by the ip_set_ref_lock.
 887         * External systems (i.e. xt_set) must call
 888         * ip_set_put|get_nfnl_* functions, that way we
 889         * can safely check references here.
 890         *
 891         * list:set timer can only decrement the reference
 892         * counter, so if it's already zero, we can proceed
 893         * without holding the lock.
 894         */
 895        read_lock_bh(&ip_set_ref_lock);
 896        if (!attr[IPSET_ATTR_SETNAME]) {
 897                for (i = 0; i < ip_set_max; i++) {
 898                        s = nfnl_set(i);
 899                        if (s != NULL && s->ref) {
 900                                ret = -IPSET_ERR_BUSY;
 901                                goto out;
 902                        }
 903                }
 904                read_unlock_bh(&ip_set_ref_lock);
 905                for (i = 0; i < ip_set_max; i++) {
 906                        s = nfnl_set(i);
 907                        if (s != NULL)
 908                                ip_set_destroy_set(i);
 909                }
 910        } else {
 911                s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
 912                if (s == NULL) {
 913                        ret = -ENOENT;
 914                        goto out;
 915                } else if (s->ref) {
 916                        ret = -IPSET_ERR_BUSY;
 917                        goto out;
 918                }
 919                read_unlock_bh(&ip_set_ref_lock);
 920
 921                ip_set_destroy_set(i);
 922        }
 923        return 0;
 924out:
 925        read_unlock_bh(&ip_set_ref_lock);
 926        return ret;
 927}
 928
 929/* Flush sets */
 930
 931static void
 932ip_set_flush_set(struct ip_set *set)
 933{
 934        pr_debug("set: %s\n",  set->name);
 935
 936        write_lock_bh(&set->lock);
 937        set->variant->flush(set);
 938        write_unlock_bh(&set->lock);
 939}
 940
 941static int
 942ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 943             const struct nlmsghdr *nlh,
 944             const struct nlattr * const attr[])
 945{
 946        struct ip_set *s;
 947        ip_set_id_t i;
 948
 949        if (unlikely(protocol_failed(attr)))
 950                return -IPSET_ERR_PROTOCOL;
 951
 952        if (!attr[IPSET_ATTR_SETNAME]) {
 953                for (i = 0; i < ip_set_max; i++) {
 954                        s = nfnl_set(i);
 955                        if (s != NULL)
 956                                ip_set_flush_set(s);
 957                }
 958        } else {
 959                s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
 960                if (s == NULL)
 961                        return -ENOENT;
 962
 963                ip_set_flush_set(s);
 964        }
 965
 966        return 0;
 967}
 968
 969/* Rename a set */
 970
 971static const struct nla_policy
 972ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
 973        [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
 974        [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
 975                                    .len = IPSET_MAXNAMELEN - 1 },
 976        [IPSET_ATTR_SETNAME2]   = { .type = NLA_NUL_STRING,
 977                                    .len = IPSET_MAXNAMELEN - 1 },
 978};
 979
 980static int
 981ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 982              const struct nlmsghdr *nlh,
 983              const struct nlattr * const attr[])
 984{
 985        struct ip_set *set, *s;
 986        const char *name2;
 987        ip_set_id_t i;
 988        int ret = 0;
 989
 990        if (unlikely(protocol_failed(attr) ||
 991                     attr[IPSET_ATTR_SETNAME] == NULL ||
 992                     attr[IPSET_ATTR_SETNAME2] == NULL))
 993                return -IPSET_ERR_PROTOCOL;
 994
 995        set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
 996        if (set == NULL)
 997                return -ENOENT;
 998
 999        read_lock_bh(&ip_set_ref_lock);
1000        if (set->ref != 0) {
1001                ret = -IPSET_ERR_REFERENCED;
1002                goto out;
1003        }
1004
1005        name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
1006        for (i = 0; i < ip_set_max; i++) {
1007                s = nfnl_set(i);
1008                if (s != NULL && STREQ(s->name, name2)) {
1009                        ret = -IPSET_ERR_EXIST_SETNAME2;
1010                        goto out;
1011                }
1012        }
1013        strncpy(set->name, name2, IPSET_MAXNAMELEN);
1014
1015out:
1016        read_unlock_bh(&ip_set_ref_lock);
1017        return ret;
1018}
1019
1020/* Swap two sets so that name/index points to the other.
1021 * References and set names are also swapped.
1022 *
1023 * The commands are serialized by the nfnl mutex and references are
1024 * protected by the ip_set_ref_lock. The kernel interfaces
1025 * do not hold the mutex but the pointer settings are atomic
1026 * so the ip_set_list always contains valid pointers to the sets.
1027 */
1028
1029static int
1030ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
1031            const struct nlmsghdr *nlh,
1032            const struct nlattr * const attr[])
1033{
1034        struct ip_set *from, *to;
1035        ip_set_id_t from_id, to_id;
1036        char from_name[IPSET_MAXNAMELEN];
1037
1038        if (unlikely(protocol_failed(attr) ||
1039                     attr[IPSET_ATTR_SETNAME] == NULL ||
1040                     attr[IPSET_ATTR_SETNAME2] == NULL))
1041                return -IPSET_ERR_PROTOCOL;
1042
1043        from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
1044        if (from == NULL)
1045                return -ENOENT;
1046
1047        to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
1048        if (to == NULL)
1049                return -IPSET_ERR_EXIST_SETNAME2;
1050
1051        /* Features must not change.
1052         * Not an artificial restriction anymore, as we must prevent
1053         * possible loops created by swapping in setlist type of sets. */
1054        if (!(from->type->features == to->type->features &&
1055              from->type->family == to->type->family))
1056                return -IPSET_ERR_TYPE_MISMATCH;
1057
1058        strncpy(from_name, from->name, IPSET_MAXNAMELEN);
1059        strncpy(from->name, to->name, IPSET_MAXNAMELEN);
1060        strncpy(to->name, from_name, IPSET_MAXNAMELEN);
1061
1062        write_lock_bh(&ip_set_ref_lock);
1063        swap(from->ref, to->ref);
1064        nfnl_set(from_id) = to;
1065        nfnl_set(to_id) = from;
1066        write_unlock_bh(&ip_set_ref_lock);
1067
1068        return 0;
1069}
1070
1071/* List/save set data */
1072
/* Dump types, kept in the low 16 bits of the dump state word */
#define DUMP_INIT       0
#define DUMP_ALL        1
#define DUMP_ONE        2
#define DUMP_LAST       3

/* Split the dump state word: type in the low half, flags in the high half */
#define DUMP_TYPE(arg)          ((u32)(arg) & 0xFFFF)
#define DUMP_FLAGS(arg)         ((u32)(arg) >> 16)
1080
1081static int
1082ip_set_dump_done(struct netlink_callback *cb)
1083{
1084        if (cb->args[2]) {
1085                pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
1086                ip_set_put_byindex((ip_set_id_t) cb->args[1]);
1087        }
1088        return 0;
1089}
1090
1091static inline void
1092dump_attrs(struct nlmsghdr *nlh)
1093{
1094        const struct nlattr *attr;
1095        int rem;
1096
1097        pr_debug("dump nlmsg\n");
1098        nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1099                pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1100        }
1101}
1102
1103static int
1104dump_init(struct netlink_callback *cb)
1105{
1106        struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1107        int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1108        struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1109        struct nlattr *attr = (void *)nlh + min_len;
1110        u32 dump_type;
1111        ip_set_id_t index;
1112
1113        /* Second pass, so parser can't fail */
1114        nla_parse(cda, IPSET_ATTR_CMD_MAX,
1115                  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
1116
1117        /* cb->args[0] : dump single set/all sets
1118         *         [1] : set index
1119         *         [..]: type specific
1120         */
1121
1122        if (cda[IPSET_ATTR_SETNAME]) {
1123                struct ip_set *set;
1124
1125                set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
1126                                      &index);
1127                if (set == NULL)
1128                        return -ENOENT;
1129
1130                dump_type = DUMP_ONE;
1131                cb->args[1] = index;
1132        } else
1133                dump_type = DUMP_ALL;
1134
1135        if (cda[IPSET_ATTR_FLAGS]) {
1136                u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1137                dump_type |= (f << 16);
1138        }
1139        cb->args[0] = dump_type;
1140
1141        return 0;
1142}
1143
1144static int
1145ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1146{
1147        ip_set_id_t index = IPSET_INVALID_ID, max;
1148        struct ip_set *set = NULL;
1149        struct nlmsghdr *nlh = NULL;
1150        unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
1151        u32 dump_type, dump_flags;
1152        int ret = 0;
1153
1154        if (!cb->args[0]) {
1155                ret = dump_init(cb);
1156                if (ret < 0) {
1157                        nlh = nlmsg_hdr(cb->skb);
1158                        /* We have to create and send the error message
1159                         * manually :-( */
1160                        if (nlh->nlmsg_flags & NLM_F_ACK)
1161                                netlink_ack(cb->skb, nlh, ret);
1162                        return ret;
1163                }
1164        }
1165
1166        if (cb->args[1] >= ip_set_max)
1167                goto out;
1168
1169        dump_type = DUMP_TYPE(cb->args[0]);
1170        dump_flags = DUMP_FLAGS(cb->args[0]);
1171        max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
1172dump_last:
1173        pr_debug("args[0]: %u %u args[1]: %ld\n",
1174                 dump_type, dump_flags, cb->args[1]);
1175        for (; cb->args[1] < max; cb->args[1]++) {
1176                index = (ip_set_id_t) cb->args[1];
1177                set = nfnl_set(index);
1178                if (set == NULL) {
1179                        if (dump_type == DUMP_ONE) {
1180                                ret = -ENOENT;
1181                                goto out;
1182                        }
1183                        continue;
1184                }
1185                /* When dumping all sets, we must dump "sorted"
1186                 * so that lists (unions of sets) are dumped last.
1187                 */
1188                if (dump_type != DUMP_ONE &&
1189                    ((dump_type == DUMP_ALL) ==
1190                     !!(set->type->features & IPSET_DUMP_LAST)))
1191                        continue;
1192                pr_debug("List set: %s\n", set->name);
1193                if (!cb->args[2]) {
1194                        /* Start listing: make sure set won't be destroyed */
1195                        pr_debug("reference set\n");
1196                        __ip_set_get(set);
1197                }
1198                nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
1199                                cb->nlh->nlmsg_seq, flags,
1200                                IPSET_CMD_LIST);
1201                if (!nlh) {
1202                        ret = -EMSGSIZE;
1203                        goto release_refcount;
1204                }
1205                if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1206                    nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
1207                        goto nla_put_failure;
1208                if (dump_flags & IPSET_FLAG_LIST_SETNAME)
1209                        goto next_set;
1210                switch (cb->args[2]) {
1211                case 0:
1212                        /* Core header data */
1213                        if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
1214                                           set->type->name) ||
1215                            nla_put_u8(skb, IPSET_ATTR_FAMILY,
1216                                       set->family) ||
1217                            nla_put_u8(skb, IPSET_ATTR_REVISION,
1218                                       set->revision))
1219                                goto nla_put_failure;
1220                        ret = set->variant->head(set, skb);
1221                        if (ret < 0)
1222                                goto release_refcount;
1223                        if (dump_flags & IPSET_FLAG_LIST_HEADER)
1224                                goto next_set;
1225                        /* Fall through and add elements */
1226                default:
1227                        read_lock_bh(&set->lock);
1228                        ret = set->variant->list(set, skb, cb);
1229                        read_unlock_bh(&set->lock);
1230                        if (!cb->args[2])
1231                                /* Set is done, proceed with next one */
1232                                goto next_set;
1233                        goto release_refcount;
1234                }
1235        }
1236        /* If we dump all sets, continue with dumping last ones */
1237        if (dump_type == DUMP_ALL) {
1238                dump_type = DUMP_LAST;
1239                cb->args[0] = dump_type | (dump_flags << 16);
1240                cb->args[1] = 0;
1241                goto dump_last;
1242        }
1243        goto out;
1244
1245nla_put_failure:
1246        ret = -EFAULT;
1247next_set:
1248        if (dump_type == DUMP_ONE)
1249                cb->args[1] = IPSET_INVALID_ID;
1250        else
1251                cb->args[1]++;
1252release_refcount:
1253        /* If there was an error or set is done, release set */
1254        if (ret || !cb->args[2]) {
1255                pr_debug("release set %s\n", nfnl_set(index)->name);
1256                ip_set_put_byindex(index);
1257                cb->args[2] = 0;
1258        }
1259out:
1260        if (nlh) {
1261                nlmsg_end(skb, nlh);
1262                pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1263                dump_attrs(nlh);
1264        }
1265
1266        return ret < 0 ? ret : skb->len;
1267}
1268
1269static int
1270ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1271            const struct nlmsghdr *nlh,
1272            const struct nlattr * const attr[])
1273{
1274        if (unlikely(protocol_failed(attr)))
1275                return -IPSET_ERR_PROTOCOL;
1276
1277        {
1278                struct netlink_dump_control c = {
1279                        .dump = ip_set_dump_start,
1280                        .done = ip_set_dump_done,
1281                };
1282                return netlink_dump_start(ctnl, skb, nlh, &c);
1283        }
1284}
1285
1286/* Add, del and test */
1287
1288static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1289        [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1290        [IPSET_ATTR_SETNAME]    = { .type = NLA_NUL_STRING,
1291                                    .len = IPSET_MAXNAMELEN - 1 },
1292        [IPSET_ATTR_LINENO]     = { .type = NLA_U32 },
1293        [IPSET_ATTR_DATA]       = { .type = NLA_NESTED },
1294        [IPSET_ATTR_ADT]        = { .type = NLA_NESTED },
1295};
1296
1297static int
1298call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1299        struct nlattr *tb[], enum ipset_adt adt,
1300        u32 flags, bool use_lineno)
1301{
1302        int ret;
1303        u32 lineno = 0;
1304        bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
1305
1306        do {
1307                write_lock_bh(&set->lock);
1308                ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
1309                write_unlock_bh(&set->lock);
1310                retried = true;
1311        } while (ret == -EAGAIN &&
1312                 set->variant->resize &&
1313                 (ret = set->variant->resize(set, retried)) == 0);
1314
1315        if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1316                return 0;
1317        if (lineno && use_lineno) {
1318                /* Error in restore/batch mode: send back lineno */
1319                struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1320                struct sk_buff *skb2;
1321                struct nlmsgerr *errmsg;
1322                size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
1323                int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1324                struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1325                struct nlattr *cmdattr;
1326                u32 *errline;
1327
1328                skb2 = nlmsg_new(payload, GFP_KERNEL);
1329                if (skb2 == NULL)
1330                        return -ENOMEM;
1331                rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
1332                                  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1333                errmsg = nlmsg_data(rep);
1334                errmsg->error = ret;
1335                memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1336                cmdattr = (void *)&errmsg->msg + min_len;
1337
1338                nla_parse(cda, IPSET_ATTR_CMD_MAX,
1339                          cmdattr, nlh->nlmsg_len - min_len,
1340                          ip_set_adt_policy);
1341
1342                errline = nla_data(cda[IPSET_ATTR_LINENO]);
1343
1344                *errline = lineno;
1345
1346                netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1347                /* Signal netlink not to send its ACK/errmsg.  */
1348                return -EINTR;
1349        }
1350
1351        return ret;
1352}
1353
1354static int
1355ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1356            const struct nlmsghdr *nlh,
1357            const struct nlattr * const attr[])
1358{
1359        struct ip_set *set;
1360        struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1361        const struct nlattr *nla;
1362        u32 flags = flag_exist(nlh);
1363        bool use_lineno;
1364        int ret = 0;
1365
1366        if (unlikely(protocol_failed(attr) ||
1367                     attr[IPSET_ATTR_SETNAME] == NULL ||
1368                     !((attr[IPSET_ATTR_DATA] != NULL) ^
1369                       (attr[IPSET_ATTR_ADT] != NULL)) ||
1370                     (attr[IPSET_ATTR_DATA] != NULL &&
1371                      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1372                     (attr[IPSET_ATTR_ADT] != NULL &&
1373                      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1374                       attr[IPSET_ATTR_LINENO] == NULL))))
1375                return -IPSET_ERR_PROTOCOL;
1376
1377        set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1378        if (set == NULL)
1379                return -ENOENT;
1380
1381        use_lineno = !!attr[IPSET_ATTR_LINENO];
1382        if (attr[IPSET_ATTR_DATA]) {
1383                if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1384                                     attr[IPSET_ATTR_DATA],
1385                                     set->type->adt_policy))
1386                        return -IPSET_ERR_PROTOCOL;
1387                ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1388                              use_lineno);
1389        } else {
1390                int nla_rem;
1391
1392                nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1393                        memset(tb, 0, sizeof(tb));
1394                        if (nla_type(nla) != IPSET_ATTR_DATA ||
1395                            !flag_nested(nla) ||
1396                            nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1397                                             set->type->adt_policy))
1398                                return -IPSET_ERR_PROTOCOL;
1399                        ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
1400                                      flags, use_lineno);
1401                        if (ret < 0)
1402                                return ret;
1403                }
1404        }
1405        return ret;
1406}
1407
1408static int
1409ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1410            const struct nlmsghdr *nlh,
1411            const struct nlattr * const attr[])
1412{
1413        struct ip_set *set;
1414        struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1415        const struct nlattr *nla;
1416        u32 flags = flag_exist(nlh);
1417        bool use_lineno;
1418        int ret = 0;
1419
1420        if (unlikely(protocol_failed(attr) ||
1421                     attr[IPSET_ATTR_SETNAME] == NULL ||
1422                     !((attr[IPSET_ATTR_DATA] != NULL) ^
1423                       (attr[IPSET_ATTR_ADT] != NULL)) ||
1424                     (attr[IPSET_ATTR_DATA] != NULL &&
1425                      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1426                     (attr[IPSET_ATTR_ADT] != NULL &&
1427                      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1428                       attr[IPSET_ATTR_LINENO] == NULL))))
1429                return -IPSET_ERR_PROTOCOL;
1430
1431        set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1432        if (set == NULL)
1433                return -ENOENT;
1434
1435        use_lineno = !!attr[IPSET_ATTR_LINENO];
1436        if (attr[IPSET_ATTR_DATA]) {
1437                if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1438                                     attr[IPSET_ATTR_DATA],
1439                                     set->type->adt_policy))
1440                        return -IPSET_ERR_PROTOCOL;
1441                ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1442                              use_lineno);
1443        } else {
1444                int nla_rem;
1445
1446                nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1447                        memset(tb, 0, sizeof(*tb));
1448                        if (nla_type(nla) != IPSET_ATTR_DATA ||
1449                            !flag_nested(nla) ||
1450                            nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1451                                             set->type->adt_policy))
1452                                return -IPSET_ERR_PROTOCOL;
1453                        ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
1454                                      flags, use_lineno);
1455                        if (ret < 0)
1456                                return ret;
1457                }
1458        }
1459        return ret;
1460}
1461
1462static int
1463ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1464             const struct nlmsghdr *nlh,
1465             const struct nlattr * const attr[])
1466{
1467        struct ip_set *set;
1468        struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1469        int ret = 0;
1470
1471        if (unlikely(protocol_failed(attr) ||
1472                     attr[IPSET_ATTR_SETNAME] == NULL ||
1473                     attr[IPSET_ATTR_DATA] == NULL ||
1474                     !flag_nested(attr[IPSET_ATTR_DATA])))
1475                return -IPSET_ERR_PROTOCOL;
1476
1477        set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1478        if (set == NULL)
1479                return -ENOENT;
1480
1481        if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1482                             set->type->adt_policy))
1483                return -IPSET_ERR_PROTOCOL;
1484
1485        read_lock_bh(&set->lock);
1486        ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
1487        read_unlock_bh(&set->lock);
1488        /* Userspace can't trigger element to be re-added */
1489        if (ret == -EAGAIN)
1490                ret = 1;
1491
1492        return (ret < 0 && ret != -ENOTEMPTY) ? ret :
1493                ret > 0 ? 0 : -IPSET_ERR_EXIST;
1494}
1495
/* Get header data of a set */
1497
1498static int
1499ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1500              const struct nlmsghdr *nlh,
1501              const struct nlattr * const attr[])
1502{
1503        const struct ip_set *set;
1504        struct sk_buff *skb2;
1505        struct nlmsghdr *nlh2;
1506        int ret = 0;
1507
1508        if (unlikely(protocol_failed(attr) ||
1509                     attr[IPSET_ATTR_SETNAME] == NULL))
1510                return -IPSET_ERR_PROTOCOL;
1511
1512        set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1513        if (set == NULL)
1514                return -ENOENT;
1515
1516        skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1517        if (skb2 == NULL)
1518                return -ENOMEM;
1519
1520        nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1521                         IPSET_CMD_HEADER);
1522        if (!nlh2)
1523                goto nlmsg_failure;
1524        if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1525            nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1526            nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1527            nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1528            nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1529                goto nla_put_failure;
1530        nlmsg_end(skb2, nlh2);
1531
1532        ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1533        if (ret < 0)
1534                return ret;
1535
1536        return 0;
1537
1538nla_put_failure:
1539        nlmsg_cancel(skb2, nlh2);
1540nlmsg_failure:
1541        kfree_skb(skb2);
1542        return -EMSGSIZE;
1543}
1544
1545/* Get type data */
1546
1547static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1548        [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1549        [IPSET_ATTR_TYPENAME]   = { .type = NLA_NUL_STRING,
1550                                    .len = IPSET_MAXNAMELEN - 1 },
1551        [IPSET_ATTR_FAMILY]     = { .type = NLA_U8 },
1552};
1553
1554static int
1555ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1556            const struct nlmsghdr *nlh,
1557            const struct nlattr * const attr[])
1558{
1559        struct sk_buff *skb2;
1560        struct nlmsghdr *nlh2;
1561        u8 family, min, max;
1562        const char *typename;
1563        int ret = 0;
1564
1565        if (unlikely(protocol_failed(attr) ||
1566                     attr[IPSET_ATTR_TYPENAME] == NULL ||
1567                     attr[IPSET_ATTR_FAMILY] == NULL))
1568                return -IPSET_ERR_PROTOCOL;
1569
1570        family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1571        typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1572        ret = find_set_type_minmax(typename, family, &min, &max);
1573        if (ret)
1574                return ret;
1575
1576        skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1577        if (skb2 == NULL)
1578                return -ENOMEM;
1579
1580        nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1581                         IPSET_CMD_TYPE);
1582        if (!nlh2)
1583                goto nlmsg_failure;
1584        if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1585            nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1586            nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1587            nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1588            nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1589                goto nla_put_failure;
1590        nlmsg_end(skb2, nlh2);
1591
1592        pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1593        ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1594        if (ret < 0)
1595                return ret;
1596
1597        return 0;
1598
1599nla_put_failure:
1600        nlmsg_cancel(skb2, nlh2);
1601nlmsg_failure:
1602        kfree_skb(skb2);
1603        return -EMSGSIZE;
1604}
1605
1606/* Get protocol version */
1607
1608static const struct nla_policy
1609ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1610        [IPSET_ATTR_PROTOCOL]   = { .type = NLA_U8 },
1611};
1612
1613static int
1614ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1615                const struct nlmsghdr *nlh,
1616                const struct nlattr * const attr[])
1617{
1618        struct sk_buff *skb2;
1619        struct nlmsghdr *nlh2;
1620        int ret = 0;
1621
1622        if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1623                return -IPSET_ERR_PROTOCOL;
1624
1625        skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1626        if (skb2 == NULL)
1627                return -ENOMEM;
1628
1629        nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1630                         IPSET_CMD_PROTOCOL);
1631        if (!nlh2)
1632                goto nlmsg_failure;
1633        if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1634                goto nla_put_failure;
1635        nlmsg_end(skb2, nlh2);
1636
1637        ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1638        if (ret < 0)
1639                return ret;
1640
1641        return 0;
1642
1643nla_put_failure:
1644        nlmsg_cancel(skb2, nlh2);
1645nlmsg_failure:
1646        kfree_skb(skb2);
1647        return -EMSGSIZE;
1648}
1649
1650static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1651        [IPSET_CMD_NONE]        = {
1652                .call           = ip_set_none,
1653                .attr_count     = IPSET_ATTR_CMD_MAX,
1654        },
1655        [IPSET_CMD_CREATE]      = {
1656                .call           = ip_set_create,
1657                .attr_count     = IPSET_ATTR_CMD_MAX,
1658                .policy         = ip_set_create_policy,
1659        },
1660        [IPSET_CMD_DESTROY]     = {
1661                .call           = ip_set_destroy,
1662                .attr_count     = IPSET_ATTR_CMD_MAX,
1663                .policy         = ip_set_setname_policy,
1664        },
1665        [IPSET_CMD_FLUSH]       = {
1666                .call           = ip_set_flush,
1667                .attr_count     = IPSET_ATTR_CMD_MAX,
1668                .policy         = ip_set_setname_policy,
1669        },
1670        [IPSET_CMD_RENAME]      = {
1671                .call           = ip_set_rename,
1672                .attr_count     = IPSET_ATTR_CMD_MAX,
1673                .policy         = ip_set_setname2_policy,
1674        },
1675        [IPSET_CMD_SWAP]        = {
1676                .call           = ip_set_swap,
1677                .attr_count     = IPSET_ATTR_CMD_MAX,
1678                .policy         = ip_set_setname2_policy,
1679        },
1680        [IPSET_CMD_LIST]        = {
1681                .call           = ip_set_dump,
1682                .attr_count     = IPSET_ATTR_CMD_MAX,
1683                .policy         = ip_set_setname_policy,
1684        },
1685        [IPSET_CMD_SAVE]        = {
1686                .call           = ip_set_dump,
1687                .attr_count     = IPSET_ATTR_CMD_MAX,
1688                .policy         = ip_set_setname_policy,
1689        },
1690        [IPSET_CMD_ADD] = {
1691                .call           = ip_set_uadd,
1692                .attr_count     = IPSET_ATTR_CMD_MAX,
1693                .policy         = ip_set_adt_policy,
1694        },
1695        [IPSET_CMD_DEL] = {
1696                .call           = ip_set_udel,
1697                .attr_count     = IPSET_ATTR_CMD_MAX,
1698                .policy         = ip_set_adt_policy,
1699        },
1700        [IPSET_CMD_TEST]        = {
1701                .call           = ip_set_utest,
1702                .attr_count     = IPSET_ATTR_CMD_MAX,
1703                .policy         = ip_set_adt_policy,
1704        },
1705        [IPSET_CMD_HEADER]      = {
1706                .call           = ip_set_header,
1707                .attr_count     = IPSET_ATTR_CMD_MAX,
1708                .policy         = ip_set_setname_policy,
1709        },
1710        [IPSET_CMD_TYPE]        = {
1711                .call           = ip_set_type,
1712                .attr_count     = IPSET_ATTR_CMD_MAX,
1713                .policy         = ip_set_type_policy,
1714        },
1715        [IPSET_CMD_PROTOCOL]    = {
1716                .call           = ip_set_protocol,
1717                .attr_count     = IPSET_ATTR_CMD_MAX,
1718                .policy         = ip_set_protocol_policy,
1719        },
1720};
1721
1722static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1723        .name           = "ip_set",
1724        .subsys_id      = NFNL_SUBSYS_IPSET,
1725        .cb_count       = IPSET_MSG_MAX,
1726        .cb             = ip_set_netlink_subsys_cb,
1727};
1728
1729/* Interface to iptables/ip6tables */
1730
1731static int
1732ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1733{
1734        unsigned int *op;
1735        void *data;
1736        int copylen = *len, ret = 0;
1737
1738        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1739                return -EPERM;
1740        if (optval != SO_IP_SET)
1741                return -EBADF;
1742        if (*len < sizeof(unsigned int))
1743                return -EINVAL;
1744
1745        data = vmalloc(*len);
1746        if (!data)
1747                return -ENOMEM;
1748        if (copy_from_user(data, user, *len) != 0) {
1749                ret = -EFAULT;
1750                goto done;
1751        }
1752        op = (unsigned int *) data;
1753
1754        if (*op < IP_SET_OP_VERSION) {
1755                /* Check the version at the beginning of operations */
1756                struct ip_set_req_version *req_version = data;
1757                if (req_version->version != IPSET_PROTOCOL) {
1758                        ret = -EPROTO;
1759                        goto done;
1760                }
1761        }
1762
1763        switch (*op) {
1764        case IP_SET_OP_VERSION: {
1765                struct ip_set_req_version *req_version = data;
1766
1767                if (*len != sizeof(struct ip_set_req_version)) {
1768                        ret = -EINVAL;
1769                        goto done;
1770                }
1771
1772                req_version->version = IPSET_PROTOCOL;
1773                ret = copy_to_user(user, req_version,
1774                                   sizeof(struct ip_set_req_version));
1775                goto done;
1776        }
1777        case IP_SET_OP_GET_BYNAME: {
1778                struct ip_set_req_get_set *req_get = data;
1779                ip_set_id_t id;
1780
1781                if (*len != sizeof(struct ip_set_req_get_set)) {
1782                        ret = -EINVAL;
1783                        goto done;
1784                }
1785                req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1786                nfnl_lock(NFNL_SUBSYS_IPSET);
1787                find_set_and_id(req_get->set.name, &id);
1788                req_get->set.index = id;
1789                nfnl_unlock(NFNL_SUBSYS_IPSET);
1790                goto copy;
1791        }
1792        case IP_SET_OP_GET_BYINDEX: {
1793                struct ip_set_req_get_set *req_get = data;
1794                struct ip_set *set;
1795
1796                if (*len != sizeof(struct ip_set_req_get_set) ||
1797                    req_get->set.index >= ip_set_max) {
1798                        ret = -EINVAL;
1799                        goto done;
1800                }
1801                nfnl_lock(NFNL_SUBSYS_IPSET);
1802                set = nfnl_set(req_get->set.index);
1803                strncpy(req_get->set.name, set ? set->name : "",
1804                        IPSET_MAXNAMELEN);
1805                nfnl_unlock(NFNL_SUBSYS_IPSET);
1806                goto copy;
1807        }
1808        default:
1809                ret = -EBADMSG;
1810                goto done;
1811        }       /* end of switch(op) */
1812
1813copy:
1814        ret = copy_to_user(user, data, copylen);
1815
1816done:
1817        vfree(data);
1818        if (ret > 0)
1819                ret = 0;
1820        return ret;
1821}
1822
1823static struct nf_sockopt_ops so_set __read_mostly = {
1824        .pf             = PF_INET,
1825        .get_optmin     = SO_IP_SET,
1826        .get_optmax     = SO_IP_SET + 1,
1827        .get            = &ip_set_sockfn_get,
1828        .owner          = THIS_MODULE,
1829};
1830
1831static int __init
1832ip_set_init(void)
1833{
1834        struct ip_set **list;
1835        int ret;
1836
1837        if (max_sets)
1838                ip_set_max = max_sets;
1839        if (ip_set_max >= IPSET_INVALID_ID)
1840                ip_set_max = IPSET_INVALID_ID - 1;
1841
1842        list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
1843        if (!list)
1844                return -ENOMEM;
1845
1846        rcu_assign_pointer(ip_set_list, list);
1847        ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1848        if (ret != 0) {
1849                pr_err("ip_set: cannot register with nfnetlink.\n");
1850                kfree(list);
1851                return ret;
1852        }
1853        ret = nf_register_sockopt(&so_set);
1854        if (ret != 0) {
1855                pr_err("SO_SET registry failed: %d\n", ret);
1856                nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1857                kfree(list);
1858                return ret;
1859        }
1860
1861        pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1862        return 0;
1863}
1864
1865static void __exit
1866ip_set_fini(void)
1867{
1868        struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
1869
1870        /* There can't be any existing set */
1871        nf_unregister_sockopt(&so_set);
1872        nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1873        kfree(list);
1874        pr_debug("these are the famous last words\n");
1875}
1876
/* Module entry and exit points. */
module_init(ip_set_init);
module_exit(ip_set_fini);
1879