linux/net/ipv4/netfilter/ipt_CLUSTERIP.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Cluster IP hashmark target
   3 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
   4 * based on ideas of Fabio Olive Leite <olive@unixforge.org>
   5 *
   6 * Development of this code funded by SuSE Linux AG, https://www.suse.com/
   7 */
   8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   9#include <linux/module.h>
  10#include <linux/proc_fs.h>
  11#include <linux/jhash.h>
  12#include <linux/bitops.h>
  13#include <linux/skbuff.h>
  14#include <linux/slab.h>
  15#include <linux/ip.h>
  16#include <linux/tcp.h>
  17#include <linux/udp.h>
  18#include <linux/icmp.h>
  19#include <linux/if_arp.h>
  20#include <linux/seq_file.h>
  21#include <linux/refcount.h>
  22#include <linux/netfilter_arp.h>
  23#include <linux/netfilter/x_tables.h>
  24#include <linux/netfilter_ipv4/ip_tables.h>
  25#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
  26#include <net/netfilter/nf_conntrack.h>
  27#include <net/net_namespace.h>
  28#include <net/netns/generic.h>
  29#include <net/checksum.h>
  30#include <net/ip.h>
  31
  32#define CLUSTERIP_VERSION "0.8"
  33
  34MODULE_LICENSE("GPL");
  35MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
  36MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
  37
  38struct clusterip_config {
  39        struct list_head list;                  /* list of all configs */
  40        refcount_t refcount;                    /* reference count */
  41        refcount_t entries;                     /* number of entries/rules
  42                                                 * referencing us */
  43
  44        __be32 clusterip;                       /* the IP address */
  45        u_int8_t clustermac[ETH_ALEN];          /* the MAC address */
  46        int ifindex;                            /* device ifindex */
  47        u_int16_t num_total_nodes;              /* total number of nodes */
  48        unsigned long local_nodes;              /* node number array */
  49
  50#ifdef CONFIG_PROC_FS
  51        struct proc_dir_entry *pde;             /* proc dir entry */
  52#endif
  53        enum clusterip_hashmode hash_mode;      /* which hashing mode */
  54        u_int32_t hash_initval;                 /* hash initialization */
  55        struct rcu_head rcu;                    /* for call_rcu */
  56        struct net *net;                        /* netns for pernet list */
  57        char ifname[IFNAMSIZ];                  /* device ifname */
  58};
  59
  60#ifdef CONFIG_PROC_FS
  61static const struct proc_ops clusterip_proc_ops;
  62#endif
  63
  64struct clusterip_net {
  65        struct list_head configs;
  66        /* lock protects the configs list */
  67        spinlock_t lock;
  68
  69#ifdef CONFIG_PROC_FS
  70        struct proc_dir_entry *procdir;
  71        /* mutex protects the config->pde*/
  72        struct mutex mutex;
  73#endif
  74};
  75
  76static unsigned int clusterip_net_id __read_mostly;
  77static inline struct clusterip_net *clusterip_pernet(struct net *net)
  78{
  79        return net_generic(net, clusterip_net_id);
  80}
  81
  82static inline void
  83clusterip_config_get(struct clusterip_config *c)
  84{
  85        refcount_inc(&c->refcount);
  86}
  87
  88static void clusterip_config_rcu_free(struct rcu_head *head)
  89{
  90        struct clusterip_config *config;
  91        struct net_device *dev;
  92
  93        config = container_of(head, struct clusterip_config, rcu);
  94        dev = dev_get_by_name(config->net, config->ifname);
  95        if (dev) {
  96                dev_mc_del(dev, config->clustermac);
  97                dev_put(dev);
  98        }
  99        kfree(config);
 100}
 101
 102static inline void
 103clusterip_config_put(struct clusterip_config *c)
 104{
 105        if (refcount_dec_and_test(&c->refcount))
 106                call_rcu(&c->rcu, clusterip_config_rcu_free);
 107}
 108
 109/* decrease the count of entries using/referencing this config.  If last
 110 * entry(rule) is removed, remove the config from lists, but don't free it
 111 * yet, since proc-files could still be holding references */
 112static inline void
 113clusterip_config_entry_put(struct clusterip_config *c)
 114{
 115        struct clusterip_net *cn = clusterip_pernet(c->net);
 116
 117        local_bh_disable();
 118        if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
 119                list_del_rcu(&c->list);
 120                spin_unlock(&cn->lock);
 121                local_bh_enable();
 122                /* In case anyone still accesses the file, the open/close
 123                 * functions are also incrementing the refcount on their own,
 124                 * so it's safe to remove the entry even if it's in use. */
 125#ifdef CONFIG_PROC_FS
 126                mutex_lock(&cn->mutex);
 127                if (cn->procdir)
 128                        proc_remove(c->pde);
 129                mutex_unlock(&cn->mutex);
 130#endif
 131                return;
 132        }
 133        local_bh_enable();
 134}
 135
 136static struct clusterip_config *
 137__clusterip_config_find(struct net *net, __be32 clusterip)
 138{
 139        struct clusterip_config *c;
 140        struct clusterip_net *cn = clusterip_pernet(net);
 141
 142        list_for_each_entry_rcu(c, &cn->configs, list) {
 143                if (c->clusterip == clusterip)
 144                        return c;
 145        }
 146
 147        return NULL;
 148}
 149
 150static inline struct clusterip_config *
 151clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
 152{
 153        struct clusterip_config *c;
 154
 155        rcu_read_lock_bh();
 156        c = __clusterip_config_find(net, clusterip);
 157        if (c) {
 158#ifdef CONFIG_PROC_FS
 159                if (!c->pde)
 160                        c = NULL;
 161                else
 162#endif
 163                if (unlikely(!refcount_inc_not_zero(&c->refcount)))
 164                        c = NULL;
 165                else if (entry) {
 166                        if (unlikely(!refcount_inc_not_zero(&c->entries))) {
 167                                clusterip_config_put(c);
 168                                c = NULL;
 169                        }
 170                }
 171        }
 172        rcu_read_unlock_bh();
 173
 174        return c;
 175}
 176
 177static void
 178clusterip_config_init_nodelist(struct clusterip_config *c,
 179                               const struct ipt_clusterip_tgt_info *i)
 180{
 181        int n;
 182
 183        for (n = 0; n < i->num_local_nodes; n++)
 184                set_bit(i->local_nodes[n] - 1, &c->local_nodes);
 185}
 186
 187static int
 188clusterip_netdev_event(struct notifier_block *this, unsigned long event,
 189                       void *ptr)
 190{
 191        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 192        struct net *net = dev_net(dev);
 193        struct clusterip_net *cn = clusterip_pernet(net);
 194        struct clusterip_config *c;
 195
 196        spin_lock_bh(&cn->lock);
 197        list_for_each_entry_rcu(c, &cn->configs, list) {
 198                switch (event) {
 199                case NETDEV_REGISTER:
 200                        if (!strcmp(dev->name, c->ifname)) {
 201                                c->ifindex = dev->ifindex;
 202                                dev_mc_add(dev, c->clustermac);
 203                        }
 204                        break;
 205                case NETDEV_UNREGISTER:
 206                        if (dev->ifindex == c->ifindex) {
 207                                dev_mc_del(dev, c->clustermac);
 208                                c->ifindex = -1;
 209                        }
 210                        break;
 211                case NETDEV_CHANGENAME:
 212                        if (!strcmp(dev->name, c->ifname)) {
 213                                c->ifindex = dev->ifindex;
 214                                dev_mc_add(dev, c->clustermac);
 215                        } else if (dev->ifindex == c->ifindex) {
 216                                dev_mc_del(dev, c->clustermac);
 217                                c->ifindex = -1;
 218                        }
 219                        break;
 220                }
 221        }
 222        spin_unlock_bh(&cn->lock);
 223
 224        return NOTIFY_DONE;
 225}
 226
 227static struct clusterip_config *
 228clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
 229                      __be32 ip, const char *iniface)
 230{
 231        struct clusterip_net *cn = clusterip_pernet(net);
 232        struct clusterip_config *c;
 233        struct net_device *dev;
 234        int err;
 235
 236        if (iniface[0] == '\0') {
 237                pr_info("Please specify an interface name\n");
 238                return ERR_PTR(-EINVAL);
 239        }
 240
 241        c = kzalloc(sizeof(*c), GFP_ATOMIC);
 242        if (!c)
 243                return ERR_PTR(-ENOMEM);
 244
 245        dev = dev_get_by_name(net, iniface);
 246        if (!dev) {
 247                pr_info("no such interface %s\n", iniface);
 248                kfree(c);
 249                return ERR_PTR(-ENOENT);
 250        }
 251        c->ifindex = dev->ifindex;
 252        strcpy(c->ifname, dev->name);
 253        memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
 254        dev_mc_add(dev, c->clustermac);
 255        dev_put(dev);
 256
 257        c->clusterip = ip;
 258        c->num_total_nodes = i->num_total_nodes;
 259        clusterip_config_init_nodelist(c, i);
 260        c->hash_mode = i->hash_mode;
 261        c->hash_initval = i->hash_initval;
 262        c->net = net;
 263        refcount_set(&c->refcount, 1);
 264
 265        spin_lock_bh(&cn->lock);
 266        if (__clusterip_config_find(net, ip)) {
 267                err = -EBUSY;
 268                goto out_config_put;
 269        }
 270
 271        list_add_rcu(&c->list, &cn->configs);
 272        spin_unlock_bh(&cn->lock);
 273
 274#ifdef CONFIG_PROC_FS
 275        {
 276                char buffer[16];
 277
 278                /* create proc dir entry */
 279                sprintf(buffer, "%pI4", &ip);
 280                mutex_lock(&cn->mutex);
 281                c->pde = proc_create_data(buffer, 0600,
 282                                          cn->procdir,
 283                                          &clusterip_proc_ops, c);
 284                mutex_unlock(&cn->mutex);
 285                if (!c->pde) {
 286                        err = -ENOMEM;
 287                        goto err;
 288                }
 289        }
 290#endif
 291
 292        refcount_set(&c->entries, 1);
 293        return c;
 294
 295#ifdef CONFIG_PROC_FS
 296err:
 297#endif
 298        spin_lock_bh(&cn->lock);
 299        list_del_rcu(&c->list);
 300out_config_put:
 301        spin_unlock_bh(&cn->lock);
 302        clusterip_config_put(c);
 303        return ERR_PTR(err);
 304}
 305
 306#ifdef CONFIG_PROC_FS
 307static int
 308clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
 309{
 310
 311        if (nodenum == 0 ||
 312            nodenum > c->num_total_nodes)
 313                return 1;
 314
 315        /* check if we already have this number in our bitfield */
 316        if (test_and_set_bit(nodenum - 1, &c->local_nodes))
 317                return 1;
 318
 319        return 0;
 320}
 321
 322static bool
 323clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 324{
 325        if (nodenum == 0 ||
 326            nodenum > c->num_total_nodes)
 327                return true;
 328
 329        if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
 330                return false;
 331
 332        return true;
 333}
 334#endif
 335
 336static inline u_int32_t
 337clusterip_hashfn(const struct sk_buff *skb,
 338                 const struct clusterip_config *config)
 339{
 340        const struct iphdr *iph = ip_hdr(skb);
 341        unsigned long hashval;
 342        u_int16_t sport = 0, dport = 0;
 343        int poff;
 344
 345        poff = proto_ports_offset(iph->protocol);
 346        if (poff >= 0) {
 347                const u_int16_t *ports;
 348                u16 _ports[2];
 349
 350                ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
 351                if (ports) {
 352                        sport = ports[0];
 353                        dport = ports[1];
 354                }
 355        } else {
 356                net_info_ratelimited("unknown protocol %u\n", iph->protocol);
 357        }
 358
 359        switch (config->hash_mode) {
 360        case CLUSTERIP_HASHMODE_SIP:
 361                hashval = jhash_1word(ntohl(iph->saddr),
 362                                      config->hash_initval);
 363                break;
 364        case CLUSTERIP_HASHMODE_SIP_SPT:
 365                hashval = jhash_2words(ntohl(iph->saddr), sport,
 366                                       config->hash_initval);
 367                break;
 368        case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
 369                hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
 370                                       config->hash_initval);
 371                break;
 372        default:
 373                /* to make gcc happy */
 374                hashval = 0;
 375                /* This cannot happen, unless the check function wasn't called
 376                 * at rule load time */
 377                pr_info("unknown mode %u\n", config->hash_mode);
 378                BUG();
 379                break;
 380        }
 381
 382        /* node numbers are 1..n, not 0..n */
 383        return reciprocal_scale(hashval, config->num_total_nodes) + 1;
 384}
 385
 386static inline int
 387clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
 388{
 389        return test_bit(hash - 1, &config->local_nodes);
 390}
 391
 392/***********************************************************************
 393 * IPTABLES TARGET
 394 ***********************************************************************/
 395
 396static unsigned int
 397clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 398{
 399        const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 400        struct nf_conn *ct;
 401        enum ip_conntrack_info ctinfo;
 402        u_int32_t hash;
 403
 404        /* don't need to clusterip_config_get() here, since refcount
 405         * is only decremented by destroy() - and ip_tables guarantees
 406         * that the ->target() function isn't called after ->destroy() */
 407
 408        ct = nf_ct_get(skb, &ctinfo);
 409        if (ct == NULL)
 410                return NF_DROP;
 411
 412        /* special case: ICMP error handling. conntrack distinguishes between
 413         * error messages (RELATED) and information requests (see below) */
 414        if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
 415            (ctinfo == IP_CT_RELATED ||
 416             ctinfo == IP_CT_RELATED_REPLY))
 417                return XT_CONTINUE;
 418
 419        /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
 420         * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
 421         * on, which all have an ID field [relevant for hashing]. */
 422
 423        hash = clusterip_hashfn(skb, cipinfo->config);
 424
 425        switch (ctinfo) {
 426        case IP_CT_NEW:
 427                ct->mark = hash;
 428                break;
 429        case IP_CT_RELATED:
 430        case IP_CT_RELATED_REPLY:
 431                /* FIXME: we don't handle expectations at the moment.
 432                 * They can arrive on a different node than
 433                 * the master connection (e.g. FTP passive mode) */
 434        case IP_CT_ESTABLISHED:
 435        case IP_CT_ESTABLISHED_REPLY:
 436                break;
 437        default:                        /* Prevent gcc warnings */
 438                break;
 439        }
 440
 441#ifdef DEBUG
 442        nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 443#endif
 444        pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
 445        if (!clusterip_responsible(cipinfo->config, hash)) {
 446                pr_debug("not responsible\n");
 447                return NF_DROP;
 448        }
 449        pr_debug("responsible\n");
 450
 451        /* despite being received via linklayer multicast, this is
 452         * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
 453        skb->pkt_type = PACKET_HOST;
 454
 455        return XT_CONTINUE;
 456}
 457
 458static int clusterip_tg_check(const struct xt_tgchk_param *par)
 459{
 460        struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 461        const struct ipt_entry *e = par->entryinfo;
 462        struct clusterip_config *config;
 463        int ret, i;
 464
 465        if (par->nft_compat) {
 466                pr_err("cannot use CLUSTERIP target from nftables compat\n");
 467                return -EOPNOTSUPP;
 468        }
 469
 470        if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
 471            cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
 472            cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
 473                pr_info("unknown mode %u\n", cipinfo->hash_mode);
 474                return -EINVAL;
 475
 476        }
 477        if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
 478            e->ip.dst.s_addr == 0) {
 479                pr_info("Please specify destination IP\n");
 480                return -EINVAL;
 481        }
 482        if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
 483                pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
 484                return -EINVAL;
 485        }
 486        for (i = 0; i < cipinfo->num_local_nodes; i++) {
 487                if (cipinfo->local_nodes[i] - 1 >=
 488                    sizeof(config->local_nodes) * 8) {
 489                        pr_info("bad local_nodes[%d] %u\n",
 490                                i, cipinfo->local_nodes[i]);
 491                        return -EINVAL;
 492                }
 493        }
 494
 495        config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
 496        if (!config) {
 497                if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
 498                        pr_info("no config found for %pI4, need 'new'\n",
 499                                &e->ip.dst.s_addr);
 500                        return -EINVAL;
 501                } else {
 502                        config = clusterip_config_init(par->net, cipinfo,
 503                                                       e->ip.dst.s_addr,
 504                                                       e->ip.iniface);
 505                        if (IS_ERR(config))
 506                                return PTR_ERR(config);
 507                }
 508        } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
 509                return -EINVAL;
 510
 511        ret = nf_ct_netns_get(par->net, par->family);
 512        if (ret < 0) {
 513                pr_info("cannot load conntrack support for proto=%u\n",
 514                        par->family);
 515                clusterip_config_entry_put(config);
 516                clusterip_config_put(config);
 517                return ret;
 518        }
 519
 520        if (!par->net->xt.clusterip_deprecated_warning) {
 521                pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
 522                        "use xt_cluster instead\n");
 523                par->net->xt.clusterip_deprecated_warning = true;
 524        }
 525
 526        cipinfo->config = config;
 527        return ret;
 528}
 529
 530/* drop reference count of cluster config when rule is deleted */
 531static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
 532{
 533        const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 534
 535        /* if no more entries are referencing the config, remove it
 536         * from the list and destroy the proc entry */
 537        clusterip_config_entry_put(cipinfo->config);
 538
 539        clusterip_config_put(cipinfo->config);
 540
 541        nf_ct_netns_put(par->net, par->family);
 542}
 543
 544#ifdef CONFIG_COMPAT
 545struct compat_ipt_clusterip_tgt_info
 546{
 547        u_int32_t       flags;
 548        u_int8_t        clustermac[6];
 549        u_int16_t       num_total_nodes;
 550        u_int16_t       num_local_nodes;
 551        u_int16_t       local_nodes[CLUSTERIP_MAX_NODES];
 552        u_int32_t       hash_mode;
 553        u_int32_t       hash_initval;
 554        compat_uptr_t   config;
 555};
 556#endif /* CONFIG_COMPAT */
 557
 558static struct xt_target clusterip_tg_reg __read_mostly = {
 559        .name           = "CLUSTERIP",
 560        .family         = NFPROTO_IPV4,
 561        .target         = clusterip_tg,
 562        .checkentry     = clusterip_tg_check,
 563        .destroy        = clusterip_tg_destroy,
 564        .targetsize     = sizeof(struct ipt_clusterip_tgt_info),
 565        .usersize       = offsetof(struct ipt_clusterip_tgt_info, config),
 566#ifdef CONFIG_COMPAT
 567        .compatsize     = sizeof(struct compat_ipt_clusterip_tgt_info),
 568#endif /* CONFIG_COMPAT */
 569        .me             = THIS_MODULE
 570};
 571
 572
 573/***********************************************************************
 574 * ARP MANGLING CODE
 575 ***********************************************************************/
 576
 577/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
 578struct arp_payload {
 579        u_int8_t src_hw[ETH_ALEN];
 580        __be32 src_ip;
 581        u_int8_t dst_hw[ETH_ALEN];
 582        __be32 dst_ip;
 583} __packed;
 584
 585#ifdef DEBUG
 586static void arp_print(struct arp_payload *payload)
 587{
 588#define HBUFFERLEN 30
 589        char hbuffer[HBUFFERLEN];
 590        int j, k;
 591
 592        for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) {
 593                hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
 594                hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
 595                hbuffer[k++] = ':';
 596        }
 597        hbuffer[--k] = '\0';
 598
 599        pr_debug("src %pI4@%s, dst %pI4\n",
 600                 &payload->src_ip, hbuffer, &payload->dst_ip);
 601}
 602#endif
 603
 604static unsigned int
 605arp_mangle(void *priv,
 606           struct sk_buff *skb,
 607           const struct nf_hook_state *state)
 608{
 609        struct arphdr *arp = arp_hdr(skb);
 610        struct arp_payload *payload;
 611        struct clusterip_config *c;
 612        struct net *net = state->net;
 613
 614        /* we don't care about non-ethernet and non-ipv4 ARP */
 615        if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
 616            arp->ar_pro != htons(ETH_P_IP) ||
 617            arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
 618                return NF_ACCEPT;
 619
 620        /* we only want to mangle arp requests and replies */
 621        if (arp->ar_op != htons(ARPOP_REPLY) &&
 622            arp->ar_op != htons(ARPOP_REQUEST))
 623                return NF_ACCEPT;
 624
 625        payload = (void *)(arp+1);
 626
 627        /* if there is no clusterip configuration for the arp reply's
 628         * source ip, we don't want to mangle it */
 629        c = clusterip_config_find_get(net, payload->src_ip, 0);
 630        if (!c)
 631                return NF_ACCEPT;
 632
 633        /* normally the linux kernel always replies to arp queries of
 634         * addresses on different interfacs.  However, in the CLUSTERIP case
 635         * this wouldn't work, since we didn't subscribe the mcast group on
 636         * other interfaces */
 637        if (c->ifindex != state->out->ifindex) {
 638                pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n",
 639                         c->ifindex, state->out->ifindex);
 640                clusterip_config_put(c);
 641                return NF_ACCEPT;
 642        }
 643
 644        /* mangle reply hardware address */
 645        memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
 646
 647#ifdef DEBUG
 648        pr_debug("mangled arp reply: ");
 649        arp_print(payload);
 650#endif
 651
 652        clusterip_config_put(c);
 653
 654        return NF_ACCEPT;
 655}
 656
 657static const struct nf_hook_ops cip_arp_ops = {
 658        .hook = arp_mangle,
 659        .pf = NFPROTO_ARP,
 660        .hooknum = NF_ARP_OUT,
 661        .priority = -1
 662};
 663
 664/***********************************************************************
 665 * PROC DIR HANDLING
 666 ***********************************************************************/
 667
 668#ifdef CONFIG_PROC_FS
 669
 670struct clusterip_seq_position {
 671        unsigned int pos;       /* position */
 672        unsigned int weight;    /* number of bits set == size */
 673        unsigned int bit;       /* current bit */
 674        unsigned long val;      /* current value */
 675};
 676
 677static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
 678{
 679        struct clusterip_config *c = s->private;
 680        unsigned int weight;
 681        u_int32_t local_nodes;
 682        struct clusterip_seq_position *idx;
 683
 684        /* FIXME: possible race */
 685        local_nodes = c->local_nodes;
 686        weight = hweight32(local_nodes);
 687        if (*pos >= weight)
 688                return NULL;
 689
 690        idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
 691        if (!idx)
 692                return ERR_PTR(-ENOMEM);
 693
 694        idx->pos = *pos;
 695        idx->weight = weight;
 696        idx->bit = ffs(local_nodes);
 697        idx->val = local_nodes;
 698        clear_bit(idx->bit - 1, &idx->val);
 699
 700        return idx;
 701}
 702
 703static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
 704{
 705        struct clusterip_seq_position *idx = v;
 706
 707        *pos = ++idx->pos;
 708        if (*pos >= idx->weight) {
 709                kfree(v);
 710                return NULL;
 711        }
 712        idx->bit = ffs(idx->val);
 713        clear_bit(idx->bit - 1, &idx->val);
 714        return idx;
 715}
 716
 717static void clusterip_seq_stop(struct seq_file *s, void *v)
 718{
 719        if (!IS_ERR(v))
 720                kfree(v);
 721}
 722
 723static int clusterip_seq_show(struct seq_file *s, void *v)
 724{
 725        struct clusterip_seq_position *idx = v;
 726
 727        if (idx->pos != 0)
 728                seq_putc(s, ',');
 729
 730        seq_printf(s, "%u", idx->bit);
 731
 732        if (idx->pos == idx->weight - 1)
 733                seq_putc(s, '\n');
 734
 735        return 0;
 736}
 737
 738static const struct seq_operations clusterip_seq_ops = {
 739        .start  = clusterip_seq_start,
 740        .next   = clusterip_seq_next,
 741        .stop   = clusterip_seq_stop,
 742        .show   = clusterip_seq_show,
 743};
 744
 745static int clusterip_proc_open(struct inode *inode, struct file *file)
 746{
 747        int ret = seq_open(file, &clusterip_seq_ops);
 748
 749        if (!ret) {
 750                struct seq_file *sf = file->private_data;
 751                struct clusterip_config *c = PDE_DATA(inode);
 752
 753                sf->private = c;
 754
 755                clusterip_config_get(c);
 756        }
 757
 758        return ret;
 759}
 760
 761static int clusterip_proc_release(struct inode *inode, struct file *file)
 762{
 763        struct clusterip_config *c = PDE_DATA(inode);
 764        int ret;
 765
 766        ret = seq_release(inode, file);
 767
 768        if (!ret)
 769                clusterip_config_put(c);
 770
 771        return ret;
 772}
 773
 774static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
 775                                size_t size, loff_t *ofs)
 776{
 777        struct clusterip_config *c = PDE_DATA(file_inode(file));
 778#define PROC_WRITELEN   10
 779        char buffer[PROC_WRITELEN+1];
 780        unsigned long nodenum;
 781        int rc;
 782
 783        if (size > PROC_WRITELEN)
 784                return -EIO;
 785        if (copy_from_user(buffer, input, size))
 786                return -EFAULT;
 787        buffer[size] = 0;
 788
 789        if (*buffer == '+') {
 790                rc = kstrtoul(buffer+1, 10, &nodenum);
 791                if (rc)
 792                        return rc;
 793                if (clusterip_add_node(c, nodenum))
 794                        return -ENOMEM;
 795        } else if (*buffer == '-') {
 796                rc = kstrtoul(buffer+1, 10, &nodenum);
 797                if (rc)
 798                        return rc;
 799                if (clusterip_del_node(c, nodenum))
 800                        return -ENOENT;
 801        } else
 802                return -EIO;
 803
 804        return size;
 805}
 806
 807static const struct proc_ops clusterip_proc_ops = {
 808        .proc_open      = clusterip_proc_open,
 809        .proc_read      = seq_read,
 810        .proc_write     = clusterip_proc_write,
 811        .proc_lseek     = seq_lseek,
 812        .proc_release   = clusterip_proc_release,
 813};
 814
 815#endif /* CONFIG_PROC_FS */
 816
 817static int clusterip_net_init(struct net *net)
 818{
 819        struct clusterip_net *cn = clusterip_pernet(net);
 820        int ret;
 821
 822        INIT_LIST_HEAD(&cn->configs);
 823
 824        spin_lock_init(&cn->lock);
 825
 826        ret = nf_register_net_hook(net, &cip_arp_ops);
 827        if (ret < 0)
 828                return ret;
 829
 830#ifdef CONFIG_PROC_FS
 831        cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
 832        if (!cn->procdir) {
 833                nf_unregister_net_hook(net, &cip_arp_ops);
 834                pr_err("Unable to proc dir entry\n");
 835                return -ENOMEM;
 836        }
 837        mutex_init(&cn->mutex);
 838#endif /* CONFIG_PROC_FS */
 839
 840        return 0;
 841}
 842
 843static void clusterip_net_exit(struct net *net)
 844{
 845#ifdef CONFIG_PROC_FS
 846        struct clusterip_net *cn = clusterip_pernet(net);
 847
 848        mutex_lock(&cn->mutex);
 849        proc_remove(cn->procdir);
 850        cn->procdir = NULL;
 851        mutex_unlock(&cn->mutex);
 852#endif
 853        nf_unregister_net_hook(net, &cip_arp_ops);
 854}
 855
 856static struct pernet_operations clusterip_net_ops = {
 857        .init = clusterip_net_init,
 858        .exit = clusterip_net_exit,
 859        .id   = &clusterip_net_id,
 860        .size = sizeof(struct clusterip_net),
 861};
 862
 863static struct notifier_block cip_netdev_notifier = {
 864        .notifier_call = clusterip_netdev_event
 865};
 866
 867static int __init clusterip_tg_init(void)
 868{
 869        int ret;
 870
 871        ret = register_pernet_subsys(&clusterip_net_ops);
 872        if (ret < 0)
 873                return ret;
 874
 875        ret = xt_register_target(&clusterip_tg_reg);
 876        if (ret < 0)
 877                goto cleanup_subsys;
 878
 879        ret = register_netdevice_notifier(&cip_netdev_notifier);
 880        if (ret < 0)
 881                goto unregister_target;
 882
 883        pr_info("ClusterIP Version %s loaded successfully\n",
 884                CLUSTERIP_VERSION);
 885
 886        return 0;
 887
 888unregister_target:
 889        xt_unregister_target(&clusterip_tg_reg);
 890cleanup_subsys:
 891        unregister_pernet_subsys(&clusterip_net_ops);
 892        return ret;
 893}
 894
 895static void __exit clusterip_tg_exit(void)
 896{
 897        pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
 898
 899        unregister_netdevice_notifier(&cip_netdev_notifier);
 900        xt_unregister_target(&clusterip_tg_reg);
 901        unregister_pernet_subsys(&clusterip_net_ops);
 902
 903        /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
 904        rcu_barrier();
 905}
 906
 907module_init(clusterip_tg_init);
 908module_exit(clusterip_tg_exit);
 909