linux/net/netfilter/ipvs/ip_vs_ctl.c
<<
>>
Prefs
   1/*
   2 * IPVS         An implementation of the IP virtual server support for the
   3 *              LINUX operating system.  IPVS is now implemented as a module
   4 *              over the NetFilter framework. IPVS can be used to build a
   5 *              high-performance and highly available server based on a
   6 *              cluster of servers.
   7 *
   8 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   9 *              Peter Kese <peter.kese@ijs.si>
  10 *              Julian Anastasov <ja@ssi.bg>
  11 *
  12 *              This program is free software; you can redistribute it and/or
  13 *              modify it under the terms of the GNU General Public License
  14 *              as published by the Free Software Foundation; either version
  15 *              2 of the License, or (at your option) any later version.
  16 *
  17 * Changes:
  18 *
  19 */
  20
  21#define KMSG_COMPONENT "IPVS"
  22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/types.h>
  27#include <linux/capability.h>
  28#include <linux/fs.h>
  29#include <linux/sysctl.h>
  30#include <linux/proc_fs.h>
  31#include <linux/workqueue.h>
  32#include <linux/swap.h>
  33#include <linux/seq_file.h>
  34#include <linux/slab.h>
  35
  36#include <linux/netfilter.h>
  37#include <linux/netfilter_ipv4.h>
  38#include <linux/mutex.h>
  39
  40#include <net/net_namespace.h>
  41#include <linux/nsproxy.h>
  42#include <net/ip.h>
  43#ifdef CONFIG_IP_VS_IPV6
  44#include <net/ipv6.h>
  45#include <net/ip6_route.h>
  46#endif
  47#include <net/route.h>
  48#include <net/sock.h>
  49#include <net/genetlink.h>
  50
  51#include <asm/uaccess.h>
  52
  53#include <net/ip_vs.h>
  54
/* Mutex for IPVS sockopts; a sleeping lock is used because [gs]etsockopt may sleep. */
  56static DEFINE_MUTEX(__ip_vs_mutex);
  57
  58/* lock for service table */
  59static DEFINE_RWLOCK(__ip_vs_svc_lock);
  60
  61/* sysctl variables */
  62
  63#ifdef CONFIG_IP_VS_DEBUG
  64static int sysctl_ip_vs_debug_level = 0;
  65
  66int ip_vs_get_debug_level(void)
  67{
  68        return sysctl_ip_vs_debug_level;
  69}
  70#endif
  71
  72
  73/*  Protos */
  74static void __ip_vs_del_service(struct ip_vs_service *svc);
  75
  76
  77#ifdef CONFIG_IP_VS_IPV6
  78/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
  79static int __ip_vs_addr_is_local_v6(struct net *net,
  80                                    const struct in6_addr *addr)
  81{
  82        struct rt6_info *rt;
  83        struct flowi6 fl6 = {
  84                .daddr = *addr,
  85        };
  86
  87        rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
  88        if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
  89                return 1;
  90
  91        return 0;
  92}
  93#endif
  94
  95#ifdef CONFIG_SYSCTL
  96/*
  97 *      update_defense_level is called from keventd and from sysctl,
  98 *      so it needs to protect itself from softirqs
  99 */
 100static void update_defense_level(struct netns_ipvs *ipvs)
 101{
 102        struct sysinfo i;
 103        static int old_secure_tcp = 0;
 104        int availmem;
 105        int nomem;
 106        int to_change = -1;
 107
 108        /* we only count free and buffered memory (in pages) */
 109        si_meminfo(&i);
 110        availmem = i.freeram + i.bufferram;
 111        /* however in linux 2.5 the i.bufferram is total page cache size,
 112           we need adjust it */
 113        /* si_swapinfo(&i); */
 114        /* availmem = availmem - (i.totalswap - i.freeswap); */
 115
 116        nomem = (availmem < ipvs->sysctl_amemthresh);
 117
 118        local_bh_disable();
 119
 120        /* drop_entry */
 121        spin_lock(&ipvs->dropentry_lock);
 122        switch (ipvs->sysctl_drop_entry) {
 123        case 0:
 124                atomic_set(&ipvs->dropentry, 0);
 125                break;
 126        case 1:
 127                if (nomem) {
 128                        atomic_set(&ipvs->dropentry, 1);
 129                        ipvs->sysctl_drop_entry = 2;
 130                } else {
 131                        atomic_set(&ipvs->dropentry, 0);
 132                }
 133                break;
 134        case 2:
 135                if (nomem) {
 136                        atomic_set(&ipvs->dropentry, 1);
 137                } else {
 138                        atomic_set(&ipvs->dropentry, 0);
 139                        ipvs->sysctl_drop_entry = 1;
 140                };
 141                break;
 142        case 3:
 143                atomic_set(&ipvs->dropentry, 1);
 144                break;
 145        }
 146        spin_unlock(&ipvs->dropentry_lock);
 147
 148        /* drop_packet */
 149        spin_lock(&ipvs->droppacket_lock);
 150        switch (ipvs->sysctl_drop_packet) {
 151        case 0:
 152                ipvs->drop_rate = 0;
 153                break;
 154        case 1:
 155                if (nomem) {
 156                        ipvs->drop_rate = ipvs->drop_counter
 157                                = ipvs->sysctl_amemthresh /
 158                                (ipvs->sysctl_amemthresh-availmem);
 159                        ipvs->sysctl_drop_packet = 2;
 160                } else {
 161                        ipvs->drop_rate = 0;
 162                }
 163                break;
 164        case 2:
 165                if (nomem) {
 166                        ipvs->drop_rate = ipvs->drop_counter
 167                                = ipvs->sysctl_amemthresh /
 168                                (ipvs->sysctl_amemthresh-availmem);
 169                } else {
 170                        ipvs->drop_rate = 0;
 171                        ipvs->sysctl_drop_packet = 1;
 172                }
 173                break;
 174        case 3:
 175                ipvs->drop_rate = ipvs->sysctl_am_droprate;
 176                break;
 177        }
 178        spin_unlock(&ipvs->droppacket_lock);
 179
 180        /* secure_tcp */
 181        spin_lock(&ipvs->securetcp_lock);
 182        switch (ipvs->sysctl_secure_tcp) {
 183        case 0:
 184                if (old_secure_tcp >= 2)
 185                        to_change = 0;
 186                break;
 187        case 1:
 188                if (nomem) {
 189                        if (old_secure_tcp < 2)
 190                                to_change = 1;
 191                        ipvs->sysctl_secure_tcp = 2;
 192                } else {
 193                        if (old_secure_tcp >= 2)
 194                                to_change = 0;
 195                }
 196                break;
 197        case 2:
 198                if (nomem) {
 199                        if (old_secure_tcp < 2)
 200                                to_change = 1;
 201                } else {
 202                        if (old_secure_tcp >= 2)
 203                                to_change = 0;
 204                        ipvs->sysctl_secure_tcp = 1;
 205                }
 206                break;
 207        case 3:
 208                if (old_secure_tcp < 2)
 209                        to_change = 1;
 210                break;
 211        }
 212        old_secure_tcp = ipvs->sysctl_secure_tcp;
 213        if (to_change >= 0)
 214                ip_vs_protocol_timeout_change(ipvs,
 215                                              ipvs->sysctl_secure_tcp > 1);
 216        spin_unlock(&ipvs->securetcp_lock);
 217
 218        local_bh_enable();
 219}
 220
 221
 222/*
 223 *      Timer for checking the defense
 224 */
 225#define DEFENSE_TIMER_PERIOD    1*HZ
 226
 227static void defense_work_handler(struct work_struct *work)
 228{
 229        struct netns_ipvs *ipvs =
 230                container_of(work, struct netns_ipvs, defense_work.work);
 231
 232        update_defense_level(ipvs);
 233        if (atomic_read(&ipvs->dropentry))
 234                ip_vs_random_dropentry(ipvs->net);
 235        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 236}
 237#endif
 238
 239int
 240ip_vs_use_count_inc(void)
 241{
 242        return try_module_get(THIS_MODULE);
 243}
 244
 245void
 246ip_vs_use_count_dec(void)
 247{
 248        module_put(THIS_MODULE);
 249}
 250
 251
 252/*
 253 *      Hash table: for virtual service lookups
 254 */
 255#define IP_VS_SVC_TAB_BITS 8
 256#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
 257#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
 258
 259/* the service table hashed by <protocol, addr, port> */
 260static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
 261/* the service table hashed by fwmark */
 262static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 263
 264
 265/*
 266 *      Returns hash value for virtual service
 267 */
 268static inline unsigned
 269ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
 270                  const union nf_inet_addr *addr, __be16 port)
 271{
 272        register unsigned porth = ntohs(port);
 273        __be32 addr_fold = addr->ip;
 274
 275#ifdef CONFIG_IP_VS_IPV6
 276        if (af == AF_INET6)
 277                addr_fold = addr->ip6[0]^addr->ip6[1]^
 278                            addr->ip6[2]^addr->ip6[3];
 279#endif
 280        addr_fold ^= ((size_t)net>>8);
 281
 282        return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
 283                & IP_VS_SVC_TAB_MASK;
 284}
 285
 286/*
 287 *      Returns hash value of fwmark for virtual service lookup
 288 */
 289static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
 290{
 291        return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 292}
 293
 294/*
 295 *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
 296 *      or in the ip_vs_svc_fwm_table by fwmark.
 297 *      Should be called with locked tables.
 298 */
 299static int ip_vs_svc_hash(struct ip_vs_service *svc)
 300{
 301        unsigned hash;
 302
 303        if (svc->flags & IP_VS_SVC_F_HASHED) {
 304                pr_err("%s(): request for already hashed, called from %pF\n",
 305                       __func__, __builtin_return_address(0));
 306                return 0;
 307        }
 308
 309        if (svc->fwmark == 0) {
 310                /*
 311                 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
 312                 */
 313                hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
 314                                         &svc->addr, svc->port);
 315                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 316        } else {
 317                /*
 318                 *  Hash it by fwmark in svc_fwm_table
 319                 */
 320                hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
 321                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 322        }
 323
 324        svc->flags |= IP_VS_SVC_F_HASHED;
 325        /* increase its refcnt because it is referenced by the svc table */
 326        atomic_inc(&svc->refcnt);
 327        return 1;
 328}
 329
 330
 331/*
 332 *      Unhashes a service from svc_table / svc_fwm_table.
 333 *      Should be called with locked tables.
 334 */
 335static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 336{
 337        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
 338                pr_err("%s(): request for unhash flagged, called from %pF\n",
 339                       __func__, __builtin_return_address(0));
 340                return 0;
 341        }
 342
 343        if (svc->fwmark == 0) {
 344                /* Remove it from the svc_table table */
 345                list_del(&svc->s_list);
 346        } else {
 347                /* Remove it from the svc_fwm_table table */
 348                list_del(&svc->f_list);
 349        }
 350
 351        svc->flags &= ~IP_VS_SVC_F_HASHED;
 352        atomic_dec(&svc->refcnt);
 353        return 1;
 354}
 355
 356
 357/*
 358 *      Get service by {netns, proto,addr,port} in the service table.
 359 */
 360static inline struct ip_vs_service *
 361__ip_vs_service_find(struct net *net, int af, __u16 protocol,
 362                     const union nf_inet_addr *vaddr, __be16 vport)
 363{
 364        unsigned hash;
 365        struct ip_vs_service *svc;
 366
 367        /* Check for "full" addressed entries */
 368        hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
 369
 370        list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
 371                if ((svc->af == af)
 372                    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 373                    && (svc->port == vport)
 374                    && (svc->protocol == protocol)
 375                    && net_eq(svc->net, net)) {
 376                        /* HIT */
 377                        return svc;
 378                }
 379        }
 380
 381        return NULL;
 382}
 383
 384
 385/*
 386 *      Get service by {fwmark} in the service table.
 387 */
 388static inline struct ip_vs_service *
 389__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
 390{
 391        unsigned hash;
 392        struct ip_vs_service *svc;
 393
 394        /* Check for fwmark addressed entries */
 395        hash = ip_vs_svc_fwm_hashkey(net, fwmark);
 396
 397        list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
 398                if (svc->fwmark == fwmark && svc->af == af
 399                    && net_eq(svc->net, net)) {
 400                        /* HIT */
 401                        return svc;
 402                }
 403        }
 404
 405        return NULL;
 406}
 407
 408struct ip_vs_service *
 409ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 410                  const union nf_inet_addr *vaddr, __be16 vport)
 411{
 412        struct ip_vs_service *svc;
 413        struct netns_ipvs *ipvs = net_ipvs(net);
 414
 415        read_lock(&__ip_vs_svc_lock);
 416
 417        /*
 418         *      Check the table hashed by fwmark first
 419         */
 420        if (fwmark) {
 421                svc = __ip_vs_svc_fwm_find(net, af, fwmark);
 422                if (svc)
 423                        goto out;
 424        }
 425
 426        /*
 427         *      Check the table hashed by <protocol,addr,port>
 428         *      for "full" addressed entries
 429         */
 430        svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
 431
 432        if (svc == NULL
 433            && protocol == IPPROTO_TCP
 434            && atomic_read(&ipvs->ftpsvc_counter)
 435            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 436                /*
 437                 * Check if ftp service entry exists, the packet
 438                 * might belong to FTP data connections.
 439                 */
 440                svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
 441        }
 442
 443        if (svc == NULL
 444            && atomic_read(&ipvs->nullsvc_counter)) {
 445                /*
 446                 * Check if the catch-all port (port zero) exists
 447                 */
 448                svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
 449        }
 450
 451  out:
 452        if (svc)
 453                atomic_inc(&svc->usecnt);
 454        read_unlock(&__ip_vs_svc_lock);
 455
 456        IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
 457                      fwmark, ip_vs_proto_name(protocol),
 458                      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
 459                      svc ? "hit" : "not hit");
 460
 461        return svc;
 462}
 463
 464
 465static inline void
 466__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 467{
 468        atomic_inc(&svc->refcnt);
 469        dest->svc = svc;
 470}
 471
 472static void
 473__ip_vs_unbind_svc(struct ip_vs_dest *dest)
 474{
 475        struct ip_vs_service *svc = dest->svc;
 476
 477        dest->svc = NULL;
 478        if (atomic_dec_and_test(&svc->refcnt)) {
 479                IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
 480                              svc->fwmark,
 481                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
 482                              ntohs(svc->port), atomic_read(&svc->usecnt));
 483                free_percpu(svc->stats.cpustats);
 484                kfree(svc);
 485        }
 486}
 487
 488
 489/*
 490 *      Returns hash value for real service
 491 */
 492static inline unsigned ip_vs_rs_hashkey(int af,
 493                                            const union nf_inet_addr *addr,
 494                                            __be16 port)
 495{
 496        register unsigned porth = ntohs(port);
 497        __be32 addr_fold = addr->ip;
 498
 499#ifdef CONFIG_IP_VS_IPV6
 500        if (af == AF_INET6)
 501                addr_fold = addr->ip6[0]^addr->ip6[1]^
 502                            addr->ip6[2]^addr->ip6[3];
 503#endif
 504
 505        return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
 506                & IP_VS_RTAB_MASK;
 507}
 508
 509/*
 510 *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
 511 *      should be called with locked tables.
 512 */
 513static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 514{
 515        unsigned hash;
 516
 517        if (!list_empty(&dest->d_list)) {
 518                return 0;
 519        }
 520
 521        /*
 522         *      Hash by proto,addr,port,
 523         *      which are the parameters of the real service.
 524         */
 525        hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 526
 527        list_add(&dest->d_list, &ipvs->rs_table[hash]);
 528
 529        return 1;
 530}
 531
 532/*
 533 *      UNhashes ip_vs_dest from rs_table.
 534 *      should be called with locked tables.
 535 */
 536static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 537{
 538        /*
 539         * Remove it from the rs_table table.
 540         */
 541        if (!list_empty(&dest->d_list)) {
 542                list_del(&dest->d_list);
 543                INIT_LIST_HEAD(&dest->d_list);
 544        }
 545
 546        return 1;
 547}
 548
 549/*
 550 *      Lookup real service by <proto,addr,port> in the real service table.
 551 */
 552struct ip_vs_dest *
 553ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 554                          const union nf_inet_addr *daddr,
 555                          __be16 dport)
 556{
 557        struct netns_ipvs *ipvs = net_ipvs(net);
 558        unsigned hash;
 559        struct ip_vs_dest *dest;
 560
 561        /*
 562         *      Check for "full" addressed entries
 563         *      Return the first found entry
 564         */
 565        hash = ip_vs_rs_hashkey(af, daddr, dport);
 566
 567        read_lock(&ipvs->rs_lock);
 568        list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
 569                if ((dest->af == af)
 570                    && ip_vs_addr_equal(af, &dest->addr, daddr)
 571                    && (dest->port == dport)
 572                    && ((dest->protocol == protocol) ||
 573                        dest->vfwmark)) {
 574                        /* HIT */
 575                        read_unlock(&ipvs->rs_lock);
 576                        return dest;
 577                }
 578        }
 579        read_unlock(&ipvs->rs_lock);
 580
 581        return NULL;
 582}
 583
 584/*
 585 *      Lookup destination by {addr,port} in the given service
 586 */
 587static struct ip_vs_dest *
 588ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 589                  __be16 dport)
 590{
 591        struct ip_vs_dest *dest;
 592
 593        /*
 594         * Find the destination for the given service
 595         */
 596        list_for_each_entry(dest, &svc->destinations, n_list) {
 597                if ((dest->af == svc->af)
 598                    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
 599                    && (dest->port == dport)) {
 600                        /* HIT */
 601                        return dest;
 602                }
 603        }
 604
 605        return NULL;
 606}
 607
 608/*
 609 * Find destination by {daddr,dport,vaddr,protocol}
 610 * Cretaed to be used in ip_vs_process_message() in
 611 * the backup synchronization daemon. It finds the
 612 * destination to be bound to the received connection
 613 * on the backup.
 614 *
 615 * ip_vs_lookup_real_service() looked promissing, but
 616 * seems not working as expected.
 617 */
 618struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
 619                                   const union nf_inet_addr *daddr,
 620                                   __be16 dport,
 621                                   const union nf_inet_addr *vaddr,
 622                                   __be16 vport, __u16 protocol, __u32 fwmark,
 623                                   __u32 flags)
 624{
 625        struct ip_vs_dest *dest;
 626        struct ip_vs_service *svc;
 627        __be16 port = dport;
 628
 629        svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
 630        if (!svc)
 631                return NULL;
 632        if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
 633                port = 0;
 634        dest = ip_vs_lookup_dest(svc, daddr, port);
 635        if (!dest)
 636                dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
 637        if (dest)
 638                atomic_inc(&dest->refcnt);
 639        ip_vs_service_put(svc);
 640        return dest;
 641}
 642
 643/*
 644 *  Lookup dest by {svc,addr,port} in the destination trash.
 645 *  The destination trash is used to hold the destinations that are removed
 646 *  from the service table but are still referenced by some conn entries.
 647 *  The reason to add the destination trash is when the dest is temporary
 648 *  down (either by administrator or by monitor program), the dest can be
 649 *  picked back from the trash, the remaining connections to the dest can
 650 *  continue, and the counting information of the dest is also useful for
 651 *  scheduling.
 652 */
 653static struct ip_vs_dest *
 654ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 655                     __be16 dport)
 656{
 657        struct ip_vs_dest *dest, *nxt;
 658        struct netns_ipvs *ipvs = net_ipvs(svc->net);
 659
 660        /*
 661         * Find the destination in trash
 662         */
 663        list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
 664                IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
 665                              "dest->refcnt=%d\n",
 666                              dest->vfwmark,
 667                              IP_VS_DBG_ADDR(svc->af, &dest->addr),
 668                              ntohs(dest->port),
 669                              atomic_read(&dest->refcnt));
 670                if (dest->af == svc->af &&
 671                    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
 672                    dest->port == dport &&
 673                    dest->vfwmark == svc->fwmark &&
 674                    dest->protocol == svc->protocol &&
 675                    (svc->fwmark ||
 676                     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
 677                      dest->vport == svc->port))) {
 678                        /* HIT */
 679                        return dest;
 680                }
 681
 682                /*
 683                 * Try to purge the destination from trash if not referenced
 684                 */
 685                if (atomic_read(&dest->refcnt) == 1) {
 686                        IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
 687                                      "from trash\n",
 688                                      dest->vfwmark,
 689                                      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 690                                      ntohs(dest->port));
 691                        list_del(&dest->n_list);
 692                        ip_vs_dst_reset(dest);
 693                        __ip_vs_unbind_svc(dest);
 694                        free_percpu(dest->stats.cpustats);
 695                        kfree(dest);
 696                }
 697        }
 698
 699        return NULL;
 700}
 701
 702
 703/*
 704 *  Clean up all the destinations in the trash
 705 *  Called by the ip_vs_control_cleanup()
 706 *
 707 *  When the ip_vs_control_clearup is activated by ipvs module exit,
 708 *  the service tables must have been flushed and all the connections
 709 *  are expired, and the refcnt of each destination in the trash must
 710 *  be 1, so we simply release them here.
 711 */
 712static void ip_vs_trash_cleanup(struct net *net)
 713{
 714        struct ip_vs_dest *dest, *nxt;
 715        struct netns_ipvs *ipvs = net_ipvs(net);
 716
 717        list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
 718                list_del(&dest->n_list);
 719                ip_vs_dst_reset(dest);
 720                __ip_vs_unbind_svc(dest);
 721                free_percpu(dest->stats.cpustats);
 722                kfree(dest);
 723        }
 724}
 725
 726static void
 727ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
 728{
 729#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
 730
 731        spin_lock_bh(&src->lock);
 732
 733        IP_VS_SHOW_STATS_COUNTER(conns);
 734        IP_VS_SHOW_STATS_COUNTER(inpkts);
 735        IP_VS_SHOW_STATS_COUNTER(outpkts);
 736        IP_VS_SHOW_STATS_COUNTER(inbytes);
 737        IP_VS_SHOW_STATS_COUNTER(outbytes);
 738
 739        ip_vs_read_estimator(dst, src);
 740
 741        spin_unlock_bh(&src->lock);
 742}
 743
 744static void
 745ip_vs_zero_stats(struct ip_vs_stats *stats)
 746{
 747        spin_lock_bh(&stats->lock);
 748
 749        /* get current counters as zero point, rates are zeroed */
 750
 751#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
 752
 753        IP_VS_ZERO_STATS_COUNTER(conns);
 754        IP_VS_ZERO_STATS_COUNTER(inpkts);
 755        IP_VS_ZERO_STATS_COUNTER(outpkts);
 756        IP_VS_ZERO_STATS_COUNTER(inbytes);
 757        IP_VS_ZERO_STATS_COUNTER(outbytes);
 758
 759        ip_vs_zero_estimator(stats);
 760
 761        spin_unlock_bh(&stats->lock);
 762}
 763
 764/*
 765 *      Update a destination in the given service
 766 */
 767static void
 768__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 769                    struct ip_vs_dest_user_kern *udest, int add)
 770{
 771        struct netns_ipvs *ipvs = net_ipvs(svc->net);
 772        int conn_flags;
 773
 774        /* set the weight and the flags */
 775        atomic_set(&dest->weight, udest->weight);
 776        conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
 777        conn_flags |= IP_VS_CONN_F_INACTIVE;
 778
 779        /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
 780        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
 781                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 782        } else {
 783                /*
 784                 *    Put the real service in rs_table if not present.
 785                 *    For now only for NAT!
 786                 */
 787                write_lock_bh(&ipvs->rs_lock);
 788                ip_vs_rs_hash(ipvs, dest);
 789                write_unlock_bh(&ipvs->rs_lock);
 790        }
 791        atomic_set(&dest->conn_flags, conn_flags);
 792
 793        /* bind the service */
 794        if (!dest->svc) {
 795                __ip_vs_bind_svc(dest, svc);
 796        } else {
 797                if (dest->svc != svc) {
 798                        __ip_vs_unbind_svc(dest);
 799                        ip_vs_zero_stats(&dest->stats);
 800                        __ip_vs_bind_svc(dest, svc);
 801                }
 802        }
 803
 804        /* set the dest status flags */
 805        dest->flags |= IP_VS_DEST_F_AVAILABLE;
 806
 807        if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
 808                dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
 809        dest->u_threshold = udest->u_threshold;
 810        dest->l_threshold = udest->l_threshold;
 811
 812        spin_lock_bh(&dest->dst_lock);
 813        ip_vs_dst_reset(dest);
 814        spin_unlock_bh(&dest->dst_lock);
 815
 816        if (add)
 817                ip_vs_start_estimator(svc->net, &dest->stats);
 818
 819        write_lock_bh(&__ip_vs_svc_lock);
 820
 821        /* Wait until all other svc users go away */
 822        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
 823
 824        if (add) {
 825                list_add(&dest->n_list, &svc->destinations);
 826                svc->num_dests++;
 827        }
 828
 829        /* call the update_service, because server weight may be changed */
 830        if (svc->scheduler->update_service)
 831                svc->scheduler->update_service(svc);
 832
 833        write_unlock_bh(&__ip_vs_svc_lock);
 834}
 835
 836
 837/*
 838 *      Create a destination for the given service
 839 */
 840static int
 841ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 842               struct ip_vs_dest **dest_p)
 843{
 844        struct ip_vs_dest *dest;
 845        unsigned atype;
 846
 847        EnterFunction(2);
 848
 849#ifdef CONFIG_IP_VS_IPV6
 850        if (svc->af == AF_INET6) {
 851                atype = ipv6_addr_type(&udest->addr.in6);
 852                if ((!(atype & IPV6_ADDR_UNICAST) ||
 853                        atype & IPV6_ADDR_LINKLOCAL) &&
 854                        !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
 855                        return -EINVAL;
 856        } else
 857#endif
 858        {
 859                atype = inet_addr_type(svc->net, udest->addr.ip);
 860                if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 861                        return -EINVAL;
 862        }
 863
 864        dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
 865        if (dest == NULL)
 866                return -ENOMEM;
 867
 868        dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
 869        if (!dest->stats.cpustats)
 870                goto err_alloc;
 871
 872        dest->af = svc->af;
 873        dest->protocol = svc->protocol;
 874        dest->vaddr = svc->addr;
 875        dest->vport = svc->port;
 876        dest->vfwmark = svc->fwmark;
 877        ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
 878        dest->port = udest->port;
 879
 880        atomic_set(&dest->activeconns, 0);
 881        atomic_set(&dest->inactconns, 0);
 882        atomic_set(&dest->persistconns, 0);
 883        atomic_set(&dest->refcnt, 1);
 884
 885        INIT_LIST_HEAD(&dest->d_list);
 886        spin_lock_init(&dest->dst_lock);
 887        spin_lock_init(&dest->stats.lock);
 888        __ip_vs_update_dest(svc, dest, udest, 1);
 889
 890        *dest_p = dest;
 891
 892        LeaveFunction(2);
 893        return 0;
 894
 895err_alloc:
 896        kfree(dest);
 897        return -ENOMEM;
 898}
 899
 900
 901/*
 902 *      Add a destination into an existing service
 903 */
 904static int
 905ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 906{
 907        struct ip_vs_dest *dest;
 908        union nf_inet_addr daddr;
 909        __be16 dport = udest->port;
 910        int ret;
 911
 912        EnterFunction(2);
 913
 914        if (udest->weight < 0) {
 915                pr_err("%s(): server weight less than zero\n", __func__);
 916                return -ERANGE;
 917        }
 918
 919        if (udest->l_threshold > udest->u_threshold) {
 920                pr_err("%s(): lower threshold is higher than upper threshold\n",
 921                        __func__);
 922                return -ERANGE;
 923        }
 924
 925        ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
 926
 927        /*
 928         * Check if the dest already exists in the list
 929         */
 930        dest = ip_vs_lookup_dest(svc, &daddr, dport);
 931
 932        if (dest != NULL) {
 933                IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
 934                return -EEXIST;
 935        }
 936
 937        /*
 938         * Check if the dest already exists in the trash and
 939         * is from the same service
 940         */
 941        dest = ip_vs_trash_get_dest(svc, &daddr, dport);
 942
 943        if (dest != NULL) {
 944                IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
 945                              "dest->refcnt=%d, service %u/%s:%u\n",
 946                              IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
 947                              atomic_read(&dest->refcnt),
 948                              dest->vfwmark,
 949                              IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
 950                              ntohs(dest->vport));
 951
 952                /*
 953                 * Get the destination from the trash
 954                 */
 955                list_del(&dest->n_list);
 956
 957                __ip_vs_update_dest(svc, dest, udest, 1);
 958                ret = 0;
 959        } else {
 960                /*
 961                 * Allocate and initialize the dest structure
 962                 */
 963                ret = ip_vs_new_dest(svc, udest, &dest);
 964        }
 965        LeaveFunction(2);
 966
 967        return ret;
 968}
 969
 970
 971/*
 972 *      Edit a destination in the given service
 973 */
 974static int
 975ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 976{
 977        struct ip_vs_dest *dest;
 978        union nf_inet_addr daddr;
 979        __be16 dport = udest->port;
 980
 981        EnterFunction(2);
 982
 983        if (udest->weight < 0) {
 984                pr_err("%s(): server weight less than zero\n", __func__);
 985                return -ERANGE;
 986        }
 987
 988        if (udest->l_threshold > udest->u_threshold) {
 989                pr_err("%s(): lower threshold is higher than upper threshold\n",
 990                        __func__);
 991                return -ERANGE;
 992        }
 993
 994        ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
 995
 996        /*
 997         *  Lookup the destination list
 998         */
 999        dest = ip_vs_lookup_dest(svc, &daddr, dport);
1000
1001        if (dest == NULL) {
1002                IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1003                return -ENOENT;
1004        }
1005
1006        __ip_vs_update_dest(svc, dest, udest, 0);
1007        LeaveFunction(2);
1008
1009        return 0;
1010}
1011
1012
1013/*
1014 *      Delete a destination (must be already unlinked from the service)
1015 */
1016static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1017{
1018        struct netns_ipvs *ipvs = net_ipvs(net);
1019
1020        ip_vs_stop_estimator(net, &dest->stats);
1021
1022        /*
1023         *  Remove it from the d-linked list with the real services.
1024         */
1025        write_lock_bh(&ipvs->rs_lock);
1026        ip_vs_rs_unhash(dest);
1027        write_unlock_bh(&ipvs->rs_lock);
1028
1029        /*
1030         *  Decrease the refcnt of the dest, and free the dest
1031         *  if nobody refers to it (refcnt=0). Otherwise, throw
1032         *  the destination into the trash.
1033         */
1034        if (atomic_dec_and_test(&dest->refcnt)) {
1035                IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1036                              dest->vfwmark,
1037                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1038                              ntohs(dest->port));
1039                ip_vs_dst_reset(dest);
1040                /* simply decrease svc->refcnt here, let the caller check
1041                   and release the service if nobody refers to it.
1042                   Only user context can release destination and service,
1043                   and only one user context can update virtual service at a
1044                   time, so the operation here is OK */
1045                atomic_dec(&dest->svc->refcnt);
1046                free_percpu(dest->stats.cpustats);
1047                kfree(dest);
1048        } else {
1049                IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1050                              "dest->refcnt=%d\n",
1051                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1052                              ntohs(dest->port),
1053                              atomic_read(&dest->refcnt));
1054                list_add(&dest->n_list, &ipvs->dest_trash);
1055                atomic_inc(&dest->refcnt);
1056        }
1057}
1058
1059
1060/*
1061 *      Unlink a destination from the given service
1062 */
1063static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1064                                struct ip_vs_dest *dest,
1065                                int svcupd)
1066{
1067        dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1068
1069        /*
1070         *  Remove it from the d-linked destination list.
1071         */
1072        list_del(&dest->n_list);
1073        svc->num_dests--;
1074
1075        /*
1076         *  Call the update_service function of its scheduler
1077         */
1078        if (svcupd && svc->scheduler->update_service)
1079                        svc->scheduler->update_service(svc);
1080}
1081
1082
1083/*
1084 *      Delete a destination server in the given service
1085 */
1086static int
1087ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1088{
1089        struct ip_vs_dest *dest;
1090        __be16 dport = udest->port;
1091
1092        EnterFunction(2);
1093
1094        dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1095
1096        if (dest == NULL) {
1097                IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1098                return -ENOENT;
1099        }
1100
1101        write_lock_bh(&__ip_vs_svc_lock);
1102
1103        /*
1104         *      Wait until all other svc users go away.
1105         */
1106        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1107
1108        /*
1109         *      Unlink dest from the service
1110         */
1111        __ip_vs_unlink_dest(svc, dest, 1);
1112
1113        write_unlock_bh(&__ip_vs_svc_lock);
1114
1115        /*
1116         *      Delete the destination
1117         */
1118        __ip_vs_del_dest(svc->net, dest);
1119
1120        LeaveFunction(2);
1121
1122        return 0;
1123}
1124
1125
1126/*
1127 *      Add a service into the service hash table
1128 */
1129static int
1130ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1131                  struct ip_vs_service **svc_p)
1132{
1133        int ret = 0;
1134        struct ip_vs_scheduler *sched = NULL;
1135        struct ip_vs_pe *pe = NULL;
1136        struct ip_vs_service *svc = NULL;
1137        struct netns_ipvs *ipvs = net_ipvs(net);
1138
1139        /* increase the module use count */
1140        ip_vs_use_count_inc();
1141
1142        /* Lookup the scheduler by 'u->sched_name' */
1143        sched = ip_vs_scheduler_get(u->sched_name);
1144        if (sched == NULL) {
1145                pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1146                ret = -ENOENT;
1147                goto out_err;
1148        }
1149
1150        if (u->pe_name && *u->pe_name) {
1151                pe = ip_vs_pe_getbyname(u->pe_name);
1152                if (pe == NULL) {
1153                        pr_info("persistence engine module ip_vs_pe_%s "
1154                                "not found\n", u->pe_name);
1155                        ret = -ENOENT;
1156                        goto out_err;
1157                }
1158        }
1159
1160#ifdef CONFIG_IP_VS_IPV6
1161        if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1162                ret = -EINVAL;
1163                goto out_err;
1164        }
1165#endif
1166
1167        svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1168        if (svc == NULL) {
1169                IP_VS_DBG(1, "%s(): no memory\n", __func__);
1170                ret = -ENOMEM;
1171                goto out_err;
1172        }
1173        svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1174        if (!svc->stats.cpustats)
1175                goto out_err;
1176
1177        /* I'm the first user of the service */
1178        atomic_set(&svc->usecnt, 0);
1179        atomic_set(&svc->refcnt, 0);
1180
1181        svc->af = u->af;
1182        svc->protocol = u->protocol;
1183        ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1184        svc->port = u->port;
1185        svc->fwmark = u->fwmark;
1186        svc->flags = u->flags;
1187        svc->timeout = u->timeout * HZ;
1188        svc->netmask = u->netmask;
1189        svc->net = net;
1190
1191        INIT_LIST_HEAD(&svc->destinations);
1192        rwlock_init(&svc->sched_lock);
1193        spin_lock_init(&svc->stats.lock);
1194
1195        /* Bind the scheduler */
1196        ret = ip_vs_bind_scheduler(svc, sched);
1197        if (ret)
1198                goto out_err;
1199        sched = NULL;
1200
1201        /* Bind the ct retriever */
1202        ip_vs_bind_pe(svc, pe);
1203        pe = NULL;
1204
1205        /* Update the virtual service counters */
1206        if (svc->port == FTPPORT)
1207                atomic_inc(&ipvs->ftpsvc_counter);
1208        else if (svc->port == 0)
1209                atomic_inc(&ipvs->nullsvc_counter);
1210
1211        ip_vs_start_estimator(net, &svc->stats);
1212
1213        /* Count only IPv4 services for old get/setsockopt interface */
1214        if (svc->af == AF_INET)
1215                ipvs->num_services++;
1216
1217        /* Hash the service into the service table */
1218        write_lock_bh(&__ip_vs_svc_lock);
1219        ip_vs_svc_hash(svc);
1220        write_unlock_bh(&__ip_vs_svc_lock);
1221
1222        *svc_p = svc;
1223        /* Now there is a service - full throttle */
1224        ipvs->enable = 1;
1225        return 0;
1226
1227
1228 out_err:
1229        if (svc != NULL) {
1230                ip_vs_unbind_scheduler(svc);
1231                if (svc->inc) {
1232                        local_bh_disable();
1233                        ip_vs_app_inc_put(svc->inc);
1234                        local_bh_enable();
1235                }
1236                if (svc->stats.cpustats)
1237                        free_percpu(svc->stats.cpustats);
1238                kfree(svc);
1239        }
1240        ip_vs_scheduler_put(sched);
1241        ip_vs_pe_put(pe);
1242
1243        /* decrease the module use count */
1244        ip_vs_use_count_dec();
1245
1246        return ret;
1247}
1248
1249
1250/*
1251 *      Edit a service and bind it with a new scheduler
1252 */
1253static int
1254ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1255{
1256        struct ip_vs_scheduler *sched, *old_sched;
1257        struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1258        int ret = 0;
1259
1260        /*
1261         * Lookup the scheduler, by 'u->sched_name'
1262         */
1263        sched = ip_vs_scheduler_get(u->sched_name);
1264        if (sched == NULL) {
1265                pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1266                return -ENOENT;
1267        }
1268        old_sched = sched;
1269
1270        if (u->pe_name && *u->pe_name) {
1271                pe = ip_vs_pe_getbyname(u->pe_name);
1272                if (pe == NULL) {
1273                        pr_info("persistence engine module ip_vs_pe_%s "
1274                                "not found\n", u->pe_name);
1275                        ret = -ENOENT;
1276                        goto out;
1277                }
1278                old_pe = pe;
1279        }
1280
1281#ifdef CONFIG_IP_VS_IPV6
1282        if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1283                ret = -EINVAL;
1284                goto out;
1285        }
1286#endif
1287
1288        write_lock_bh(&__ip_vs_svc_lock);
1289
1290        /*
1291         * Wait until all other svc users go away.
1292         */
1293        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1294
1295        /*
1296         * Set the flags and timeout value
1297         */
1298        svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1299        svc->timeout = u->timeout * HZ;
1300        svc->netmask = u->netmask;
1301
1302        old_sched = svc->scheduler;
1303        if (sched != old_sched) {
1304                /*
1305                 * Unbind the old scheduler
1306                 */
1307                if ((ret = ip_vs_unbind_scheduler(svc))) {
1308                        old_sched = sched;
1309                        goto out_unlock;
1310                }
1311
1312                /*
1313                 * Bind the new scheduler
1314                 */
1315                if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1316                        /*
1317                         * If ip_vs_bind_scheduler fails, restore the old
1318                         * scheduler.
1319                         * The main reason of failure is out of memory.
1320                         *
1321                         * The question is if the old scheduler can be
1322                         * restored all the time. TODO: if it cannot be
1323                         * restored some time, we must delete the service,
1324                         * otherwise the system may crash.
1325                         */
1326                        ip_vs_bind_scheduler(svc, old_sched);
1327                        old_sched = sched;
1328                        goto out_unlock;
1329                }
1330        }
1331
1332        old_pe = svc->pe;
1333        if (pe != old_pe) {
1334                ip_vs_unbind_pe(svc);
1335                ip_vs_bind_pe(svc, pe);
1336        }
1337
1338out_unlock:
1339        write_unlock_bh(&__ip_vs_svc_lock);
1340out:
1341        ip_vs_scheduler_put(old_sched);
1342        ip_vs_pe_put(old_pe);
1343        return ret;
1344}
1345
1346
1347/*
1348 *      Delete a service from the service list
1349 *      - The service must be unlinked, unlocked and not referenced!
1350 *      - We are called under _bh lock
1351 */
1352static void __ip_vs_del_service(struct ip_vs_service *svc)
1353{
1354        struct ip_vs_dest *dest, *nxt;
1355        struct ip_vs_scheduler *old_sched;
1356        struct ip_vs_pe *old_pe;
1357        struct netns_ipvs *ipvs = net_ipvs(svc->net);
1358
1359        pr_info("%s: enter\n", __func__);
1360
1361        /* Count only IPv4 services for old get/setsockopt interface */
1362        if (svc->af == AF_INET)
1363                ipvs->num_services--;
1364
1365        ip_vs_stop_estimator(svc->net, &svc->stats);
1366
1367        /* Unbind scheduler */
1368        old_sched = svc->scheduler;
1369        ip_vs_unbind_scheduler(svc);
1370        ip_vs_scheduler_put(old_sched);
1371
1372        /* Unbind persistence engine */
1373        old_pe = svc->pe;
1374        ip_vs_unbind_pe(svc);
1375        ip_vs_pe_put(old_pe);
1376
1377        /* Unbind app inc */
1378        if (svc->inc) {
1379                ip_vs_app_inc_put(svc->inc);
1380                svc->inc = NULL;
1381        }
1382
1383        /*
1384         *    Unlink the whole destination list
1385         */
1386        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1387                __ip_vs_unlink_dest(svc, dest, 0);
1388                __ip_vs_del_dest(svc->net, dest);
1389        }
1390
1391        /*
1392         *    Update the virtual service counters
1393         */
1394        if (svc->port == FTPPORT)
1395                atomic_dec(&ipvs->ftpsvc_counter);
1396        else if (svc->port == 0)
1397                atomic_dec(&ipvs->nullsvc_counter);
1398
1399        /*
1400         *    Free the service if nobody refers to it
1401         */
1402        if (atomic_read(&svc->refcnt) == 0) {
1403                IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1404                              svc->fwmark,
1405                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
1406                              ntohs(svc->port), atomic_read(&svc->usecnt));
1407                free_percpu(svc->stats.cpustats);
1408                kfree(svc);
1409        }
1410
1411        /* decrease the module use count */
1412        ip_vs_use_count_dec();
1413}
1414
1415/*
1416 * Unlink a service from list and try to delete it if its refcnt reached 0
1417 */
1418static void ip_vs_unlink_service(struct ip_vs_service *svc)
1419{
1420        /*
1421         * Unhash it from the service table
1422         */
1423        write_lock_bh(&__ip_vs_svc_lock);
1424
1425        ip_vs_svc_unhash(svc);
1426
1427        /*
1428         * Wait until all the svc users go away.
1429         */
1430        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1431
1432        __ip_vs_del_service(svc);
1433
1434        write_unlock_bh(&__ip_vs_svc_lock);
1435}
1436
1437/*
1438 *      Delete a service from the service list
1439 */
1440static int ip_vs_del_service(struct ip_vs_service *svc)
1441{
1442        if (svc == NULL)
1443                return -EEXIST;
1444        ip_vs_unlink_service(svc);
1445
1446        return 0;
1447}
1448
1449
1450/*
1451 *      Flush all the virtual services
1452 */
1453static int ip_vs_flush(struct net *net)
1454{
1455        int idx;
1456        struct ip_vs_service *svc, *nxt;
1457
1458        /*
1459         * Flush the service table hashed by <netns,protocol,addr,port>
1460         */
1461        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1462                list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1463                                         s_list) {
1464                        if (net_eq(svc->net, net))
1465                                ip_vs_unlink_service(svc);
1466                }
1467        }
1468
1469        /*
1470         * Flush the service table hashed by fwmark
1471         */
1472        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1473                list_for_each_entry_safe(svc, nxt,
1474                                         &ip_vs_svc_fwm_table[idx], f_list) {
1475                        if (net_eq(svc->net, net))
1476                                ip_vs_unlink_service(svc);
1477                }
1478        }
1479
1480        return 0;
1481}
1482
1483/*
1484 *      Delete service by {netns} in the service table.
1485 *      Called by __ip_vs_cleanup()
1486 */
1487void ip_vs_service_net_cleanup(struct net *net)
1488{
1489        EnterFunction(2);
1490        /* Check for "full" addressed entries */
1491        mutex_lock(&__ip_vs_mutex);
1492        ip_vs_flush(net);
1493        mutex_unlock(&__ip_vs_mutex);
1494        LeaveFunction(2);
1495}
1496/*
1497 * Release dst hold by dst_cache
1498 */
1499static inline void
1500__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1501{
1502        spin_lock_bh(&dest->dst_lock);
1503        if (dest->dst_cache && dest->dst_cache->dev == dev) {
1504                IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1505                              dev->name,
1506                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1507                              ntohs(dest->port),
1508                              atomic_read(&dest->refcnt));
1509                ip_vs_dst_reset(dest);
1510        }
1511        spin_unlock_bh(&dest->dst_lock);
1512
1513}
1514/*
1515 * Netdev event receiver
1516 * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1517 * a device that is "unregister" it must be released.
1518 */
1519static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1520                            void *ptr)
1521{
1522        struct net_device *dev = ptr;
1523        struct net *net = dev_net(dev);
1524        struct ip_vs_service *svc;
1525        struct ip_vs_dest *dest;
1526        unsigned int idx;
1527
1528        if (event != NETDEV_UNREGISTER)
1529                return NOTIFY_DONE;
1530        IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1531        EnterFunction(2);
1532        mutex_lock(&__ip_vs_mutex);
1533        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1534                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1535                        if (net_eq(svc->net, net)) {
1536                                list_for_each_entry(dest, &svc->destinations,
1537                                                    n_list) {
1538                                        __ip_vs_dev_reset(dest, dev);
1539                                }
1540                        }
1541                }
1542
1543                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1544                        if (net_eq(svc->net, net)) {
1545                                list_for_each_entry(dest, &svc->destinations,
1546                                                    n_list) {
1547                                        __ip_vs_dev_reset(dest, dev);
1548                                }
1549                        }
1550
1551                }
1552        }
1553
1554        list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
1555                __ip_vs_dev_reset(dest, dev);
1556        }
1557        mutex_unlock(&__ip_vs_mutex);
1558        LeaveFunction(2);
1559        return NOTIFY_DONE;
1560}
1561
1562/*
1563 *      Zero counters in a service or all services
1564 */
1565static int ip_vs_zero_service(struct ip_vs_service *svc)
1566{
1567        struct ip_vs_dest *dest;
1568
1569        write_lock_bh(&__ip_vs_svc_lock);
1570        list_for_each_entry(dest, &svc->destinations, n_list) {
1571                ip_vs_zero_stats(&dest->stats);
1572        }
1573        ip_vs_zero_stats(&svc->stats);
1574        write_unlock_bh(&__ip_vs_svc_lock);
1575        return 0;
1576}
1577
1578static int ip_vs_zero_all(struct net *net)
1579{
1580        int idx;
1581        struct ip_vs_service *svc;
1582
1583        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1584                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1585                        if (net_eq(svc->net, net))
1586                                ip_vs_zero_service(svc);
1587                }
1588        }
1589
1590        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1591                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1592                        if (net_eq(svc->net, net))
1593                                ip_vs_zero_service(svc);
1594                }
1595        }
1596
1597        ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1598        return 0;
1599}
1600
1601#ifdef CONFIG_SYSCTL
1602static int
1603proc_do_defense_mode(ctl_table *table, int write,
1604                     void __user *buffer, size_t *lenp, loff_t *ppos)
1605{
1606        struct net *net = current->nsproxy->net_ns;
1607        int *valp = table->data;
1608        int val = *valp;
1609        int rc;
1610
1611        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1612        if (write && (*valp != val)) {
1613                if ((*valp < 0) || (*valp > 3)) {
1614                        /* Restore the correct value */
1615                        *valp = val;
1616                } else {
1617                        update_defense_level(net_ipvs(net));
1618                }
1619        }
1620        return rc;
1621}
1622
1623static int
1624proc_do_sync_threshold(ctl_table *table, int write,
1625                       void __user *buffer, size_t *lenp, loff_t *ppos)
1626{
1627        int *valp = table->data;
1628        int val[2];
1629        int rc;
1630
1631        /* backup the value first */
1632        memcpy(val, valp, sizeof(val));
1633
1634        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1635        if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1636                /* Restore the correct value */
1637                memcpy(valp, val, sizeof(val));
1638        }
1639        return rc;
1640}
1641
1642static int
1643proc_do_sync_mode(ctl_table *table, int write,
1644                     void __user *buffer, size_t *lenp, loff_t *ppos)
1645{
1646        int *valp = table->data;
1647        int val = *valp;
1648        int rc;
1649
1650        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1651        if (write && (*valp != val)) {
1652                if ((*valp < 0) || (*valp > 1)) {
1653                        /* Restore the correct value */
1654                        *valp = val;
1655                } else {
1656                        struct net *net = current->nsproxy->net_ns;
1657                        ip_vs_sync_switch_mode(net, val);
1658                }
1659        }
1660        return rc;
1661}
1662
1663/*
1664 *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1665 *      Do not change order or insert new entries without
1666 *      align with netns init in ip_vs_control_net_init()
1667 */
1668
1669static struct ctl_table vs_vars[] = {
1670        {
1671                .procname       = "amemthresh",
1672                .maxlen         = sizeof(int),
1673                .mode           = 0644,
1674                .proc_handler   = proc_dointvec,
1675        },
1676        {
1677                .procname       = "am_droprate",
1678                .maxlen         = sizeof(int),
1679                .mode           = 0644,
1680                .proc_handler   = proc_dointvec,
1681        },
1682        {
1683                .procname       = "drop_entry",
1684                .maxlen         = sizeof(int),
1685                .mode           = 0644,
1686                .proc_handler   = proc_do_defense_mode,
1687        },
1688        {
1689                .procname       = "drop_packet",
1690                .maxlen         = sizeof(int),
1691                .mode           = 0644,
1692                .proc_handler   = proc_do_defense_mode,
1693        },
1694#ifdef CONFIG_IP_VS_NFCT
1695        {
1696                .procname       = "conntrack",
1697                .maxlen         = sizeof(int),
1698                .mode           = 0644,
1699                .proc_handler   = &proc_dointvec,
1700        },
1701#endif
1702        {
1703                .procname       = "secure_tcp",
1704                .maxlen         = sizeof(int),
1705                .mode           = 0644,
1706                .proc_handler   = proc_do_defense_mode,
1707        },
1708        {
1709                .procname       = "snat_reroute",
1710                .maxlen         = sizeof(int),
1711                .mode           = 0644,
1712                .proc_handler   = &proc_dointvec,
1713        },
1714        {
1715                .procname       = "sync_version",
1716                .maxlen         = sizeof(int),
1717                .mode           = 0644,
1718                .proc_handler   = &proc_do_sync_mode,
1719        },
1720        {
1721                .procname       = "cache_bypass",
1722                .maxlen         = sizeof(int),
1723                .mode           = 0644,
1724                .proc_handler   = proc_dointvec,
1725        },
1726        {
1727                .procname       = "expire_nodest_conn",
1728                .maxlen         = sizeof(int),
1729                .mode           = 0644,
1730                .proc_handler   = proc_dointvec,
1731        },
1732        {
1733                .procname       = "expire_quiescent_template",
1734                .maxlen         = sizeof(int),
1735                .mode           = 0644,
1736                .proc_handler   = proc_dointvec,
1737        },
1738        {
1739                .procname       = "sync_threshold",
1740                .maxlen         =
1741                        sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1742                .mode           = 0644,
1743                .proc_handler   = proc_do_sync_threshold,
1744        },
1745        {
1746                .procname       = "nat_icmp_send",
1747                .maxlen         = sizeof(int),
1748                .mode           = 0644,
1749                .proc_handler   = proc_dointvec,
1750        },
1751#ifdef CONFIG_IP_VS_DEBUG
1752        {
1753                .procname       = "debug_level",
1754                .data           = &sysctl_ip_vs_debug_level,
1755                .maxlen         = sizeof(int),
1756                .mode           = 0644,
1757                .proc_handler   = proc_dointvec,
1758        },
1759#endif
1760#if 0
1761        {
1762                .procname       = "timeout_established",
1763                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1764                .maxlen         = sizeof(int),
1765                .mode           = 0644,
1766                .proc_handler   = proc_dointvec_jiffies,
1767        },
1768        {
1769                .procname       = "timeout_synsent",
1770                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1771                .maxlen         = sizeof(int),
1772                .mode           = 0644,
1773                .proc_handler   = proc_dointvec_jiffies,
1774        },
1775        {
1776                .procname       = "timeout_synrecv",
1777                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1778                .maxlen         = sizeof(int),
1779                .mode           = 0644,
1780                .proc_handler   = proc_dointvec_jiffies,
1781        },
1782        {
1783                .procname       = "timeout_finwait",
1784                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1785                .maxlen         = sizeof(int),
1786                .mode           = 0644,
1787                .proc_handler   = proc_dointvec_jiffies,
1788        },
1789        {
1790                .procname       = "timeout_timewait",
1791                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1792                .maxlen         = sizeof(int),
1793                .mode           = 0644,
1794                .proc_handler   = proc_dointvec_jiffies,
1795        },
1796        {
1797                .procname       = "timeout_close",
1798                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1799                .maxlen         = sizeof(int),
1800                .mode           = 0644,
1801                .proc_handler   = proc_dointvec_jiffies,
1802        },
1803        {
1804                .procname       = "timeout_closewait",
1805                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1806                .maxlen         = sizeof(int),
1807                .mode           = 0644,
1808                .proc_handler   = proc_dointvec_jiffies,
1809        },
1810        {
1811                .procname       = "timeout_lastack",
1812                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1813                .maxlen         = sizeof(int),
1814                .mode           = 0644,
1815                .proc_handler   = proc_dointvec_jiffies,
1816        },
1817        {
1818                .procname       = "timeout_listen",
1819                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1820                .maxlen         = sizeof(int),
1821                .mode           = 0644,
1822                .proc_handler   = proc_dointvec_jiffies,
1823        },
1824        {
1825                .procname       = "timeout_synack",
1826                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1827                .maxlen         = sizeof(int),
1828                .mode           = 0644,
1829                .proc_handler   = proc_dointvec_jiffies,
1830        },
1831        {
1832                .procname       = "timeout_udp",
1833                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1834                .maxlen         = sizeof(int),
1835                .mode           = 0644,
1836                .proc_handler   = proc_dointvec_jiffies,
1837        },
1838        {
1839                .procname       = "timeout_icmp",
1840                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1841                .maxlen         = sizeof(int),
1842                .mode           = 0644,
1843                .proc_handler   = proc_dointvec_jiffies,
1844        },
1845#endif
1846        { }
1847};
1848
1849const struct ctl_path net_vs_ctl_path[] = {
1850        { .procname = "net", },
1851        { .procname = "ipv4", },
1852        { .procname = "vs", },
1853        { }
1854};
1855EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1856#endif
1857
1858#ifdef CONFIG_PROC_FS
1859
1860struct ip_vs_iter {
1861        struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1862        struct list_head *table;
1863        int bucket;
1864};
1865
1866/*
1867 *      Write the contents of the VS rule table to a PROCfs file.
1868 *      (It is kept just for backward compatibility)
1869 */
1870static inline const char *ip_vs_fwd_name(unsigned flags)
1871{
1872        switch (flags & IP_VS_CONN_F_FWD_MASK) {
1873        case IP_VS_CONN_F_LOCALNODE:
1874                return "Local";
1875        case IP_VS_CONN_F_TUNNEL:
1876                return "Tunnel";
1877        case IP_VS_CONN_F_DROUTE:
1878                return "Route";
1879        default:
1880                return "Masq";
1881        }
1882}
1883
1884
1885/* Get the Nth entry in the two lists */
1886static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1887{
1888        struct net *net = seq_file_net(seq);
1889        struct ip_vs_iter *iter = seq->private;
1890        int idx;
1891        struct ip_vs_service *svc;
1892
1893        /* look in hash by protocol */
1894        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1895                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1896                        if (net_eq(svc->net, net) && pos-- == 0) {
1897                                iter->table = ip_vs_svc_table;
1898                                iter->bucket = idx;
1899                                return svc;
1900                        }
1901                }
1902        }
1903
1904        /* keep looking in fwmark */
1905        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1906                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1907                        if (net_eq(svc->net, net) && pos-- == 0) {
1908                                iter->table = ip_vs_svc_fwm_table;
1909                                iter->bucket = idx;
1910                                return svc;
1911                        }
1912                }
1913        }
1914
1915        return NULL;
1916}
1917
1918static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1919__acquires(__ip_vs_svc_lock)
1920{
1921
1922        read_lock_bh(&__ip_vs_svc_lock);
1923        return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1924}
1925
1926
1927static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1928{
1929        struct list_head *e;
1930        struct ip_vs_iter *iter;
1931        struct ip_vs_service *svc;
1932
1933        ++*pos;
1934        if (v == SEQ_START_TOKEN)
1935                return ip_vs_info_array(seq,0);
1936
1937        svc = v;
1938        iter = seq->private;
1939
1940        if (iter->table == ip_vs_svc_table) {
1941                /* next service in table hashed by protocol */
1942                if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1943                        return list_entry(e, struct ip_vs_service, s_list);
1944
1945
1946                while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1947                        list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1948                                            s_list) {
1949                                return svc;
1950                        }
1951                }
1952
1953                iter->table = ip_vs_svc_fwm_table;
1954                iter->bucket = -1;
1955                goto scan_fwmark;
1956        }
1957
1958        /* next service in hashed by fwmark */
1959        if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1960                return list_entry(e, struct ip_vs_service, f_list);
1961
1962 scan_fwmark:
1963        while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1964                list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1965                                    f_list)
1966                        return svc;
1967        }
1968
1969        return NULL;
1970}
1971
1972static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1973__releases(__ip_vs_svc_lock)
1974{
1975        read_unlock_bh(&__ip_vs_svc_lock);
1976}
1977
1978
1979static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1980{
1981        if (v == SEQ_START_TOKEN) {
1982                seq_printf(seq,
1983                        "IP Virtual Server version %d.%d.%d (size=%d)\n",
1984                        NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1985                seq_puts(seq,
1986                         "Prot LocalAddress:Port Scheduler Flags\n");
1987                seq_puts(seq,
1988                         "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1989        } else {
1990                const struct ip_vs_service *svc = v;
1991                const struct ip_vs_iter *iter = seq->private;
1992                const struct ip_vs_dest *dest;
1993
1994                if (iter->table == ip_vs_svc_table) {
1995#ifdef CONFIG_IP_VS_IPV6
1996                        if (svc->af == AF_INET6)
1997                                seq_printf(seq, "%s  [%pI6]:%04X %s ",
1998                                           ip_vs_proto_name(svc->protocol),
1999                                           &svc->addr.in6,
2000                                           ntohs(svc->port),
2001                                           svc->scheduler->name);
2002                        else
2003#endif
2004                                seq_printf(seq, "%s  %08X:%04X %s %s ",
2005                                           ip_vs_proto_name(svc->protocol),
2006                                           ntohl(svc->addr.ip),
2007                                           ntohs(svc->port),
2008                                           svc->scheduler->name,
2009                                           (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2010                } else {
2011                        seq_printf(seq, "FWM  %08X %s %s",
2012                                   svc->fwmark, svc->scheduler->name,
2013                                   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2014                }
2015
2016                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2017                        seq_printf(seq, "persistent %d %08X\n",
2018                                svc->timeout,
2019                                ntohl(svc->netmask));
2020                else
2021                        seq_putc(seq, '\n');
2022
2023                list_for_each_entry(dest, &svc->destinations, n_list) {
2024#ifdef CONFIG_IP_VS_IPV6
2025                        if (dest->af == AF_INET6)
2026                                seq_printf(seq,
2027                                           "  -> [%pI6]:%04X"
2028                                           "      %-7s %-6d %-10d %-10d\n",
2029                                           &dest->addr.in6,
2030                                           ntohs(dest->port),
2031                                           ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2032                                           atomic_read(&dest->weight),
2033                                           atomic_read(&dest->activeconns),
2034                                           atomic_read(&dest->inactconns));
2035                        else
2036#endif
2037                                seq_printf(seq,
2038                                           "  -> %08X:%04X      "
2039                                           "%-7s %-6d %-10d %-10d\n",
2040                                           ntohl(dest->addr.ip),
2041                                           ntohs(dest->port),
2042                                           ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2043                                           atomic_read(&dest->weight),
2044                                           atomic_read(&dest->activeconns),
2045                                           atomic_read(&dest->inactconns));
2046
2047                }
2048        }
2049        return 0;
2050}
2051
2052static const struct seq_operations ip_vs_info_seq_ops = {
2053        .start = ip_vs_info_seq_start,
2054        .next  = ip_vs_info_seq_next,
2055        .stop  = ip_vs_info_seq_stop,
2056        .show  = ip_vs_info_seq_show,
2057};
2058
2059static int ip_vs_info_open(struct inode *inode, struct file *file)
2060{
2061        return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2062                        sizeof(struct ip_vs_iter));
2063}
2064
2065static const struct file_operations ip_vs_info_fops = {
2066        .owner   = THIS_MODULE,
2067        .open    = ip_vs_info_open,
2068        .read    = seq_read,
2069        .llseek  = seq_lseek,
2070        .release = seq_release_net,
2071};
2072
2073static int ip_vs_stats_show(struct seq_file *seq, void *v)
2074{
2075        struct net *net = seq_file_single_net(seq);
2076        struct ip_vs_stats_user show;
2077
2078/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2079        seq_puts(seq,
2080                 "   Total Incoming Outgoing         Incoming         Outgoing\n");
2081        seq_printf(seq,
2082                   "   Conns  Packets  Packets            Bytes            Bytes\n");
2083
2084        ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2085        seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2086                   show.inpkts, show.outpkts,
2087                   (unsigned long long) show.inbytes,
2088                   (unsigned long long) show.outbytes);
2089
2090/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2091        seq_puts(seq,
2092                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2093        seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2094                        show.cps, show.inpps, show.outpps,
2095                        show.inbps, show.outbps);
2096
2097        return 0;
2098}
2099
2100static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2101{
2102        return single_open_net(inode, file, ip_vs_stats_show);
2103}
2104
2105static const struct file_operations ip_vs_stats_fops = {
2106        .owner = THIS_MODULE,
2107        .open = ip_vs_stats_seq_open,
2108        .read = seq_read,
2109        .llseek = seq_lseek,
2110        .release = single_release_net,
2111};
2112
2113static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2114{
2115        struct net *net = seq_file_single_net(seq);
2116        struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2117        struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2118        struct ip_vs_stats_user rates;
2119        int i;
2120
2121/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2122        seq_puts(seq,
2123                 "       Total Incoming Outgoing         Incoming         Outgoing\n");
2124        seq_printf(seq,
2125                   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2126
2127        for_each_possible_cpu(i) {
2128                struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2129                unsigned int start;
2130                __u64 inbytes, outbytes;
2131
2132                do {
2133                        start = u64_stats_fetch_begin_bh(&u->syncp);
2134                        inbytes = u->ustats.inbytes;
2135                        outbytes = u->ustats.outbytes;
2136                } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2137
2138                seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2139                           i, u->ustats.conns, u->ustats.inpkts,
2140                           u->ustats.outpkts, (__u64)inbytes,
2141                           (__u64)outbytes);
2142        }
2143
2144        spin_lock_bh(&tot_stats->lock);
2145
2146        seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2147                   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2148                   tot_stats->ustats.outpkts,
2149                   (unsigned long long) tot_stats->ustats.inbytes,
2150                   (unsigned long long) tot_stats->ustats.outbytes);
2151
2152        ip_vs_read_estimator(&rates, tot_stats);
2153
2154        spin_unlock_bh(&tot_stats->lock);
2155
2156/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2157        seq_puts(seq,
2158                   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2159        seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2160                        rates.cps,
2161                        rates.inpps,
2162                        rates.outpps,
2163                        rates.inbps,
2164                        rates.outbps);
2165
2166        return 0;
2167}
2168
2169static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2170{
2171        return single_open_net(inode, file, ip_vs_stats_percpu_show);
2172}
2173
2174static const struct file_operations ip_vs_stats_percpu_fops = {
2175        .owner = THIS_MODULE,
2176        .open = ip_vs_stats_percpu_seq_open,
2177        .read = seq_read,
2178        .llseek = seq_lseek,
2179        .release = single_release_net,
2180};
2181#endif
2182
2183/*
2184 *      Set timeout values for tcp tcpfin udp in the timeout_table.
2185 */
2186static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2187{
2188#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2189        struct ip_vs_proto_data *pd;
2190#endif
2191
2192        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2193                  u->tcp_timeout,
2194                  u->tcp_fin_timeout,
2195                  u->udp_timeout);
2196
2197#ifdef CONFIG_IP_VS_PROTO_TCP
2198        if (u->tcp_timeout) {
2199                pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2200                pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2201                        = u->tcp_timeout * HZ;
2202        }
2203
2204        if (u->tcp_fin_timeout) {
2205                pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2206                pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2207                        = u->tcp_fin_timeout * HZ;
2208        }
2209#endif
2210
2211#ifdef CONFIG_IP_VS_PROTO_UDP
2212        if (u->udp_timeout) {
2213                pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2214                pd->timeout_table[IP_VS_UDP_S_NORMAL]
2215                        = u->udp_timeout * HZ;
2216        }
2217#endif
2218        return 0;
2219}
2220
2221
2222#define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2223#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2224#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2225                                 sizeof(struct ip_vs_dest_user))
2226#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2227#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2228#define MAX_ARG_LEN             SVCDEST_ARG_LEN
2229
2230static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2231        [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2232        [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2233        [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2234        [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2235        [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2236        [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2237        [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2238        [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2239        [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2240        [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2241        [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2242};
2243
2244static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2245                                  struct ip_vs_service_user *usvc_compat)
2246{
2247        memset(usvc, 0, sizeof(*usvc));
2248
2249        usvc->af                = AF_INET;
2250        usvc->protocol          = usvc_compat->protocol;
2251        usvc->addr.ip           = usvc_compat->addr;
2252        usvc->port              = usvc_compat->port;
2253        usvc->fwmark            = usvc_compat->fwmark;
2254
2255        /* Deep copy of sched_name is not needed here */
2256        usvc->sched_name        = usvc_compat->sched_name;
2257
2258        usvc->flags             = usvc_compat->flags;
2259        usvc->timeout           = usvc_compat->timeout;
2260        usvc->netmask           = usvc_compat->netmask;
2261}
2262
2263static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2264                                   struct ip_vs_dest_user *udest_compat)
2265{
2266        memset(udest, 0, sizeof(*udest));
2267
2268        udest->addr.ip          = udest_compat->addr;
2269        udest->port             = udest_compat->port;
2270        udest->conn_flags       = udest_compat->conn_flags;
2271        udest->weight           = udest_compat->weight;
2272        udest->u_threshold      = udest_compat->u_threshold;
2273        udest->l_threshold      = udest_compat->l_threshold;
2274}
2275
2276static int
2277do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2278{
2279        struct net *net = sock_net(sk);
2280        int ret;
2281        unsigned char arg[MAX_ARG_LEN];
2282        struct ip_vs_service_user *usvc_compat;
2283        struct ip_vs_service_user_kern usvc;
2284        struct ip_vs_service *svc;
2285        struct ip_vs_dest_user *udest_compat;
2286        struct ip_vs_dest_user_kern udest;
2287        struct netns_ipvs *ipvs = net_ipvs(net);
2288
2289        if (!capable(CAP_NET_ADMIN))
2290                return -EPERM;
2291
2292        if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2293                return -EINVAL;
2294        if (len < 0 || len >  MAX_ARG_LEN)
2295                return -EINVAL;
2296        if (len != set_arglen[SET_CMDID(cmd)]) {
2297                pr_err("set_ctl: len %u != %u\n",
2298                       len, set_arglen[SET_CMDID(cmd)]);
2299                return -EINVAL;
2300        }
2301
2302        if (copy_from_user(arg, user, len) != 0)
2303                return -EFAULT;
2304
2305        /* increase the module use count */
2306        ip_vs_use_count_inc();
2307
2308        /* Handle daemons since they have another lock */
2309        if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2310            cmd == IP_VS_SO_SET_STOPDAEMON) {
2311                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2312
2313                if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2314                        ret = -ERESTARTSYS;
2315                        goto out_dec;
2316                }
2317                if (cmd == IP_VS_SO_SET_STARTDAEMON)
2318                        ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2319                                                dm->syncid);
2320                else
2321                        ret = stop_sync_thread(net, dm->state);
2322                mutex_unlock(&ipvs->sync_mutex);
2323                goto out_dec;
2324        }
2325
2326        if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2327                ret = -ERESTARTSYS;
2328                goto out_dec;
2329        }
2330
2331        if (cmd == IP_VS_SO_SET_FLUSH) {
2332                /* Flush the virtual service */
2333                ret = ip_vs_flush(net);
2334                goto out_unlock;
2335        } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2336                /* Set timeout values for (tcp tcpfin udp) */
2337                ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2338                goto out_unlock;
2339        }
2340
2341        usvc_compat = (struct ip_vs_service_user *)arg;
2342        udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2343
2344        /* We only use the new structs internally, so copy userspace compat
2345         * structs to extended internal versions */
2346        ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2347        ip_vs_copy_udest_compat(&udest, udest_compat);
2348
2349        if (cmd == IP_VS_SO_SET_ZERO) {
2350                /* if no service address is set, zero counters in all */
2351                if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2352                        ret = ip_vs_zero_all(net);
2353                        goto out_unlock;
2354                }
2355        }
2356
2357        /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2358        if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2359            usvc.protocol != IPPROTO_SCTP) {
2360                pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2361                       usvc.protocol, &usvc.addr.ip,
2362                       ntohs(usvc.port), usvc.sched_name);
2363                ret = -EFAULT;
2364                goto out_unlock;
2365        }
2366
2367        /* Lookup the exact service by <protocol, addr, port> or fwmark */
2368        if (usvc.fwmark == 0)
2369                svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2370                                           &usvc.addr, usvc.port);
2371        else
2372                svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2373
2374        if (cmd != IP_VS_SO_SET_ADD
2375            && (svc == NULL || svc->protocol != usvc.protocol)) {
2376                ret = -ESRCH;
2377                goto out_unlock;
2378        }
2379
2380        switch (cmd) {
2381        case IP_VS_SO_SET_ADD:
2382                if (svc != NULL)
2383                        ret = -EEXIST;
2384                else
2385                        ret = ip_vs_add_service(net, &usvc, &svc);
2386                break;
2387        case IP_VS_SO_SET_EDIT:
2388                ret = ip_vs_edit_service(svc, &usvc);
2389                break;
2390        case IP_VS_SO_SET_DEL:
2391                ret = ip_vs_del_service(svc);
2392                if (!ret)
2393                        goto out_unlock;
2394                break;
2395        case IP_VS_SO_SET_ZERO:
2396                ret = ip_vs_zero_service(svc);
2397                break;
2398        case IP_VS_SO_SET_ADDDEST:
2399                ret = ip_vs_add_dest(svc, &udest);
2400                break;
2401        case IP_VS_SO_SET_EDITDEST:
2402                ret = ip_vs_edit_dest(svc, &udest);
2403                break;
2404        case IP_VS_SO_SET_DELDEST:
2405                ret = ip_vs_del_dest(svc, &udest);
2406                break;
2407        default:
2408                ret = -EINVAL;
2409        }
2410
2411  out_unlock:
2412        mutex_unlock(&__ip_vs_mutex);
2413  out_dec:
2414        /* decrease the module use count */
2415        ip_vs_use_count_dec();
2416
2417        return ret;
2418}
2419
2420
2421static void
2422ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2423{
2424        dst->protocol = src->protocol;
2425        dst->addr = src->addr.ip;
2426        dst->port = src->port;
2427        dst->fwmark = src->fwmark;
2428        strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2429        dst->flags = src->flags;
2430        dst->timeout = src->timeout / HZ;
2431        dst->netmask = src->netmask;
2432        dst->num_dests = src->num_dests;
2433        ip_vs_copy_stats(&dst->stats, &src->stats);
2434}
2435
2436static inline int
2437__ip_vs_get_service_entries(struct net *net,
2438                            const struct ip_vs_get_services *get,
2439                            struct ip_vs_get_services __user *uptr)
2440{
2441        int idx, count=0;
2442        struct ip_vs_service *svc;
2443        struct ip_vs_service_entry entry;
2444        int ret = 0;
2445
2446        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2447                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2448                        /* Only expose IPv4 entries to old interface */
2449                        if (svc->af != AF_INET || !net_eq(svc->net, net))
2450                                continue;
2451
2452                        if (count >= get->num_services)
2453                                goto out;
2454                        memset(&entry, 0, sizeof(entry));
2455                        ip_vs_copy_service(&entry, svc);
2456                        if (copy_to_user(&uptr->entrytable[count],
2457                                         &entry, sizeof(entry))) {
2458                                ret = -EFAULT;
2459                                goto out;
2460                        }
2461                        count++;
2462                }
2463        }
2464
2465        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2466                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2467                        /* Only expose IPv4 entries to old interface */
2468                        if (svc->af != AF_INET || !net_eq(svc->net, net))
2469                                continue;
2470
2471                        if (count >= get->num_services)
2472                                goto out;
2473                        memset(&entry, 0, sizeof(entry));
2474                        ip_vs_copy_service(&entry, svc);
2475                        if (copy_to_user(&uptr->entrytable[count],
2476                                         &entry, sizeof(entry))) {
2477                                ret = -EFAULT;
2478                                goto out;
2479                        }
2480                        count++;
2481                }
2482        }
2483out:
2484        return ret;
2485}
2486
2487static inline int
2488__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2489                         struct ip_vs_get_dests __user *uptr)
2490{
2491        struct ip_vs_service *svc;
2492        union nf_inet_addr addr = { .ip = get->addr };
2493        int ret = 0;
2494
2495        if (get->fwmark)
2496                svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2497        else
2498                svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2499                                           get->port);
2500
2501        if (svc) {
2502                int count = 0;
2503                struct ip_vs_dest *dest;
2504                struct ip_vs_dest_entry entry;
2505
2506                list_for_each_entry(dest, &svc->destinations, n_list) {
2507                        if (count >= get->num_dests)
2508                                break;
2509
2510                        entry.addr = dest->addr.ip;
2511                        entry.port = dest->port;
2512                        entry.conn_flags = atomic_read(&dest->conn_flags);
2513                        entry.weight = atomic_read(&dest->weight);
2514                        entry.u_threshold = dest->u_threshold;
2515                        entry.l_threshold = dest->l_threshold;
2516                        entry.activeconns = atomic_read(&dest->activeconns);
2517                        entry.inactconns = atomic_read(&dest->inactconns);
2518                        entry.persistconns = atomic_read(&dest->persistconns);
2519                        ip_vs_copy_stats(&entry.stats, &dest->stats);
2520                        if (copy_to_user(&uptr->entrytable[count],
2521                                         &entry, sizeof(entry))) {
2522                                ret = -EFAULT;
2523                                break;
2524                        }
2525                        count++;
2526                }
2527        } else
2528                ret = -ESRCH;
2529        return ret;
2530}
2531
2532static inline void
2533__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2534{
2535#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2536        struct ip_vs_proto_data *pd;
2537#endif
2538
2539#ifdef CONFIG_IP_VS_PROTO_TCP
2540        pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2541        u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2542        u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2543#endif
2544#ifdef CONFIG_IP_VS_PROTO_UDP
2545        pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2546        u->udp_timeout =
2547                        pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2548#endif
2549}
2550
2551
2552#define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2553#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2554#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2555#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2556#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2557#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2558#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2559
2560static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2561        [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2562        [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2563        [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2564        [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2565        [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2566        [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2567        [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2568};
2569
2570static int
2571do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2572{
2573        unsigned char arg[128];
2574        int ret = 0;
2575        unsigned int copylen;
2576        struct net *net = sock_net(sk);
2577        struct netns_ipvs *ipvs = net_ipvs(net);
2578
2579        BUG_ON(!net);
2580        if (!capable(CAP_NET_ADMIN))
2581                return -EPERM;
2582
2583        if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2584                return -EINVAL;
2585
2586        if (*len < get_arglen[GET_CMDID(cmd)]) {
2587                pr_err("get_ctl: len %u < %u\n",
2588                       *len, get_arglen[GET_CMDID(cmd)]);
2589                return -EINVAL;
2590        }
2591
2592        copylen = get_arglen[GET_CMDID(cmd)];
2593        if (copylen > 128)
2594                return -EINVAL;
2595
2596        if (copy_from_user(arg, user, copylen) != 0)
2597                return -EFAULT;
2598        /*
2599         * Handle daemons first since it has its own locking
2600         */
2601        if (cmd == IP_VS_SO_GET_DAEMON) {
2602                struct ip_vs_daemon_user d[2];
2603
2604                memset(&d, 0, sizeof(d));
2605                if (mutex_lock_interruptible(&ipvs->sync_mutex))
2606                        return -ERESTARTSYS;
2607
2608                if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2609                        d[0].state = IP_VS_STATE_MASTER;
2610                        strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2611                                sizeof(d[0].mcast_ifn));
2612                        d[0].syncid = ipvs->master_syncid;
2613                }
2614                if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2615                        d[1].state = IP_VS_STATE_BACKUP;
2616                        strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2617                                sizeof(d[1].mcast_ifn));
2618                        d[1].syncid = ipvs->backup_syncid;
2619                }
2620                if (copy_to_user(user, &d, sizeof(d)) != 0)
2621                        ret = -EFAULT;
2622                mutex_unlock(&ipvs->sync_mutex);
2623                return ret;
2624        }
2625
2626        if (mutex_lock_interruptible(&__ip_vs_mutex))
2627                return -ERESTARTSYS;
2628
2629        switch (cmd) {
2630        case IP_VS_SO_GET_VERSION:
2631        {
2632                char buf[64];
2633
2634                sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2635                        NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2636                if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2637                        ret = -EFAULT;
2638                        goto out;
2639                }
2640                *len = strlen(buf)+1;
2641        }
2642        break;
2643
2644        case IP_VS_SO_GET_INFO:
2645        {
2646                struct ip_vs_getinfo info;
2647                info.version = IP_VS_VERSION_CODE;
2648                info.size = ip_vs_conn_tab_size;
2649                info.num_services = ipvs->num_services;
2650                if (copy_to_user(user, &info, sizeof(info)) != 0)
2651                        ret = -EFAULT;
2652        }
2653        break;
2654
2655        case IP_VS_SO_GET_SERVICES:
2656        {
2657                struct ip_vs_get_services *get;
2658                int size;
2659
2660                get = (struct ip_vs_get_services *)arg;
2661                size = sizeof(*get) +
2662                        sizeof(struct ip_vs_service_entry) * get->num_services;
2663                if (*len != size) {
2664                        pr_err("length: %u != %u\n", *len, size);
2665                        ret = -EINVAL;
2666                        goto out;
2667                }
2668                ret = __ip_vs_get_service_entries(net, get, user);
2669        }
2670        break;
2671
2672        case IP_VS_SO_GET_SERVICE:
2673        {
2674                struct ip_vs_service_entry *entry;
2675                struct ip_vs_service *svc;
2676                union nf_inet_addr addr;
2677
2678                entry = (struct ip_vs_service_entry *)arg;
2679                addr.ip = entry->addr;
2680                if (entry->fwmark)
2681                        svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2682                else
2683                        svc = __ip_vs_service_find(net, AF_INET,
2684                                                   entry->protocol, &addr,
2685                                                   entry->port);
2686                if (svc) {
2687                        ip_vs_copy_service(entry, svc);
2688                        if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2689                                ret = -EFAULT;
2690                } else
2691                        ret = -ESRCH;
2692        }
2693        break;
2694
2695        case IP_VS_SO_GET_DESTS:
2696        {
2697                struct ip_vs_get_dests *get;
2698                int size;
2699
2700                get = (struct ip_vs_get_dests *)arg;
2701                size = sizeof(*get) +
2702                        sizeof(struct ip_vs_dest_entry) * get->num_dests;
2703                if (*len != size) {
2704                        pr_err("length: %u != %u\n", *len, size);
2705                        ret = -EINVAL;
2706                        goto out;
2707                }
2708                ret = __ip_vs_get_dest_entries(net, get, user);
2709        }
2710        break;
2711
2712        case IP_VS_SO_GET_TIMEOUT:
2713        {
2714                struct ip_vs_timeout_user t;
2715
2716                __ip_vs_get_timeouts(net, &t);
2717                if (copy_to_user(user, &t, sizeof(t)) != 0)
2718                        ret = -EFAULT;
2719        }
2720        break;
2721
2722        default:
2723                ret = -EINVAL;
2724        }
2725
2726out:
2727        mutex_unlock(&__ip_vs_mutex);
2728        return ret;
2729}
2730
2731
2732static struct nf_sockopt_ops ip_vs_sockopts = {
2733        .pf             = PF_INET,
2734        .set_optmin     = IP_VS_BASE_CTL,
2735        .set_optmax     = IP_VS_SO_SET_MAX+1,
2736        .set            = do_ip_vs_set_ctl,
2737        .get_optmin     = IP_VS_BASE_CTL,
2738        .get_optmax     = IP_VS_SO_GET_MAX+1,
2739        .get            = do_ip_vs_get_ctl,
2740        .owner          = THIS_MODULE,
2741};
2742
2743/*
2744 * Generic Netlink interface
2745 */
2746
2747/* IPVS genetlink family */
2748static struct genl_family ip_vs_genl_family = {
2749        .id             = GENL_ID_GENERATE,
2750        .hdrsize        = 0,
2751        .name           = IPVS_GENL_NAME,
2752        .version        = IPVS_GENL_VERSION,
2753        .maxattr        = IPVS_CMD_MAX,
2754        .netnsok        = true,         /* Make ipvsadm to work on netns */
2755};
2756
2757/* Policy used for first-level command attributes */
2758static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2759        [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2760        [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2761        [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2762        [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2763        [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2764        [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2765};
2766
2767/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2768static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2769        [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2770        [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2771                                            .len = IP_VS_IFNAME_MAXLEN },
2772        [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2773};
2774
2775/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2776static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2777        [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2778        [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2779        [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2780                                            .len = sizeof(union nf_inet_addr) },
2781        [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2782        [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2783        [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2784                                            .len = IP_VS_SCHEDNAME_MAXLEN },
2785        [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2786                                            .len = IP_VS_PENAME_MAXLEN },
2787        [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2788                                            .len = sizeof(struct ip_vs_flags) },
2789        [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2790        [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2791        [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2792};
2793
2794/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2795static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2796        [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2797                                            .len = sizeof(union nf_inet_addr) },
2798        [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2799        [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2800        [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2801        [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2802        [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2803        [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2804        [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2805        [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2806        [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2807};
2808
2809static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2810                                 struct ip_vs_stats *stats)
2811{
2812        struct ip_vs_stats_user ustats;
2813        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2814        if (!nl_stats)
2815                return -EMSGSIZE;
2816
2817        ip_vs_copy_stats(&ustats, stats);
2818
2819        NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2820        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2821        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2822        NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2823        NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2824        NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2825        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2826        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2827        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2828        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2829
2830        nla_nest_end(skb, nl_stats);
2831
2832        return 0;
2833
2834nla_put_failure:
2835        nla_nest_cancel(skb, nl_stats);
2836        return -EMSGSIZE;
2837}
2838
2839static int ip_vs_genl_fill_service(struct sk_buff *skb,
2840                                   struct ip_vs_service *svc)
2841{
2842        struct nlattr *nl_service;
2843        struct ip_vs_flags flags = { .flags = svc->flags,
2844                                     .mask = ~0 };
2845
2846        nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2847        if (!nl_service)
2848                return -EMSGSIZE;
2849
2850        NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2851
2852        if (svc->fwmark) {
2853                NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2854        } else {
2855                NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2856                NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2857                NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2858        }
2859
2860        NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2861        if (svc->pe)
2862                NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2863        NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2864        NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2865        NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2866
2867        if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2868                goto nla_put_failure;
2869
2870        nla_nest_end(skb, nl_service);
2871
2872        return 0;
2873
2874nla_put_failure:
2875        nla_nest_cancel(skb, nl_service);
2876        return -EMSGSIZE;
2877}
2878
2879static int ip_vs_genl_dump_service(struct sk_buff *skb,
2880                                   struct ip_vs_service *svc,
2881                                   struct netlink_callback *cb)
2882{
2883        void *hdr;
2884
2885        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2886                          &ip_vs_genl_family, NLM_F_MULTI,
2887                          IPVS_CMD_NEW_SERVICE);
2888        if (!hdr)
2889                return -EMSGSIZE;
2890
2891        if (ip_vs_genl_fill_service(skb, svc) < 0)
2892                goto nla_put_failure;
2893
2894        return genlmsg_end(skb, hdr);
2895
2896nla_put_failure:
2897        genlmsg_cancel(skb, hdr);
2898        return -EMSGSIZE;
2899}
2900
2901static int ip_vs_genl_dump_services(struct sk_buff *skb,
2902                                    struct netlink_callback *cb)
2903{
2904        int idx = 0, i;
2905        int start = cb->args[0];
2906        struct ip_vs_service *svc;
2907        struct net *net = skb_sknet(skb);
2908
2909        mutex_lock(&__ip_vs_mutex);
2910        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2911                list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2912                        if (++idx <= start || !net_eq(svc->net, net))
2913                                continue;
2914                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2915                                idx--;
2916                                goto nla_put_failure;
2917                        }
2918                }
2919        }
2920
2921        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2922                list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2923                        if (++idx <= start || !net_eq(svc->net, net))
2924                                continue;
2925                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2926                                idx--;
2927                                goto nla_put_failure;
2928                        }
2929                }
2930        }
2931
2932nla_put_failure:
2933        mutex_unlock(&__ip_vs_mutex);
2934        cb->args[0] = idx;
2935
2936        return skb->len;
2937}
2938
2939static int ip_vs_genl_parse_service(struct net *net,
2940                                    struct ip_vs_service_user_kern *usvc,
2941                                    struct nlattr *nla, int full_entry,
2942                                    struct ip_vs_service **ret_svc)
2943{
2944        struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2945        struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2946        struct ip_vs_service *svc;
2947
2948        /* Parse mandatory identifying service fields first */
2949        if (nla == NULL ||
2950            nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2951                return -EINVAL;
2952
2953        nla_af          = attrs[IPVS_SVC_ATTR_AF];
2954        nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2955        nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2956        nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2957        nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2958
2959        if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2960                return -EINVAL;
2961
2962        memset(usvc, 0, sizeof(*usvc));
2963
2964        usvc->af = nla_get_u16(nla_af);
2965#ifdef CONFIG_IP_VS_IPV6
2966        if (usvc->af != AF_INET && usvc->af != AF_INET6)
2967#else
2968        if (usvc->af != AF_INET)
2969#endif
2970                return -EAFNOSUPPORT;
2971
2972        if (nla_fwmark) {
2973                usvc->protocol = IPPROTO_TCP;
2974                usvc->fwmark = nla_get_u32(nla_fwmark);
2975        } else {
2976                usvc->protocol = nla_get_u16(nla_protocol);
2977                nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2978                usvc->port = nla_get_u16(nla_port);
2979                usvc->fwmark = 0;
2980        }
2981
2982        if (usvc->fwmark)
2983                svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2984        else
2985                svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2986                                           &usvc->addr, usvc->port);
2987        *ret_svc = svc;
2988
2989        /* If a full entry was requested, check for the additional fields */
2990        if (full_entry) {
2991                struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2992                              *nla_netmask;
2993                struct ip_vs_flags flags;
2994
2995                nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2996                nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2997                nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2998                nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2999                nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3000
3001                if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3002                        return -EINVAL;
3003
3004                nla_memcpy(&flags, nla_flags, sizeof(flags));
3005
3006                /* prefill flags from service if it already exists */
3007                if (svc)
3008                        usvc->flags = svc->flags;
3009
3010                /* set new flags from userland */
3011                usvc->flags = (usvc->flags & ~flags.mask) |
3012                              (flags.flags & flags.mask);
3013                usvc->sched_name = nla_data(nla_sched);
3014                usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3015                usvc->timeout = nla_get_u32(nla_timeout);
3016                usvc->netmask = nla_get_u32(nla_netmask);
3017        }
3018
3019        return 0;
3020}
3021
3022static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3023                                                     struct nlattr *nla)
3024{
3025        struct ip_vs_service_user_kern usvc;
3026        struct ip_vs_service *svc;
3027        int ret;
3028
3029        ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3030        return ret ? ERR_PTR(ret) : svc;
3031}
3032
3033static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3034{
3035        struct nlattr *nl_dest;
3036
3037        nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3038        if (!nl_dest)
3039                return -EMSGSIZE;
3040
3041        NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
3042        NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
3043
3044        NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3045                    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
3046        NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
3047        NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
3048        NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
3049        NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3050                    atomic_read(&dest->activeconns));
3051        NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3052                    atomic_read(&dest->inactconns));
3053        NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3054                    atomic_read(&dest->persistconns));
3055
3056        if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3057                goto nla_put_failure;
3058
3059        nla_nest_end(skb, nl_dest);
3060
3061        return 0;
3062
3063nla_put_failure:
3064        nla_nest_cancel(skb, nl_dest);
3065        return -EMSGSIZE;
3066}
3067
3068static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3069                                struct netlink_callback *cb)
3070{
3071        void *hdr;
3072
3073        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3074                          &ip_vs_genl_family, NLM_F_MULTI,
3075                          IPVS_CMD_NEW_DEST);
3076        if (!hdr)
3077                return -EMSGSIZE;
3078
3079        if (ip_vs_genl_fill_dest(skb, dest) < 0)
3080                goto nla_put_failure;
3081
3082        return genlmsg_end(skb, hdr);
3083
3084nla_put_failure:
3085        genlmsg_cancel(skb, hdr);
3086        return -EMSGSIZE;
3087}
3088
3089static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3090                                 struct netlink_callback *cb)
3091{
3092        int idx = 0;
3093        int start = cb->args[0];
3094        struct ip_vs_service *svc;
3095        struct ip_vs_dest *dest;
3096        struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3097        struct net *net = skb_sknet(skb);
3098
3099        mutex_lock(&__ip_vs_mutex);
3100
3101        /* Try to find the service for which to dump destinations */
3102        if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3103                        IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3104                goto out_err;
3105
3106
3107        svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3108        if (IS_ERR(svc) || svc == NULL)
3109                goto out_err;
3110
3111        /* Dump the destinations */
3112        list_for_each_entry(dest, &svc->destinations, n_list) {
3113                if (++idx <= start)
3114                        continue;
3115                if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3116                        idx--;
3117                        goto nla_put_failure;
3118                }
3119        }
3120
3121nla_put_failure:
3122        cb->args[0] = idx;
3123
3124out_err:
3125        mutex_unlock(&__ip_vs_mutex);
3126
3127        return skb->len;
3128}
3129
3130static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3131                                 struct nlattr *nla, int full_entry)
3132{
3133        struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3134        struct nlattr *nla_addr, *nla_port;
3135
3136        /* Parse mandatory identifying destination fields first */
3137        if (nla == NULL ||
3138            nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3139                return -EINVAL;
3140
3141        nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3142        nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3143
3144        if (!(nla_addr && nla_port))
3145                return -EINVAL;
3146
3147        memset(udest, 0, sizeof(*udest));
3148
3149        nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3150        udest->port = nla_get_u16(nla_port);
3151
3152        /* If a full entry was requested, check for the additional fields */
3153        if (full_entry) {
3154                struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3155                              *nla_l_thresh;
3156
3157                nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3158                nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3159                nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3160                nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3161
3162                if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3163                        return -EINVAL;
3164
3165                udest->conn_flags = nla_get_u32(nla_fwd)
3166                                    & IP_VS_CONN_F_FWD_MASK;
3167                udest->weight = nla_get_u32(nla_weight);
3168                udest->u_threshold = nla_get_u32(nla_u_thresh);
3169                udest->l_threshold = nla_get_u32(nla_l_thresh);
3170        }
3171
3172        return 0;
3173}
3174
3175static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3176                                  const char *mcast_ifn, __be32 syncid)
3177{
3178        struct nlattr *nl_daemon;
3179
3180        nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3181        if (!nl_daemon)
3182                return -EMSGSIZE;
3183
3184        NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3185        NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3186        NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3187
3188        nla_nest_end(skb, nl_daemon);
3189
3190        return 0;
3191
3192nla_put_failure:
3193        nla_nest_cancel(skb, nl_daemon);
3194        return -EMSGSIZE;
3195}
3196
3197static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3198                                  const char *mcast_ifn, __be32 syncid,
3199                                  struct netlink_callback *cb)
3200{
3201        void *hdr;
3202        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3203                          &ip_vs_genl_family, NLM_F_MULTI,
3204                          IPVS_CMD_NEW_DAEMON);
3205        if (!hdr)
3206                return -EMSGSIZE;
3207
3208        if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3209                goto nla_put_failure;
3210
3211        return genlmsg_end(skb, hdr);
3212
3213nla_put_failure:
3214        genlmsg_cancel(skb, hdr);
3215        return -EMSGSIZE;
3216}
3217
3218static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3219                                   struct netlink_callback *cb)
3220{
3221        struct net *net = skb_sknet(skb);
3222        struct netns_ipvs *ipvs = net_ipvs(net);
3223
3224        mutex_lock(&ipvs->sync_mutex);
3225        if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3226                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3227                                           ipvs->master_mcast_ifn,
3228                                           ipvs->master_syncid, cb) < 0)
3229                        goto nla_put_failure;
3230
3231                cb->args[0] = 1;
3232        }
3233
3234        if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3235                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3236                                           ipvs->backup_mcast_ifn,
3237                                           ipvs->backup_syncid, cb) < 0)
3238                        goto nla_put_failure;
3239
3240                cb->args[1] = 1;
3241        }
3242
3243nla_put_failure:
3244        mutex_unlock(&ipvs->sync_mutex);
3245
3246        return skb->len;
3247}
3248
3249static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3250{
3251        if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3252              attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3253              attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3254                return -EINVAL;
3255
3256        return start_sync_thread(net,
3257                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3258                                 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3259                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3260}
3261
3262static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3263{
3264        if (!attrs[IPVS_DAEMON_ATTR_STATE])
3265                return -EINVAL;
3266
3267        return stop_sync_thread(net,
3268                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3269}
3270
3271static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3272{
3273        struct ip_vs_timeout_user t;
3274
3275        __ip_vs_get_timeouts(net, &t);
3276
3277        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3278                t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3279
3280        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3281                t.tcp_fin_timeout =
3282                        nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3283
3284        if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3285                t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3286
3287        return ip_vs_set_timeout(net, &t);
3288}
3289
3290static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3291{
3292        int ret = 0, cmd;
3293        struct net *net;
3294        struct netns_ipvs *ipvs;
3295
3296        net = skb_sknet(skb);
3297        ipvs = net_ipvs(net);
3298        cmd = info->genlhdr->cmd;
3299
3300        if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3301                struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3302
3303                mutex_lock(&ipvs->sync_mutex);
3304                if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3305                    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3306                                     info->attrs[IPVS_CMD_ATTR_DAEMON],
3307                                     ip_vs_daemon_policy)) {
3308                        ret = -EINVAL;
3309                        goto out;
3310                }
3311
3312                if (cmd == IPVS_CMD_NEW_DAEMON)
3313                        ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3314                else
3315                        ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3316out:
3317                mutex_unlock(&ipvs->sync_mutex);
3318        }
3319        return ret;
3320}
3321
3322static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3323{
3324        struct ip_vs_service *svc = NULL;
3325        struct ip_vs_service_user_kern usvc;
3326        struct ip_vs_dest_user_kern udest;
3327        int ret = 0, cmd;
3328        int need_full_svc = 0, need_full_dest = 0;
3329        struct net *net;
3330
3331        net = skb_sknet(skb);
3332        cmd = info->genlhdr->cmd;
3333
3334        mutex_lock(&__ip_vs_mutex);
3335
3336        if (cmd == IPVS_CMD_FLUSH) {
3337                ret = ip_vs_flush(net);
3338                goto out;
3339        } else if (cmd == IPVS_CMD_SET_CONFIG) {
3340                ret = ip_vs_genl_set_config(net, info->attrs);
3341                goto out;
3342        } else if (cmd == IPVS_CMD_ZERO &&
3343                   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3344                ret = ip_vs_zero_all(net);
3345                goto out;
3346        }
3347
3348        /* All following commands require a service argument, so check if we
3349         * received a valid one. We need a full service specification when
3350         * adding / editing a service. Only identifying members otherwise. */
3351        if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3352                need_full_svc = 1;
3353
3354        ret = ip_vs_genl_parse_service(net, &usvc,
3355                                       info->attrs[IPVS_CMD_ATTR_SERVICE],
3356                                       need_full_svc, &svc);
3357        if (ret)
3358                goto out;
3359
3360        /* Unless we're adding a new service, the service must already exist */
3361        if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3362                ret = -ESRCH;
3363                goto out;
3364        }
3365
3366        /* Destination commands require a valid destination argument. For
3367         * adding / editing a destination, we need a full destination
3368         * specification. */
3369        if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3370            cmd == IPVS_CMD_DEL_DEST) {
3371                if (cmd != IPVS_CMD_DEL_DEST)
3372                        need_full_dest = 1;
3373
3374                ret = ip_vs_genl_parse_dest(&udest,
3375                                            info->attrs[IPVS_CMD_ATTR_DEST],
3376                                            need_full_dest);
3377                if (ret)
3378                        goto out;
3379        }
3380
3381        switch (cmd) {
3382        case IPVS_CMD_NEW_SERVICE:
3383                if (svc == NULL)
3384                        ret = ip_vs_add_service(net, &usvc, &svc);
3385                else
3386                        ret = -EEXIST;
3387                break;
3388        case IPVS_CMD_SET_SERVICE:
3389                ret = ip_vs_edit_service(svc, &usvc);
3390                break;
3391        case IPVS_CMD_DEL_SERVICE:
3392                ret = ip_vs_del_service(svc);
3393                /* do not use svc, it can be freed */
3394                break;
3395        case IPVS_CMD_NEW_DEST:
3396                ret = ip_vs_add_dest(svc, &udest);
3397                break;
3398        case IPVS_CMD_SET_DEST:
3399                ret = ip_vs_edit_dest(svc, &udest);
3400                break;
3401        case IPVS_CMD_DEL_DEST:
3402                ret = ip_vs_del_dest(svc, &udest);
3403                break;
3404        case IPVS_CMD_ZERO:
3405                ret = ip_vs_zero_service(svc);
3406                break;
3407        default:
3408                ret = -EINVAL;
3409        }
3410
3411out:
3412        mutex_unlock(&__ip_vs_mutex);
3413
3414        return ret;
3415}
3416
3417static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3418{
3419        struct sk_buff *msg;
3420        void *reply;
3421        int ret, cmd, reply_cmd;
3422        struct net *net;
3423
3424        net = skb_sknet(skb);
3425        cmd = info->genlhdr->cmd;
3426
3427        if (cmd == IPVS_CMD_GET_SERVICE)
3428                reply_cmd = IPVS_CMD_NEW_SERVICE;
3429        else if (cmd == IPVS_CMD_GET_INFO)
3430                reply_cmd = IPVS_CMD_SET_INFO;
3431        else if (cmd == IPVS_CMD_GET_CONFIG)
3432                reply_cmd = IPVS_CMD_SET_CONFIG;
3433        else {
3434                pr_err("unknown Generic Netlink command\n");
3435                return -EINVAL;
3436        }
3437
3438        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3439        if (!msg)
3440                return -ENOMEM;
3441
3442        mutex_lock(&__ip_vs_mutex);
3443
3444        reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3445        if (reply == NULL)
3446                goto nla_put_failure;
3447
3448        switch (cmd) {
3449        case IPVS_CMD_GET_SERVICE:
3450        {
3451                struct ip_vs_service *svc;
3452
3453                svc = ip_vs_genl_find_service(net,
3454                                              info->attrs[IPVS_CMD_ATTR_SERVICE]);
3455                if (IS_ERR(svc)) {
3456                        ret = PTR_ERR(svc);
3457                        goto out_err;
3458                } else if (svc) {
3459                        ret = ip_vs_genl_fill_service(msg, svc);
3460                        if (ret)
3461                                goto nla_put_failure;
3462                } else {
3463                        ret = -ESRCH;
3464                        goto out_err;
3465                }
3466
3467                break;
3468        }
3469
3470        case IPVS_CMD_GET_CONFIG:
3471        {
3472                struct ip_vs_timeout_user t;
3473
3474                __ip_vs_get_timeouts(net, &t);
3475#ifdef CONFIG_IP_VS_PROTO_TCP
3476                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3477                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3478                            t.tcp_fin_timeout);
3479#endif
3480#ifdef CONFIG_IP_VS_PROTO_UDP
3481                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3482#endif
3483
3484                break;
3485        }
3486
3487        case IPVS_CMD_GET_INFO:
3488                NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3489                NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3490                            ip_vs_conn_tab_size);
3491                break;
3492        }
3493
3494        genlmsg_end(msg, reply);
3495        ret = genlmsg_reply(msg, info);
3496        goto out;
3497
3498nla_put_failure:
3499        pr_err("not enough space in Netlink message\n");
3500        ret = -EMSGSIZE;
3501
3502out_err:
3503        nlmsg_free(msg);
3504out:
3505        mutex_unlock(&__ip_vs_mutex);
3506
3507        return ret;
3508}
3509
3510
3511static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3512        {
3513                .cmd    = IPVS_CMD_NEW_SERVICE,
3514                .flags  = GENL_ADMIN_PERM,
3515                .policy = ip_vs_cmd_policy,
3516                .doit   = ip_vs_genl_set_cmd,
3517        },
3518        {
3519                .cmd    = IPVS_CMD_SET_SERVICE,
3520                .flags  = GENL_ADMIN_PERM,
3521                .policy = ip_vs_cmd_policy,
3522                .doit   = ip_vs_genl_set_cmd,
3523        },
3524        {
3525                .cmd    = IPVS_CMD_DEL_SERVICE,
3526                .flags  = GENL_ADMIN_PERM,
3527                .policy = ip_vs_cmd_policy,
3528                .doit   = ip_vs_genl_set_cmd,
3529        },
3530        {
3531                .cmd    = IPVS_CMD_GET_SERVICE,
3532                .flags  = GENL_ADMIN_PERM,
3533                .doit   = ip_vs_genl_get_cmd,
3534                .dumpit = ip_vs_genl_dump_services,
3535                .policy = ip_vs_cmd_policy,
3536        },
3537        {
3538                .cmd    = IPVS_CMD_NEW_DEST,
3539                .flags  = GENL_ADMIN_PERM,
3540                .policy = ip_vs_cmd_policy,
3541                .doit   = ip_vs_genl_set_cmd,
3542        },
3543        {
3544                .cmd    = IPVS_CMD_SET_DEST,
3545                .flags  = GENL_ADMIN_PERM,
3546                .policy = ip_vs_cmd_policy,
3547                .doit   = ip_vs_genl_set_cmd,
3548        },
3549        {
3550                .cmd    = IPVS_CMD_DEL_DEST,
3551                .flags  = GENL_ADMIN_PERM,
3552                .policy = ip_vs_cmd_policy,
3553                .doit   = ip_vs_genl_set_cmd,
3554        },
3555        {
3556                .cmd    = IPVS_CMD_GET_DEST,
3557                .flags  = GENL_ADMIN_PERM,
3558                .policy = ip_vs_cmd_policy,
3559                .dumpit = ip_vs_genl_dump_dests,
3560        },
3561        {
3562                .cmd    = IPVS_CMD_NEW_DAEMON,
3563                .flags  = GENL_ADMIN_PERM,
3564                .policy = ip_vs_cmd_policy,
3565                .doit   = ip_vs_genl_set_daemon,
3566        },
3567        {
3568                .cmd    = IPVS_CMD_DEL_DAEMON,
3569                .flags  = GENL_ADMIN_PERM,
3570                .policy = ip_vs_cmd_policy,
3571                .doit   = ip_vs_genl_set_daemon,
3572        },
3573        {
3574                .cmd    = IPVS_CMD_GET_DAEMON,
3575                .flags  = GENL_ADMIN_PERM,
3576                .dumpit = ip_vs_genl_dump_daemons,
3577        },
3578        {
3579                .cmd    = IPVS_CMD_SET_CONFIG,
3580                .flags  = GENL_ADMIN_PERM,
3581                .policy = ip_vs_cmd_policy,
3582                .doit   = ip_vs_genl_set_cmd,
3583        },
3584        {
3585                .cmd    = IPVS_CMD_GET_CONFIG,
3586                .flags  = GENL_ADMIN_PERM,
3587                .doit   = ip_vs_genl_get_cmd,
3588        },
3589        {
3590                .cmd    = IPVS_CMD_GET_INFO,
3591                .flags  = GENL_ADMIN_PERM,
3592                .doit   = ip_vs_genl_get_cmd,
3593        },
3594        {
3595                .cmd    = IPVS_CMD_ZERO,
3596                .flags  = GENL_ADMIN_PERM,
3597                .policy = ip_vs_cmd_policy,
3598                .doit   = ip_vs_genl_set_cmd,
3599        },
3600        {
3601                .cmd    = IPVS_CMD_FLUSH,
3602                .flags  = GENL_ADMIN_PERM,
3603                .doit   = ip_vs_genl_set_cmd,
3604        },
3605};
3606
3607static int __init ip_vs_genl_register(void)
3608{
3609        return genl_register_family_with_ops(&ip_vs_genl_family,
3610                ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3611}
3612
3613static void ip_vs_genl_unregister(void)
3614{
3615        genl_unregister_family(&ip_vs_genl_family);
3616}
3617
3618/* End of Generic Netlink interface definitions */
3619
/*
 * Per-netns init/exit functions.
 */
3623#ifdef CONFIG_SYSCTL
3624int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3625{
3626        int idx;
3627        struct netns_ipvs *ipvs = net_ipvs(net);
3628        struct ctl_table *tbl;
3629
3630        atomic_set(&ipvs->dropentry, 0);
3631        spin_lock_init(&ipvs->dropentry_lock);
3632        spin_lock_init(&ipvs->droppacket_lock);
3633        spin_lock_init(&ipvs->securetcp_lock);
3634
3635        if (!net_eq(net, &init_net)) {
3636                tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3637                if (tbl == NULL)
3638                        return -ENOMEM;
3639        } else
3640                tbl = vs_vars;
3641        /* Initialize sysctl defaults */
3642        idx = 0;
3643        ipvs->sysctl_amemthresh = 1024;
3644        tbl[idx++].data = &ipvs->sysctl_amemthresh;
3645        ipvs->sysctl_am_droprate = 10;
3646        tbl[idx++].data = &ipvs->sysctl_am_droprate;
3647        tbl[idx++].data = &ipvs->sysctl_drop_entry;
3648        tbl[idx++].data = &ipvs->sysctl_drop_packet;
3649#ifdef CONFIG_IP_VS_NFCT
3650        tbl[idx++].data = &ipvs->sysctl_conntrack;
3651#endif
3652        tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3653        ipvs->sysctl_snat_reroute = 1;
3654        tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3655        ipvs->sysctl_sync_ver = 1;
3656        tbl[idx++].data = &ipvs->sysctl_sync_ver;
3657        tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3658        tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3659        tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3660        ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3661        ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3662        tbl[idx].data = &ipvs->sysctl_sync_threshold;
3663        tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3664        tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3665
3666
3667        ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3668                                                     tbl);
3669        if (ipvs->sysctl_hdr == NULL) {
3670                if (!net_eq(net, &init_net))
3671                        kfree(tbl);
3672                return -ENOMEM;
3673        }
3674        ip_vs_start_estimator(net, &ipvs->tot_stats);
3675        ipvs->sysctl_tbl = tbl;
3676        /* Schedule defense work */
3677        INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3678        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3679
3680        return 0;
3681}
3682
3683void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net)
3684{
3685        struct netns_ipvs *ipvs = net_ipvs(net);
3686
3687        cancel_delayed_work_sync(&ipvs->defense_work);
3688        cancel_work_sync(&ipvs->defense_work.work);
3689        unregister_net_sysctl_table(ipvs->sysctl_hdr);
3690}
3691
3692#else
3693
3694int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3695void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3696
3697#endif
3698
3699static struct notifier_block ip_vs_dst_notifier = {
3700        .notifier_call = ip_vs_dst_event,
3701};
3702
3703int __net_init ip_vs_control_net_init(struct net *net)
3704{
3705        int idx;
3706        struct netns_ipvs *ipvs = net_ipvs(net);
3707
3708        rwlock_init(&ipvs->rs_lock);
3709
3710        /* Initialize rs_table */
3711        for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3712                INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3713
3714        INIT_LIST_HEAD(&ipvs->dest_trash);
3715        atomic_set(&ipvs->ftpsvc_counter, 0);
3716        atomic_set(&ipvs->nullsvc_counter, 0);
3717
3718        /* procfs stats */
3719        ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3720        if (!ipvs->tot_stats.cpustats)
3721                return -ENOMEM;
3722
3723        spin_lock_init(&ipvs->tot_stats.lock);
3724
3725        proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3726        proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3727        proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3728                             &ip_vs_stats_percpu_fops);
3729
3730        if (ip_vs_control_net_init_sysctl(net))
3731                goto err;
3732
3733        return 0;
3734
3735err:
3736        free_percpu(ipvs->tot_stats.cpustats);
3737        return -ENOMEM;
3738}
3739
3740void __net_exit ip_vs_control_net_cleanup(struct net *net)
3741{
3742        struct netns_ipvs *ipvs = net_ipvs(net);
3743
3744        ip_vs_trash_cleanup(net);
3745        ip_vs_stop_estimator(net, &ipvs->tot_stats);
3746        ip_vs_control_net_cleanup_sysctl(net);
3747        proc_net_remove(net, "ip_vs_stats_percpu");
3748        proc_net_remove(net, "ip_vs_stats");
3749        proc_net_remove(net, "ip_vs");
3750        free_percpu(ipvs->tot_stats.cpustats);
3751}
3752
3753int __init ip_vs_control_init(void)
3754{
3755        int idx;
3756        int ret;
3757
3758        EnterFunction(2);
3759
3760        /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3761        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3762                INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3763                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3764        }
3765
3766        smp_wmb();      /* Do we really need it now ? */
3767
3768        ret = nf_register_sockopt(&ip_vs_sockopts);
3769        if (ret) {
3770                pr_err("cannot register sockopt.\n");
3771                goto err_sock;
3772        }
3773
3774        ret = ip_vs_genl_register();
3775        if (ret) {
3776                pr_err("cannot register Generic Netlink interface.\n");
3777                goto err_genl;
3778        }
3779
3780        ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3781        if (ret < 0)
3782                goto err_notf;
3783
3784        LeaveFunction(2);
3785        return 0;
3786
3787err_notf:
3788        ip_vs_genl_unregister();
3789err_genl:
3790        nf_unregister_sockopt(&ip_vs_sockopts);
3791err_sock:
3792        return ret;
3793}
3794
3795
3796void ip_vs_control_cleanup(void)
3797{
3798        EnterFunction(2);
3799        unregister_netdevice_notifier(&ip_vs_dst_notifier);
3800        ip_vs_genl_unregister();
3801        nf_unregister_sockopt(&ip_vs_sockopts);
3802        LeaveFunction(2);
3803}
3804