linux/net/netfilter/ipvs/ip_vs_ctl.c
<<
>>
Prefs
   1/*
   2 * IPVS         An implementation of the IP virtual server support for the
   3 *              LINUX operating system.  IPVS is now implemented as a module
   4 *              over the NetFilter framework. IPVS can be used to build a
   5 *              high-performance and highly available server based on a
   6 *              cluster of servers.
   7 *
   8 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   9 *              Peter Kese <peter.kese@ijs.si>
  10 *              Julian Anastasov <ja@ssi.bg>
  11 *
  12 *              This program is free software; you can redistribute it and/or
  13 *              modify it under the terms of the GNU General Public License
  14 *              as published by the Free Software Foundation; either version
  15 *              2 of the License, or (at your option) any later version.
  16 *
  17 * Changes:
  18 *
  19 */
  20
  21#define KMSG_COMPONENT "IPVS"
  22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/types.h>
  27#include <linux/capability.h>
  28#include <linux/fs.h>
  29#include <linux/sysctl.h>
  30#include <linux/proc_fs.h>
  31#include <linux/workqueue.h>
  32#include <linux/swap.h>
  33#include <linux/seq_file.h>
  34#include <linux/slab.h>
  35
  36#include <linux/netfilter.h>
  37#include <linux/netfilter_ipv4.h>
  38#include <linux/mutex.h>
  39
  40#include <net/net_namespace.h>
  41#include <linux/nsproxy.h>
  42#include <net/ip.h>
  43#ifdef CONFIG_IP_VS_IPV6
  44#include <net/ipv6.h>
  45#include <net/ip6_route.h>
  46#endif
  47#include <net/route.h>
  48#include <net/sock.h>
  49#include <net/genetlink.h>
  50
  51#include <asm/uaccess.h>
  52
  53#include <net/ip_vs.h>
  54
/* mutex serializing IPVS sockopts; a mutex is used because [gs]etsockopt may sleep. */
  56static DEFINE_MUTEX(__ip_vs_mutex);
  57
  58/* lock for service table */
  59static DEFINE_RWLOCK(__ip_vs_svc_lock);
  60
  61/* sysctl variables */
  62
  63#ifdef CONFIG_IP_VS_DEBUG
  64static int sysctl_ip_vs_debug_level = 0;
  65
  66int ip_vs_get_debug_level(void)
  67{
  68        return sysctl_ip_vs_debug_level;
  69}
  70#endif
  71
  72
  73/*  Protos */
  74static void __ip_vs_del_service(struct ip_vs_service *svc);
  75
  76
  77#ifdef CONFIG_IP_VS_IPV6
  78/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
  79static bool __ip_vs_addr_is_local_v6(struct net *net,
  80                                     const struct in6_addr *addr)
  81{
  82        struct flowi6 fl6 = {
  83                .daddr = *addr,
  84        };
  85        struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
  86        bool is_local;
  87
  88        is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
  89
  90        dst_release(dst);
  91        return is_local;
  92}
  93#endif
  94
  95#ifdef CONFIG_SYSCTL
  96/*
  97 *      update_defense_level is called from keventd and from sysctl,
  98 *      so it needs to protect itself from softirqs
  99 */
 100static void update_defense_level(struct netns_ipvs *ipvs)
 101{
 102        struct sysinfo i;
 103        static int old_secure_tcp = 0;
 104        int availmem;
 105        int nomem;
 106        int to_change = -1;
 107
 108        /* we only count free and buffered memory (in pages) */
 109        si_meminfo(&i);
 110        availmem = i.freeram + i.bufferram;
 111        /* however in linux 2.5 the i.bufferram is total page cache size,
 112           we need adjust it */
 113        /* si_swapinfo(&i); */
 114        /* availmem = availmem - (i.totalswap - i.freeswap); */
 115
 116        nomem = (availmem < ipvs->sysctl_amemthresh);
 117
 118        local_bh_disable();
 119
 120        /* drop_entry */
 121        spin_lock(&ipvs->dropentry_lock);
 122        switch (ipvs->sysctl_drop_entry) {
 123        case 0:
 124                atomic_set(&ipvs->dropentry, 0);
 125                break;
 126        case 1:
 127                if (nomem) {
 128                        atomic_set(&ipvs->dropentry, 1);
 129                        ipvs->sysctl_drop_entry = 2;
 130                } else {
 131                        atomic_set(&ipvs->dropentry, 0);
 132                }
 133                break;
 134        case 2:
 135                if (nomem) {
 136                        atomic_set(&ipvs->dropentry, 1);
 137                } else {
 138                        atomic_set(&ipvs->dropentry, 0);
 139                        ipvs->sysctl_drop_entry = 1;
 140                };
 141                break;
 142        case 3:
 143                atomic_set(&ipvs->dropentry, 1);
 144                break;
 145        }
 146        spin_unlock(&ipvs->dropentry_lock);
 147
 148        /* drop_packet */
 149        spin_lock(&ipvs->droppacket_lock);
 150        switch (ipvs->sysctl_drop_packet) {
 151        case 0:
 152                ipvs->drop_rate = 0;
 153                break;
 154        case 1:
 155                if (nomem) {
 156                        ipvs->drop_rate = ipvs->drop_counter
 157                                = ipvs->sysctl_amemthresh /
 158                                (ipvs->sysctl_amemthresh-availmem);
 159                        ipvs->sysctl_drop_packet = 2;
 160                } else {
 161                        ipvs->drop_rate = 0;
 162                }
 163                break;
 164        case 2:
 165                if (nomem) {
 166                        ipvs->drop_rate = ipvs->drop_counter
 167                                = ipvs->sysctl_amemthresh /
 168                                (ipvs->sysctl_amemthresh-availmem);
 169                } else {
 170                        ipvs->drop_rate = 0;
 171                        ipvs->sysctl_drop_packet = 1;
 172                }
 173                break;
 174        case 3:
 175                ipvs->drop_rate = ipvs->sysctl_am_droprate;
 176                break;
 177        }
 178        spin_unlock(&ipvs->droppacket_lock);
 179
 180        /* secure_tcp */
 181        spin_lock(&ipvs->securetcp_lock);
 182        switch (ipvs->sysctl_secure_tcp) {
 183        case 0:
 184                if (old_secure_tcp >= 2)
 185                        to_change = 0;
 186                break;
 187        case 1:
 188                if (nomem) {
 189                        if (old_secure_tcp < 2)
 190                                to_change = 1;
 191                        ipvs->sysctl_secure_tcp = 2;
 192                } else {
 193                        if (old_secure_tcp >= 2)
 194                                to_change = 0;
 195                }
 196                break;
 197        case 2:
 198                if (nomem) {
 199                        if (old_secure_tcp < 2)
 200                                to_change = 1;
 201                } else {
 202                        if (old_secure_tcp >= 2)
 203                                to_change = 0;
 204                        ipvs->sysctl_secure_tcp = 1;
 205                }
 206                break;
 207        case 3:
 208                if (old_secure_tcp < 2)
 209                        to_change = 1;
 210                break;
 211        }
 212        old_secure_tcp = ipvs->sysctl_secure_tcp;
 213        if (to_change >= 0)
 214                ip_vs_protocol_timeout_change(ipvs,
 215                                              ipvs->sysctl_secure_tcp > 1);
 216        spin_unlock(&ipvs->securetcp_lock);
 217
 218        local_bh_enable();
 219}
 220
 221
 222/*
 223 *      Timer for checking the defense
 224 */
 225#define DEFENSE_TIMER_PERIOD    1*HZ
 226
 227static void defense_work_handler(struct work_struct *work)
 228{
 229        struct netns_ipvs *ipvs =
 230                container_of(work, struct netns_ipvs, defense_work.work);
 231
 232        update_defense_level(ipvs);
 233        if (atomic_read(&ipvs->dropentry))
 234                ip_vs_random_dropentry(ipvs->net);
 235        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 236}
 237#endif
 238
 239int
 240ip_vs_use_count_inc(void)
 241{
 242        return try_module_get(THIS_MODULE);
 243}
 244
 245void
 246ip_vs_use_count_dec(void)
 247{
 248        module_put(THIS_MODULE);
 249}
 250
 251
/*
 *      Hash table: for virtual service lookups
 *
 *      Both tables below have IP_VS_SVC_TAB_SIZE (1 << 8 = 256) buckets;
 *      hash values are reduced to a bucket index with IP_VS_SVC_TAB_MASK.
 *      The tables are file-scope; the hash functions mix the netns
 *      pointer into the key.
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <netns, protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by <netns, fwmark> */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 263
 264
 265/*
 266 *      Returns hash value for virtual service
 267 */
 268static inline unsigned int
 269ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
 270                  const union nf_inet_addr *addr, __be16 port)
 271{
 272        register unsigned int porth = ntohs(port);
 273        __be32 addr_fold = addr->ip;
 274
 275#ifdef CONFIG_IP_VS_IPV6
 276        if (af == AF_INET6)
 277                addr_fold = addr->ip6[0]^addr->ip6[1]^
 278                            addr->ip6[2]^addr->ip6[3];
 279#endif
 280        addr_fold ^= ((size_t)net>>8);
 281
 282        return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
 283                & IP_VS_SVC_TAB_MASK;
 284}
 285
 286/*
 287 *      Returns hash value of fwmark for virtual service lookup
 288 */
 289static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
 290{
 291        return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 292}
 293
 294/*
 295 *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
 296 *      or in the ip_vs_svc_fwm_table by fwmark.
 297 *      Should be called with locked tables.
 298 */
 299static int ip_vs_svc_hash(struct ip_vs_service *svc)
 300{
 301        unsigned int hash;
 302
 303        if (svc->flags & IP_VS_SVC_F_HASHED) {
 304                pr_err("%s(): request for already hashed, called from %pF\n",
 305                       __func__, __builtin_return_address(0));
 306                return 0;
 307        }
 308
 309        if (svc->fwmark == 0) {
 310                /*
 311                 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
 312                 */
 313                hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
 314                                         &svc->addr, svc->port);
 315                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 316        } else {
 317                /*
 318                 *  Hash it by fwmark in svc_fwm_table
 319                 */
 320                hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
 321                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 322        }
 323
 324        svc->flags |= IP_VS_SVC_F_HASHED;
 325        /* increase its refcnt because it is referenced by the svc table */
 326        atomic_inc(&svc->refcnt);
 327        return 1;
 328}
 329
 330
 331/*
 332 *      Unhashes a service from svc_table / svc_fwm_table.
 333 *      Should be called with locked tables.
 334 */
 335static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 336{
 337        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
 338                pr_err("%s(): request for unhash flagged, called from %pF\n",
 339                       __func__, __builtin_return_address(0));
 340                return 0;
 341        }
 342
 343        if (svc->fwmark == 0) {
 344                /* Remove it from the svc_table table */
 345                list_del(&svc->s_list);
 346        } else {
 347                /* Remove it from the svc_fwm_table table */
 348                list_del(&svc->f_list);
 349        }
 350
 351        svc->flags &= ~IP_VS_SVC_F_HASHED;
 352        atomic_dec(&svc->refcnt);
 353        return 1;
 354}
 355
 356
 357/*
 358 *      Get service by {netns, proto,addr,port} in the service table.
 359 */
 360static inline struct ip_vs_service *
 361__ip_vs_service_find(struct net *net, int af, __u16 protocol,
 362                     const union nf_inet_addr *vaddr, __be16 vport)
 363{
 364        unsigned int hash;
 365        struct ip_vs_service *svc;
 366
 367        /* Check for "full" addressed entries */
 368        hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
 369
 370        list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
 371                if ((svc->af == af)
 372                    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 373                    && (svc->port == vport)
 374                    && (svc->protocol == protocol)
 375                    && net_eq(svc->net, net)) {
 376                        /* HIT */
 377                        return svc;
 378                }
 379        }
 380
 381        return NULL;
 382}
 383
 384
 385/*
 386 *      Get service by {fwmark} in the service table.
 387 */
 388static inline struct ip_vs_service *
 389__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
 390{
 391        unsigned int hash;
 392        struct ip_vs_service *svc;
 393
 394        /* Check for fwmark addressed entries */
 395        hash = ip_vs_svc_fwm_hashkey(net, fwmark);
 396
 397        list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
 398                if (svc->fwmark == fwmark && svc->af == af
 399                    && net_eq(svc->net, net)) {
 400                        /* HIT */
 401                        return svc;
 402                }
 403        }
 404
 405        return NULL;
 406}
 407
 408struct ip_vs_service *
 409ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 410                  const union nf_inet_addr *vaddr, __be16 vport)
 411{
 412        struct ip_vs_service *svc;
 413        struct netns_ipvs *ipvs = net_ipvs(net);
 414
 415        read_lock(&__ip_vs_svc_lock);
 416
 417        /*
 418         *      Check the table hashed by fwmark first
 419         */
 420        if (fwmark) {
 421                svc = __ip_vs_svc_fwm_find(net, af, fwmark);
 422                if (svc)
 423                        goto out;
 424        }
 425
 426        /*
 427         *      Check the table hashed by <protocol,addr,port>
 428         *      for "full" addressed entries
 429         */
 430        svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
 431
 432        if (svc == NULL
 433            && protocol == IPPROTO_TCP
 434            && atomic_read(&ipvs->ftpsvc_counter)
 435            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 436                /*
 437                 * Check if ftp service entry exists, the packet
 438                 * might belong to FTP data connections.
 439                 */
 440                svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
 441        }
 442
 443        if (svc == NULL
 444            && atomic_read(&ipvs->nullsvc_counter)) {
 445                /*
 446                 * Check if the catch-all port (port zero) exists
 447                 */
 448                svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
 449        }
 450
 451  out:
 452        if (svc)
 453                atomic_inc(&svc->usecnt);
 454        read_unlock(&__ip_vs_svc_lock);
 455
 456        IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
 457                      fwmark, ip_vs_proto_name(protocol),
 458                      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
 459                      svc ? "hit" : "not hit");
 460
 461        return svc;
 462}
 463
 464
 465static inline void
 466__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 467{
 468        atomic_inc(&svc->refcnt);
 469        dest->svc = svc;
 470}
 471
 472static void
 473__ip_vs_unbind_svc(struct ip_vs_dest *dest)
 474{
 475        struct ip_vs_service *svc = dest->svc;
 476
 477        dest->svc = NULL;
 478        if (atomic_dec_and_test(&svc->refcnt)) {
 479                IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
 480                              svc->fwmark,
 481                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
 482                              ntohs(svc->port), atomic_read(&svc->usecnt));
 483                free_percpu(svc->stats.cpustats);
 484                kfree(svc);
 485        }
 486}
 487
 488
 489/*
 490 *      Returns hash value for real service
 491 */
 492static inline unsigned int ip_vs_rs_hashkey(int af,
 493                                            const union nf_inet_addr *addr,
 494                                            __be16 port)
 495{
 496        register unsigned int porth = ntohs(port);
 497        __be32 addr_fold = addr->ip;
 498
 499#ifdef CONFIG_IP_VS_IPV6
 500        if (af == AF_INET6)
 501                addr_fold = addr->ip6[0]^addr->ip6[1]^
 502                            addr->ip6[2]^addr->ip6[3];
 503#endif
 504
 505        return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
 506                & IP_VS_RTAB_MASK;
 507}
 508
 509/*
 510 *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
 511 *      should be called with locked tables.
 512 */
 513static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 514{
 515        unsigned int hash;
 516
 517        if (!list_empty(&dest->d_list)) {
 518                return 0;
 519        }
 520
 521        /*
 522         *      Hash by proto,addr,port,
 523         *      which are the parameters of the real service.
 524         */
 525        hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 526
 527        list_add(&dest->d_list, &ipvs->rs_table[hash]);
 528
 529        return 1;
 530}
 531
 532/*
 533 *      UNhashes ip_vs_dest from rs_table.
 534 *      should be called with locked tables.
 535 */
 536static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 537{
 538        /*
 539         * Remove it from the rs_table table.
 540         */
 541        if (!list_empty(&dest->d_list)) {
 542                list_del_init(&dest->d_list);
 543        }
 544
 545        return 1;
 546}
 547
 548/*
 549 *      Lookup real service by <proto,addr,port> in the real service table.
 550 */
 551struct ip_vs_dest *
 552ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 553                          const union nf_inet_addr *daddr,
 554                          __be16 dport)
 555{
 556        struct netns_ipvs *ipvs = net_ipvs(net);
 557        unsigned int hash;
 558        struct ip_vs_dest *dest;
 559
 560        /*
 561         *      Check for "full" addressed entries
 562         *      Return the first found entry
 563         */
 564        hash = ip_vs_rs_hashkey(af, daddr, dport);
 565
 566        read_lock(&ipvs->rs_lock);
 567        list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
 568                if ((dest->af == af)
 569                    && ip_vs_addr_equal(af, &dest->addr, daddr)
 570                    && (dest->port == dport)
 571                    && ((dest->protocol == protocol) ||
 572                        dest->vfwmark)) {
 573                        /* HIT */
 574                        read_unlock(&ipvs->rs_lock);
 575                        return dest;
 576                }
 577        }
 578        read_unlock(&ipvs->rs_lock);
 579
 580        return NULL;
 581}
 582
 583/*
 584 *      Lookup destination by {addr,port} in the given service
 585 */
 586static struct ip_vs_dest *
 587ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 588                  __be16 dport)
 589{
 590        struct ip_vs_dest *dest;
 591
 592        /*
 593         * Find the destination for the given service
 594         */
 595        list_for_each_entry(dest, &svc->destinations, n_list) {
 596                if ((dest->af == svc->af)
 597                    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
 598                    && (dest->port == dport)) {
 599                        /* HIT */
 600                        return dest;
 601                }
 602        }
 603
 604        return NULL;
 605}
 606
 607/*
 608 * Find destination by {daddr,dport,vaddr,protocol}
 609 * Cretaed to be used in ip_vs_process_message() in
 610 * the backup synchronization daemon. It finds the
 611 * destination to be bound to the received connection
 612 * on the backup.
 613 *
 614 * ip_vs_lookup_real_service() looked promissing, but
 615 * seems not working as expected.
 616 */
 617struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
 618                                   const union nf_inet_addr *daddr,
 619                                   __be16 dport,
 620                                   const union nf_inet_addr *vaddr,
 621                                   __be16 vport, __u16 protocol, __u32 fwmark,
 622                                   __u32 flags)
 623{
 624        struct ip_vs_dest *dest;
 625        struct ip_vs_service *svc;
 626        __be16 port = dport;
 627
 628        svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
 629        if (!svc)
 630                return NULL;
 631        if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
 632                port = 0;
 633        dest = ip_vs_lookup_dest(svc, daddr, port);
 634        if (!dest)
 635                dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
 636        if (dest)
 637                atomic_inc(&dest->refcnt);
 638        ip_vs_service_put(svc);
 639        return dest;
 640}
 641
 642/*
 643 *  Lookup dest by {svc,addr,port} in the destination trash.
 644 *  The destination trash is used to hold the destinations that are removed
 645 *  from the service table but are still referenced by some conn entries.
 646 *  The reason to add the destination trash is when the dest is temporary
 647 *  down (either by administrator or by monitor program), the dest can be
 648 *  picked back from the trash, the remaining connections to the dest can
 649 *  continue, and the counting information of the dest is also useful for
 650 *  scheduling.
 651 */
 652static struct ip_vs_dest *
 653ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 654                     __be16 dport)
 655{
 656        struct ip_vs_dest *dest, *nxt;
 657        struct netns_ipvs *ipvs = net_ipvs(svc->net);
 658
 659        /*
 660         * Find the destination in trash
 661         */
 662        list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
 663                IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
 664                              "dest->refcnt=%d\n",
 665                              dest->vfwmark,
 666                              IP_VS_DBG_ADDR(svc->af, &dest->addr),
 667                              ntohs(dest->port),
 668                              atomic_read(&dest->refcnt));
 669                if (dest->af == svc->af &&
 670                    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
 671                    dest->port == dport &&
 672                    dest->vfwmark == svc->fwmark &&
 673                    dest->protocol == svc->protocol &&
 674                    (svc->fwmark ||
 675                     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
 676                      dest->vport == svc->port))) {
 677                        /* HIT */
 678                        return dest;
 679                }
 680
 681                /*
 682                 * Try to purge the destination from trash if not referenced
 683                 */
 684                if (atomic_read(&dest->refcnt) == 1) {
 685                        IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
 686                                      "from trash\n",
 687                                      dest->vfwmark,
 688                                      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 689                                      ntohs(dest->port));
 690                        list_del(&dest->n_list);
 691                        ip_vs_dst_reset(dest);
 692                        __ip_vs_unbind_svc(dest);
 693                        free_percpu(dest->stats.cpustats);
 694                        kfree(dest);
 695                }
 696        }
 697
 698        return NULL;
 699}
 700
 701
 702/*
 703 *  Clean up all the destinations in the trash
 704 *  Called by the ip_vs_control_cleanup()
 705 *
 706 *  When the ip_vs_control_clearup is activated by ipvs module exit,
 707 *  the service tables must have been flushed and all the connections
 708 *  are expired, and the refcnt of each destination in the trash must
 709 *  be 1, so we simply release them here.
 710 */
 711static void ip_vs_trash_cleanup(struct net *net)
 712{
 713        struct ip_vs_dest *dest, *nxt;
 714        struct netns_ipvs *ipvs = net_ipvs(net);
 715
 716        list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
 717                list_del(&dest->n_list);
 718                ip_vs_dst_reset(dest);
 719                __ip_vs_unbind_svc(dest);
 720                free_percpu(dest->stats.cpustats);
 721                kfree(dest);
 722        }
 723}
 724
 725static void
 726ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
 727{
 728#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
 729
 730        spin_lock_bh(&src->lock);
 731
 732        IP_VS_SHOW_STATS_COUNTER(conns);
 733        IP_VS_SHOW_STATS_COUNTER(inpkts);
 734        IP_VS_SHOW_STATS_COUNTER(outpkts);
 735        IP_VS_SHOW_STATS_COUNTER(inbytes);
 736        IP_VS_SHOW_STATS_COUNTER(outbytes);
 737
 738        ip_vs_read_estimator(dst, src);
 739
 740        spin_unlock_bh(&src->lock);
 741}
 742
 743static void
 744ip_vs_zero_stats(struct ip_vs_stats *stats)
 745{
 746        spin_lock_bh(&stats->lock);
 747
 748        /* get current counters as zero point, rates are zeroed */
 749
 750#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
 751
 752        IP_VS_ZERO_STATS_COUNTER(conns);
 753        IP_VS_ZERO_STATS_COUNTER(inpkts);
 754        IP_VS_ZERO_STATS_COUNTER(outpkts);
 755        IP_VS_ZERO_STATS_COUNTER(inbytes);
 756        IP_VS_ZERO_STATS_COUNTER(outbytes);
 757
 758        ip_vs_zero_estimator(stats);
 759
 760        spin_unlock_bh(&stats->lock);
 761}
 762
 763/*
 764 *      Update a destination in the given service
 765 */
 766static void
 767__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 768                    struct ip_vs_dest_user_kern *udest, int add)
 769{
 770        struct netns_ipvs *ipvs = net_ipvs(svc->net);
 771        int conn_flags;
 772
 773        /* set the weight and the flags */
 774        atomic_set(&dest->weight, udest->weight);
 775        conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
 776        conn_flags |= IP_VS_CONN_F_INACTIVE;
 777
 778        /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
 779        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
 780                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 781        } else {
 782                /*
 783                 *    Put the real service in rs_table if not present.
 784                 *    For now only for NAT!
 785                 */
 786                write_lock_bh(&ipvs->rs_lock);
 787                ip_vs_rs_hash(ipvs, dest);
 788                write_unlock_bh(&ipvs->rs_lock);
 789        }
 790        atomic_set(&dest->conn_flags, conn_flags);
 791
 792        /* bind the service */
 793        if (!dest->svc) {
 794                __ip_vs_bind_svc(dest, svc);
 795        } else {
 796                if (dest->svc != svc) {
 797                        __ip_vs_unbind_svc(dest);
 798                        ip_vs_zero_stats(&dest->stats);
 799                        __ip_vs_bind_svc(dest, svc);
 800                }
 801        }
 802
 803        /* set the dest status flags */
 804        dest->flags |= IP_VS_DEST_F_AVAILABLE;
 805
 806        if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
 807                dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
 808        dest->u_threshold = udest->u_threshold;
 809        dest->l_threshold = udest->l_threshold;
 810
 811        spin_lock_bh(&dest->dst_lock);
 812        ip_vs_dst_reset(dest);
 813        spin_unlock_bh(&dest->dst_lock);
 814
 815        if (add)
 816                ip_vs_start_estimator(svc->net, &dest->stats);
 817
 818        write_lock_bh(&__ip_vs_svc_lock);
 819
 820        /* Wait until all other svc users go away */
 821        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
 822
 823        if (add) {
 824                list_add(&dest->n_list, &svc->destinations);
 825                svc->num_dests++;
 826        }
 827
 828        /* call the update_service, because server weight may be changed */
 829        if (svc->scheduler->update_service)
 830                svc->scheduler->update_service(svc);
 831
 832        write_unlock_bh(&__ip_vs_svc_lock);
 833}
 834
 835
 836/*
 837 *      Create a destination for the given service
 838 */
 839static int
 840ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 841               struct ip_vs_dest **dest_p)
 842{
 843        struct ip_vs_dest *dest;
 844        unsigned int atype;
 845
 846        EnterFunction(2);
 847
 848#ifdef CONFIG_IP_VS_IPV6
 849        if (svc->af == AF_INET6) {
 850                atype = ipv6_addr_type(&udest->addr.in6);
 851                if ((!(atype & IPV6_ADDR_UNICAST) ||
 852                        atype & IPV6_ADDR_LINKLOCAL) &&
 853                        !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
 854                        return -EINVAL;
 855        } else
 856#endif
 857        {
 858                atype = inet_addr_type(svc->net, udest->addr.ip);
 859                if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 860                        return -EINVAL;
 861        }
 862
 863        dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
 864        if (dest == NULL)
 865                return -ENOMEM;
 866
 867        dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
 868        if (!dest->stats.cpustats)
 869                goto err_alloc;
 870
 871        dest->af = svc->af;
 872        dest->protocol = svc->protocol;
 873        dest->vaddr = svc->addr;
 874        dest->vport = svc->port;
 875        dest->vfwmark = svc->fwmark;
 876        ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
 877        dest->port = udest->port;
 878
 879        atomic_set(&dest->activeconns, 0);
 880        atomic_set(&dest->inactconns, 0);
 881        atomic_set(&dest->persistconns, 0);
 882        atomic_set(&dest->refcnt, 1);
 883
 884        INIT_LIST_HEAD(&dest->d_list);
 885        spin_lock_init(&dest->dst_lock);
 886        spin_lock_init(&dest->stats.lock);
 887        __ip_vs_update_dest(svc, dest, udest, 1);
 888
 889        *dest_p = dest;
 890
 891        LeaveFunction(2);
 892        return 0;
 893
 894err_alloc:
 895        kfree(dest);
 896        return -ENOMEM;
 897}
 898
 899
 900/*
 901 *      Add a destination into an existing service
 902 */
 903static int
 904ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 905{
 906        struct ip_vs_dest *dest;
 907        union nf_inet_addr daddr;
 908        __be16 dport = udest->port;
 909        int ret;
 910
 911        EnterFunction(2);
 912
 913        if (udest->weight < 0) {
 914                pr_err("%s(): server weight less than zero\n", __func__);
 915                return -ERANGE;
 916        }
 917
 918        if (udest->l_threshold > udest->u_threshold) {
 919                pr_err("%s(): lower threshold is higher than upper threshold\n",
 920                        __func__);
 921                return -ERANGE;
 922        }
 923
 924        ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
 925
 926        /*
 927         * Check if the dest already exists in the list
 928         */
 929        dest = ip_vs_lookup_dest(svc, &daddr, dport);
 930
 931        if (dest != NULL) {
 932                IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
 933                return -EEXIST;
 934        }
 935
 936        /*
 937         * Check if the dest already exists in the trash and
 938         * is from the same service
 939         */
 940        dest = ip_vs_trash_get_dest(svc, &daddr, dport);
 941
 942        if (dest != NULL) {
 943                IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
 944                              "dest->refcnt=%d, service %u/%s:%u\n",
 945                              IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
 946                              atomic_read(&dest->refcnt),
 947                              dest->vfwmark,
 948                              IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
 949                              ntohs(dest->vport));
 950
 951                /*
 952                 * Get the destination from the trash
 953                 */
 954                list_del(&dest->n_list);
 955
 956                __ip_vs_update_dest(svc, dest, udest, 1);
 957                ret = 0;
 958        } else {
 959                /*
 960                 * Allocate and initialize the dest structure
 961                 */
 962                ret = ip_vs_new_dest(svc, udest, &dest);
 963        }
 964        LeaveFunction(2);
 965
 966        return ret;
 967}
 968
 969
 970/*
 971 *      Edit a destination in the given service
 972 */
 973static int
 974ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 975{
 976        struct ip_vs_dest *dest;
 977        union nf_inet_addr daddr;
 978        __be16 dport = udest->port;
 979
 980        EnterFunction(2);
 981
 982        if (udest->weight < 0) {
 983                pr_err("%s(): server weight less than zero\n", __func__);
 984                return -ERANGE;
 985        }
 986
 987        if (udest->l_threshold > udest->u_threshold) {
 988                pr_err("%s(): lower threshold is higher than upper threshold\n",
 989                        __func__);
 990                return -ERANGE;
 991        }
 992
 993        ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
 994
 995        /*
 996         *  Lookup the destination list
 997         */
 998        dest = ip_vs_lookup_dest(svc, &daddr, dport);
 999
1000        if (dest == NULL) {
1001                IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1002                return -ENOENT;
1003        }
1004
1005        __ip_vs_update_dest(svc, dest, udest, 0);
1006        LeaveFunction(2);
1007
1008        return 0;
1009}
1010
1011
1012/*
1013 *      Delete a destination (must be already unlinked from the service)
1014 */
1015static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1016{
1017        struct netns_ipvs *ipvs = net_ipvs(net);
1018
1019        ip_vs_stop_estimator(net, &dest->stats);
1020
1021        /*
1022         *  Remove it from the d-linked list with the real services.
1023         */
1024        write_lock_bh(&ipvs->rs_lock);
1025        ip_vs_rs_unhash(dest);
1026        write_unlock_bh(&ipvs->rs_lock);
1027
1028        /*
1029         *  Decrease the refcnt of the dest, and free the dest
1030         *  if nobody refers to it (refcnt=0). Otherwise, throw
1031         *  the destination into the trash.
1032         */
1033        if (atomic_dec_and_test(&dest->refcnt)) {
1034                IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1035                              dest->vfwmark,
1036                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1037                              ntohs(dest->port));
1038                ip_vs_dst_reset(dest);
1039                /* simply decrease svc->refcnt here, let the caller check
1040                   and release the service if nobody refers to it.
1041                   Only user context can release destination and service,
1042                   and only one user context can update virtual service at a
1043                   time, so the operation here is OK */
1044                atomic_dec(&dest->svc->refcnt);
1045                free_percpu(dest->stats.cpustats);
1046                kfree(dest);
1047        } else {
1048                IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1049                              "dest->refcnt=%d\n",
1050                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1051                              ntohs(dest->port),
1052                              atomic_read(&dest->refcnt));
1053                list_add(&dest->n_list, &ipvs->dest_trash);
1054                atomic_inc(&dest->refcnt);
1055        }
1056}
1057
1058
1059/*
1060 *      Unlink a destination from the given service
1061 */
1062static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1063                                struct ip_vs_dest *dest,
1064                                int svcupd)
1065{
1066        dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1067
1068        /*
1069         *  Remove it from the d-linked destination list.
1070         */
1071        list_del(&dest->n_list);
1072        svc->num_dests--;
1073
1074        /*
1075         *  Call the update_service function of its scheduler
1076         */
1077        if (svcupd && svc->scheduler->update_service)
1078                        svc->scheduler->update_service(svc);
1079}
1080
1081
1082/*
1083 *      Delete a destination server in the given service
1084 */
1085static int
1086ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1087{
1088        struct ip_vs_dest *dest;
1089        __be16 dport = udest->port;
1090
1091        EnterFunction(2);
1092
1093        dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1094
1095        if (dest == NULL) {
1096                IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1097                return -ENOENT;
1098        }
1099
1100        write_lock_bh(&__ip_vs_svc_lock);
1101
1102        /*
1103         *      Wait until all other svc users go away.
1104         */
1105        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1106
1107        /*
1108         *      Unlink dest from the service
1109         */
1110        __ip_vs_unlink_dest(svc, dest, 1);
1111
1112        write_unlock_bh(&__ip_vs_svc_lock);
1113
1114        /*
1115         *      Delete the destination
1116         */
1117        __ip_vs_del_dest(svc->net, dest);
1118
1119        LeaveFunction(2);
1120
1121        return 0;
1122}
1123
1124
1125/*
1126 *      Add a service into the service hash table
1127 */
1128static int
1129ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1130                  struct ip_vs_service **svc_p)
1131{
1132        int ret = 0;
1133        struct ip_vs_scheduler *sched = NULL;
1134        struct ip_vs_pe *pe = NULL;
1135        struct ip_vs_service *svc = NULL;
1136        struct netns_ipvs *ipvs = net_ipvs(net);
1137
1138        /* increase the module use count */
1139        ip_vs_use_count_inc();
1140
1141        /* Lookup the scheduler by 'u->sched_name' */
1142        sched = ip_vs_scheduler_get(u->sched_name);
1143        if (sched == NULL) {
1144                pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1145                ret = -ENOENT;
1146                goto out_err;
1147        }
1148
1149        if (u->pe_name && *u->pe_name) {
1150                pe = ip_vs_pe_getbyname(u->pe_name);
1151                if (pe == NULL) {
1152                        pr_info("persistence engine module ip_vs_pe_%s "
1153                                "not found\n", u->pe_name);
1154                        ret = -ENOENT;
1155                        goto out_err;
1156                }
1157        }
1158
1159#ifdef CONFIG_IP_VS_IPV6
1160        if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1161                ret = -EINVAL;
1162                goto out_err;
1163        }
1164#endif
1165
1166        svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1167        if (svc == NULL) {
1168                IP_VS_DBG(1, "%s(): no memory\n", __func__);
1169                ret = -ENOMEM;
1170                goto out_err;
1171        }
1172        svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1173        if (!svc->stats.cpustats) {
1174                ret = -ENOMEM;
1175                goto out_err;
1176        }
1177
1178        /* I'm the first user of the service */
1179        atomic_set(&svc->usecnt, 0);
1180        atomic_set(&svc->refcnt, 0);
1181
1182        svc->af = u->af;
1183        svc->protocol = u->protocol;
1184        ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1185        svc->port = u->port;
1186        svc->fwmark = u->fwmark;
1187        svc->flags = u->flags;
1188        svc->timeout = u->timeout * HZ;
1189        svc->netmask = u->netmask;
1190        svc->net = net;
1191
1192        INIT_LIST_HEAD(&svc->destinations);
1193        rwlock_init(&svc->sched_lock);
1194        spin_lock_init(&svc->stats.lock);
1195
1196        /* Bind the scheduler */
1197        ret = ip_vs_bind_scheduler(svc, sched);
1198        if (ret)
1199                goto out_err;
1200        sched = NULL;
1201
1202        /* Bind the ct retriever */
1203        ip_vs_bind_pe(svc, pe);
1204        pe = NULL;
1205
1206        /* Update the virtual service counters */
1207        if (svc->port == FTPPORT)
1208                atomic_inc(&ipvs->ftpsvc_counter);
1209        else if (svc->port == 0)
1210                atomic_inc(&ipvs->nullsvc_counter);
1211
1212        ip_vs_start_estimator(net, &svc->stats);
1213
1214        /* Count only IPv4 services for old get/setsockopt interface */
1215        if (svc->af == AF_INET)
1216                ipvs->num_services++;
1217
1218        /* Hash the service into the service table */
1219        write_lock_bh(&__ip_vs_svc_lock);
1220        ip_vs_svc_hash(svc);
1221        write_unlock_bh(&__ip_vs_svc_lock);
1222
1223        *svc_p = svc;
1224        /* Now there is a service - full throttle */
1225        ipvs->enable = 1;
1226        return 0;
1227
1228
1229 out_err:
1230        if (svc != NULL) {
1231                ip_vs_unbind_scheduler(svc);
1232                if (svc->inc) {
1233                        local_bh_disable();
1234                        ip_vs_app_inc_put(svc->inc);
1235                        local_bh_enable();
1236                }
1237                if (svc->stats.cpustats)
1238                        free_percpu(svc->stats.cpustats);
1239                kfree(svc);
1240        }
1241        ip_vs_scheduler_put(sched);
1242        ip_vs_pe_put(pe);
1243
1244        /* decrease the module use count */
1245        ip_vs_use_count_dec();
1246
1247        return ret;
1248}
1249
1250
1251/*
1252 *      Edit a service and bind it with a new scheduler
1253 */
1254static int
1255ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1256{
1257        struct ip_vs_scheduler *sched, *old_sched;
1258        struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1259        int ret = 0;
1260
1261        /*
1262         * Lookup the scheduler, by 'u->sched_name'
1263         */
1264        sched = ip_vs_scheduler_get(u->sched_name);
1265        if (sched == NULL) {
1266                pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1267                return -ENOENT;
1268        }
1269        old_sched = sched;
1270
1271        if (u->pe_name && *u->pe_name) {
1272                pe = ip_vs_pe_getbyname(u->pe_name);
1273                if (pe == NULL) {
1274                        pr_info("persistence engine module ip_vs_pe_%s "
1275                                "not found\n", u->pe_name);
1276                        ret = -ENOENT;
1277                        goto out;
1278                }
1279                old_pe = pe;
1280        }
1281
1282#ifdef CONFIG_IP_VS_IPV6
1283        if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1284                ret = -EINVAL;
1285                goto out;
1286        }
1287#endif
1288
1289        write_lock_bh(&__ip_vs_svc_lock);
1290
1291        /*
1292         * Wait until all other svc users go away.
1293         */
1294        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1295
1296        /*
1297         * Set the flags and timeout value
1298         */
1299        svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1300        svc->timeout = u->timeout * HZ;
1301        svc->netmask = u->netmask;
1302
1303        old_sched = svc->scheduler;
1304        if (sched != old_sched) {
1305                /*
1306                 * Unbind the old scheduler
1307                 */
1308                if ((ret = ip_vs_unbind_scheduler(svc))) {
1309                        old_sched = sched;
1310                        goto out_unlock;
1311                }
1312
1313                /*
1314                 * Bind the new scheduler
1315                 */
1316                if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1317                        /*
1318                         * If ip_vs_bind_scheduler fails, restore the old
1319                         * scheduler.
1320                         * The main reason of failure is out of memory.
1321                         *
1322                         * The question is if the old scheduler can be
1323                         * restored all the time. TODO: if it cannot be
1324                         * restored some time, we must delete the service,
1325                         * otherwise the system may crash.
1326                         */
1327                        ip_vs_bind_scheduler(svc, old_sched);
1328                        old_sched = sched;
1329                        goto out_unlock;
1330                }
1331        }
1332
1333        old_pe = svc->pe;
1334        if (pe != old_pe) {
1335                ip_vs_unbind_pe(svc);
1336                ip_vs_bind_pe(svc, pe);
1337        }
1338
1339out_unlock:
1340        write_unlock_bh(&__ip_vs_svc_lock);
1341out:
1342        ip_vs_scheduler_put(old_sched);
1343        ip_vs_pe_put(old_pe);
1344        return ret;
1345}
1346
1347
1348/*
1349 *      Delete a service from the service list
1350 *      - The service must be unlinked, unlocked and not referenced!
1351 *      - We are called under _bh lock
1352 */
1353static void __ip_vs_del_service(struct ip_vs_service *svc)
1354{
1355        struct ip_vs_dest *dest, *nxt;
1356        struct ip_vs_scheduler *old_sched;
1357        struct ip_vs_pe *old_pe;
1358        struct netns_ipvs *ipvs = net_ipvs(svc->net);
1359
1360        pr_info("%s: enter\n", __func__);
1361
1362        /* Count only IPv4 services for old get/setsockopt interface */
1363        if (svc->af == AF_INET)
1364                ipvs->num_services--;
1365
1366        ip_vs_stop_estimator(svc->net, &svc->stats);
1367
1368        /* Unbind scheduler */
1369        old_sched = svc->scheduler;
1370        ip_vs_unbind_scheduler(svc);
1371        ip_vs_scheduler_put(old_sched);
1372
1373        /* Unbind persistence engine */
1374        old_pe = svc->pe;
1375        ip_vs_unbind_pe(svc);
1376        ip_vs_pe_put(old_pe);
1377
1378        /* Unbind app inc */
1379        if (svc->inc) {
1380                ip_vs_app_inc_put(svc->inc);
1381                svc->inc = NULL;
1382        }
1383
1384        /*
1385         *    Unlink the whole destination list
1386         */
1387        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1388                __ip_vs_unlink_dest(svc, dest, 0);
1389                __ip_vs_del_dest(svc->net, dest);
1390        }
1391
1392        /*
1393         *    Update the virtual service counters
1394         */
1395        if (svc->port == FTPPORT)
1396                atomic_dec(&ipvs->ftpsvc_counter);
1397        else if (svc->port == 0)
1398                atomic_dec(&ipvs->nullsvc_counter);
1399
1400        /*
1401         *    Free the service if nobody refers to it
1402         */
1403        if (atomic_read(&svc->refcnt) == 0) {
1404                IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1405                              svc->fwmark,
1406                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
1407                              ntohs(svc->port), atomic_read(&svc->usecnt));
1408                free_percpu(svc->stats.cpustats);
1409                kfree(svc);
1410        }
1411
1412        /* decrease the module use count */
1413        ip_vs_use_count_dec();
1414}
1415
1416/*
1417 * Unlink a service from list and try to delete it if its refcnt reached 0
1418 */
1419static void ip_vs_unlink_service(struct ip_vs_service *svc)
1420{
1421        /*
1422         * Unhash it from the service table
1423         */
1424        write_lock_bh(&__ip_vs_svc_lock);
1425
1426        ip_vs_svc_unhash(svc);
1427
1428        /*
1429         * Wait until all the svc users go away.
1430         */
1431        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1432
1433        __ip_vs_del_service(svc);
1434
1435        write_unlock_bh(&__ip_vs_svc_lock);
1436}
1437
1438/*
1439 *      Delete a service from the service list
1440 */
1441static int ip_vs_del_service(struct ip_vs_service *svc)
1442{
1443        if (svc == NULL)
1444                return -EEXIST;
1445        ip_vs_unlink_service(svc);
1446
1447        return 0;
1448}
1449
1450
1451/*
1452 *      Flush all the virtual services
1453 */
1454static int ip_vs_flush(struct net *net)
1455{
1456        int idx;
1457        struct ip_vs_service *svc, *nxt;
1458
1459        /*
1460         * Flush the service table hashed by <netns,protocol,addr,port>
1461         */
1462        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1463                list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1464                                         s_list) {
1465                        if (net_eq(svc->net, net))
1466                                ip_vs_unlink_service(svc);
1467                }
1468        }
1469
1470        /*
1471         * Flush the service table hashed by fwmark
1472         */
1473        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474                list_for_each_entry_safe(svc, nxt,
1475                                         &ip_vs_svc_fwm_table[idx], f_list) {
1476                        if (net_eq(svc->net, net))
1477                                ip_vs_unlink_service(svc);
1478                }
1479        }
1480
1481        return 0;
1482}
1483
1484/*
1485 *      Delete service by {netns} in the service table.
1486 *      Called by __ip_vs_cleanup()
1487 */
1488void ip_vs_service_net_cleanup(struct net *net)
1489{
1490        EnterFunction(2);
1491        /* Check for "full" addressed entries */
1492        mutex_lock(&__ip_vs_mutex);
1493        ip_vs_flush(net);
1494        mutex_unlock(&__ip_vs_mutex);
1495        LeaveFunction(2);
1496}
1497/*
1498 * Release dst hold by dst_cache
1499 */
1500static inline void
1501__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1502{
1503        spin_lock_bh(&dest->dst_lock);
1504        if (dest->dst_cache && dest->dst_cache->dev == dev) {
1505                IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1506                              dev->name,
1507                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1508                              ntohs(dest->port),
1509                              atomic_read(&dest->refcnt));
1510                ip_vs_dst_reset(dest);
1511        }
1512        spin_unlock_bh(&dest->dst_lock);
1513
1514}
1515/*
1516 * Netdev event receiver
1517 * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1518 * a device that is "unregister" it must be released.
1519 */
1520static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1521                            void *ptr)
1522{
1523        struct net_device *dev = ptr;
1524        struct net *net = dev_net(dev);
1525        struct netns_ipvs *ipvs = net_ipvs(net);
1526        struct ip_vs_service *svc;
1527        struct ip_vs_dest *dest;
1528        unsigned int idx;
1529
1530        if (event != NETDEV_UNREGISTER || !ipvs)
1531                return NOTIFY_DONE;
1532        IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1533        EnterFunction(2);
1534        mutex_lock(&__ip_vs_mutex);
1535        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1536                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1537                        if (net_eq(svc->net, net)) {
1538                                list_for_each_entry(dest, &svc->destinations,
1539                                                    n_list) {
1540                                        __ip_vs_dev_reset(dest, dev);
1541                                }
1542                        }
1543                }
1544
1545                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1546                        if (net_eq(svc->net, net)) {
1547                                list_for_each_entry(dest, &svc->destinations,
1548                                                    n_list) {
1549                                        __ip_vs_dev_reset(dest, dev);
1550                                }
1551                        }
1552
1553                }
1554        }
1555
1556        list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
1557                __ip_vs_dev_reset(dest, dev);
1558        }
1559        mutex_unlock(&__ip_vs_mutex);
1560        LeaveFunction(2);
1561        return NOTIFY_DONE;
1562}
1563
1564/*
1565 *      Zero counters in a service or all services
1566 */
1567static int ip_vs_zero_service(struct ip_vs_service *svc)
1568{
1569        struct ip_vs_dest *dest;
1570
1571        write_lock_bh(&__ip_vs_svc_lock);
1572        list_for_each_entry(dest, &svc->destinations, n_list) {
1573                ip_vs_zero_stats(&dest->stats);
1574        }
1575        ip_vs_zero_stats(&svc->stats);
1576        write_unlock_bh(&__ip_vs_svc_lock);
1577        return 0;
1578}
1579
1580static int ip_vs_zero_all(struct net *net)
1581{
1582        int idx;
1583        struct ip_vs_service *svc;
1584
1585        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1586                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1587                        if (net_eq(svc->net, net))
1588                                ip_vs_zero_service(svc);
1589                }
1590        }
1591
1592        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1593                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1594                        if (net_eq(svc->net, net))
1595                                ip_vs_zero_service(svc);
1596                }
1597        }
1598
1599        ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1600        return 0;
1601}
1602
1603#ifdef CONFIG_SYSCTL
1604
/* Bounds referenced through extra1/extra2 by the "sync_retries" sysctl */
static int zero;
static int three = 3;
1607
1608static int
1609proc_do_defense_mode(ctl_table *table, int write,
1610                     void __user *buffer, size_t *lenp, loff_t *ppos)
1611{
1612        struct net *net = current->nsproxy->net_ns;
1613        int *valp = table->data;
1614        int val = *valp;
1615        int rc;
1616
1617        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1618        if (write && (*valp != val)) {
1619                if ((*valp < 0) || (*valp > 3)) {
1620                        /* Restore the correct value */
1621                        *valp = val;
1622                } else {
1623                        update_defense_level(net_ipvs(net));
1624                }
1625        }
1626        return rc;
1627}
1628
1629static int
1630proc_do_sync_threshold(ctl_table *table, int write,
1631                       void __user *buffer, size_t *lenp, loff_t *ppos)
1632{
1633        int *valp = table->data;
1634        int val[2];
1635        int rc;
1636
1637        /* backup the value first */
1638        memcpy(val, valp, sizeof(val));
1639
1640        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1641        if (write && (valp[0] < 0 || valp[1] < 0 ||
1642            (valp[0] >= valp[1] && valp[1]))) {
1643                /* Restore the correct value */
1644                memcpy(valp, val, sizeof(val));
1645        }
1646        return rc;
1647}
1648
1649static int
1650proc_do_sync_mode(ctl_table *table, int write,
1651                     void __user *buffer, size_t *lenp, loff_t *ppos)
1652{
1653        int *valp = table->data;
1654        int val = *valp;
1655        int rc;
1656
1657        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1658        if (write && (*valp != val)) {
1659                if ((*valp < 0) || (*valp > 1)) {
1660                        /* Restore the correct value */
1661                        *valp = val;
1662                }
1663        }
1664        return rc;
1665}
1666
1667static int
1668proc_do_sync_ports(ctl_table *table, int write,
1669                   void __user *buffer, size_t *lenp, loff_t *ppos)
1670{
1671        int *valp = table->data;
1672        int val = *valp;
1673        int rc;
1674
1675        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1676        if (write && (*valp != val)) {
1677                if (*valp < 1 || !is_power_of_2(*valp)) {
1678                        /* Restore the correct value */
1679                        *valp = val;
1680                }
1681        }
1682        return rc;
1683}
1684
1685/*
1686 *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1687 *      Do not change order or insert new entries without
1688 *      align with netns init in ip_vs_control_net_init()
1689 */
1690
1691static struct ctl_table vs_vars[] = {
1692        {
1693                .procname       = "amemthresh",
1694                .maxlen         = sizeof(int),
1695                .mode           = 0644,
1696                .proc_handler   = proc_dointvec,
1697        },
1698        {
1699                .procname       = "am_droprate",
1700                .maxlen         = sizeof(int),
1701                .mode           = 0644,
1702                .proc_handler   = proc_dointvec,
1703        },
1704        {
1705                .procname       = "drop_entry",
1706                .maxlen         = sizeof(int),
1707                .mode           = 0644,
1708                .proc_handler   = proc_do_defense_mode,
1709        },
1710        {
1711                .procname       = "drop_packet",
1712                .maxlen         = sizeof(int),
1713                .mode           = 0644,
1714                .proc_handler   = proc_do_defense_mode,
1715        },
1716#ifdef CONFIG_IP_VS_NFCT
1717        {
1718                .procname       = "conntrack",
1719                .maxlen         = sizeof(int),
1720                .mode           = 0644,
1721                .proc_handler   = &proc_dointvec,
1722        },
1723#endif
1724        {
1725                .procname       = "secure_tcp",
1726                .maxlen         = sizeof(int),
1727                .mode           = 0644,
1728                .proc_handler   = proc_do_defense_mode,
1729        },
1730        {
1731                .procname       = "snat_reroute",
1732                .maxlen         = sizeof(int),
1733                .mode           = 0644,
1734                .proc_handler   = &proc_dointvec,
1735        },
1736        {
1737                .procname       = "sync_version",
1738                .maxlen         = sizeof(int),
1739                .mode           = 0644,
1740                .proc_handler   = &proc_do_sync_mode,
1741        },
1742        {
1743                .procname       = "sync_ports",
1744                .maxlen         = sizeof(int),
1745                .mode           = 0644,
1746                .proc_handler   = &proc_do_sync_ports,
1747        },
1748        {
1749                .procname       = "sync_qlen_max",
1750                .maxlen         = sizeof(int),
1751                .mode           = 0644,
1752                .proc_handler   = proc_dointvec,
1753        },
1754        {
1755                .procname       = "sync_sock_size",
1756                .maxlen         = sizeof(int),
1757                .mode           = 0644,
1758                .proc_handler   = proc_dointvec,
1759        },
1760        {
1761                .procname       = "cache_bypass",
1762                .maxlen         = sizeof(int),
1763                .mode           = 0644,
1764                .proc_handler   = proc_dointvec,
1765        },
1766        {
1767                .procname       = "expire_nodest_conn",
1768                .maxlen         = sizeof(int),
1769                .mode           = 0644,
1770                .proc_handler   = proc_dointvec,
1771        },
1772        {
1773                .procname       = "expire_quiescent_template",
1774                .maxlen         = sizeof(int),
1775                .mode           = 0644,
1776                .proc_handler   = proc_dointvec,
1777        },
1778        {
1779                .procname       = "sync_threshold",
1780                .maxlen         =
1781                        sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1782                .mode           = 0644,
1783                .proc_handler   = proc_do_sync_threshold,
1784        },
1785        {
1786                .procname       = "sync_refresh_period",
1787                .maxlen         = sizeof(int),
1788                .mode           = 0644,
1789                .proc_handler   = proc_dointvec_jiffies,
1790        },
1791        {
1792                .procname       = "sync_retries",
1793                .maxlen         = sizeof(int),
1794                .mode           = 0644,
1795                .proc_handler   = proc_dointvec_minmax,
1796                .extra1         = &zero,
1797                .extra2         = &three,
1798        },
1799        {
1800                .procname       = "nat_icmp_send",
1801                .maxlen         = sizeof(int),
1802                .mode           = 0644,
1803                .proc_handler   = proc_dointvec,
1804        },
1805        {
1806                .procname       = "pmtu_disc",
1807                .maxlen         = sizeof(int),
1808                .mode           = 0644,
1809                .proc_handler   = proc_dointvec,
1810        },
1811        {
1812                .procname       = "backup_only",
1813                .maxlen         = sizeof(int),
1814                .mode           = 0644,
1815                .proc_handler   = proc_dointvec,
1816        },
1817#ifdef CONFIG_IP_VS_DEBUG
1818        {
1819                .procname       = "debug_level",
1820                .data           = &sysctl_ip_vs_debug_level,
1821                .maxlen         = sizeof(int),
1822                .mode           = 0644,
1823                .proc_handler   = proc_dointvec,
1824        },
1825#endif
1826#if 0
1827        {
1828                .procname       = "timeout_established",
1829                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1830                .maxlen         = sizeof(int),
1831                .mode           = 0644,
1832                .proc_handler   = proc_dointvec_jiffies,
1833        },
1834        {
1835                .procname       = "timeout_synsent",
1836                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1837                .maxlen         = sizeof(int),
1838                .mode           = 0644,
1839                .proc_handler   = proc_dointvec_jiffies,
1840        },
1841        {
1842                .procname       = "timeout_synrecv",
1843                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1844                .maxlen         = sizeof(int),
1845                .mode           = 0644,
1846                .proc_handler   = proc_dointvec_jiffies,
1847        },
1848        {
1849                .procname       = "timeout_finwait",
1850                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1851                .maxlen         = sizeof(int),
1852                .mode           = 0644,
1853                .proc_handler   = proc_dointvec_jiffies,
1854        },
1855        {
1856                .procname       = "timeout_timewait",
1857                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1858                .maxlen         = sizeof(int),
1859                .mode           = 0644,
1860                .proc_handler   = proc_dointvec_jiffies,
1861        },
1862        {
1863                .procname       = "timeout_close",
1864                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1865                .maxlen         = sizeof(int),
1866                .mode           = 0644,
1867                .proc_handler   = proc_dointvec_jiffies,
1868        },
1869        {
1870                .procname       = "timeout_closewait",
1871                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1872                .maxlen         = sizeof(int),
1873                .mode           = 0644,
1874                .proc_handler   = proc_dointvec_jiffies,
1875        },
1876        {
1877                .procname       = "timeout_lastack",
1878                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1879                .maxlen         = sizeof(int),
1880                .mode           = 0644,
1881                .proc_handler   = proc_dointvec_jiffies,
1882        },
1883        {
1884                .procname       = "timeout_listen",
1885                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1886                .maxlen         = sizeof(int),
1887                .mode           = 0644,
1888                .proc_handler   = proc_dointvec_jiffies,
1889        },
1890        {
1891                .procname       = "timeout_synack",
1892                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1893                .maxlen         = sizeof(int),
1894                .mode           = 0644,
1895                .proc_handler   = proc_dointvec_jiffies,
1896        },
1897        {
1898                .procname       = "timeout_udp",
1899                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1900                .maxlen         = sizeof(int),
1901                .mode           = 0644,
1902                .proc_handler   = proc_dointvec_jiffies,
1903        },
1904        {
1905                .procname       = "timeout_icmp",
1906                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1907                .maxlen         = sizeof(int),
1908                .mode           = 0644,
1909                .proc_handler   = proc_dointvec_jiffies,
1910        },
1911#endif
1912        { }
1913};
1914
1915#endif
1916
1917#ifdef CONFIG_PROC_FS
1918
/*
 *      Cursor kept in seq->private while the service tables are walked
 *      for the procfs listing.  It records where the entry currently
 *      being shown was found so that the walk can resume from there.
 */
struct ip_vs_iter {
        struct seq_net_private p;  /* Do not move this, netns depends upon it*/
        struct list_head *table;   /* table holding the current entry */
        int bucket;                /* bucket index inside that table */
};
1924
1925/*
1926 *      Write the contents of the VS rule table to a PROCfs file.
1927 *      (It is kept just for backward compatibility)
1928 */
1929static inline const char *ip_vs_fwd_name(unsigned int flags)
1930{
1931        switch (flags & IP_VS_CONN_F_FWD_MASK) {
1932        case IP_VS_CONN_F_LOCALNODE:
1933                return "Local";
1934        case IP_VS_CONN_F_TUNNEL:
1935                return "Tunnel";
1936        case IP_VS_CONN_F_DROUTE:
1937                return "Route";
1938        default:
1939                return "Masq";
1940        }
1941}
1942
1943
1944/* Get the Nth entry in the two lists */
1945static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1946{
1947        struct net *net = seq_file_net(seq);
1948        struct ip_vs_iter *iter = seq->private;
1949        int idx;
1950        struct ip_vs_service *svc;
1951
1952        /* look in hash by protocol */
1953        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1954                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1955                        if (net_eq(svc->net, net) && pos-- == 0) {
1956                                iter->table = ip_vs_svc_table;
1957                                iter->bucket = idx;
1958                                return svc;
1959                        }
1960                }
1961        }
1962
1963        /* keep looking in fwmark */
1964        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1965                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1966                        if (net_eq(svc->net, net) && pos-- == 0) {
1967                                iter->table = ip_vs_svc_fwm_table;
1968                                iter->bucket = idx;
1969                                return svc;
1970                        }
1971                }
1972        }
1973
1974        return NULL;
1975}
1976
1977static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1978__acquires(__ip_vs_svc_lock)
1979{
1980
1981        read_lock_bh(&__ip_vs_svc_lock);
1982        return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1983}
1984
1985
1986static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1987{
1988        struct list_head *e;
1989        struct ip_vs_iter *iter;
1990        struct ip_vs_service *svc;
1991
1992        ++*pos;
1993        if (v == SEQ_START_TOKEN)
1994                return ip_vs_info_array(seq,0);
1995
1996        svc = v;
1997        iter = seq->private;
1998
1999        if (iter->table == ip_vs_svc_table) {
2000                /* next service in table hashed by protocol */
2001                if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
2002                        return list_entry(e, struct ip_vs_service, s_list);
2003
2004
2005                while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2006                        list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2007                                            s_list) {
2008                                return svc;
2009                        }
2010                }
2011
2012                iter->table = ip_vs_svc_fwm_table;
2013                iter->bucket = -1;
2014                goto scan_fwmark;
2015        }
2016
2017        /* next service in hashed by fwmark */
2018        if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2019                return list_entry(e, struct ip_vs_service, f_list);
2020
2021 scan_fwmark:
2022        while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2023                list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2024                                    f_list)
2025                        return svc;
2026        }
2027
2028        return NULL;
2029}
2030
2031static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2032__releases(__ip_vs_svc_lock)
2033{
2034        read_unlock_bh(&__ip_vs_svc_lock);
2035}
2036
2037
2038static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2039{
2040        if (v == SEQ_START_TOKEN) {
2041                seq_printf(seq,
2042                        "IP Virtual Server version %d.%d.%d (size=%d)\n",
2043                        NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2044                seq_puts(seq,
2045                         "Prot LocalAddress:Port Scheduler Flags\n");
2046                seq_puts(seq,
2047                         "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2048        } else {
2049                const struct ip_vs_service *svc = v;
2050                const struct ip_vs_iter *iter = seq->private;
2051                const struct ip_vs_dest *dest;
2052
2053                if (iter->table == ip_vs_svc_table) {
2054#ifdef CONFIG_IP_VS_IPV6
2055                        if (svc->af == AF_INET6)
2056                                seq_printf(seq, "%s  [%pI6]:%04X %s ",
2057                                           ip_vs_proto_name(svc->protocol),
2058                                           &svc->addr.in6,
2059                                           ntohs(svc->port),
2060                                           svc->scheduler->name);
2061                        else
2062#endif
2063                                seq_printf(seq, "%s  %08X:%04X %s %s ",
2064                                           ip_vs_proto_name(svc->protocol),
2065                                           ntohl(svc->addr.ip),
2066                                           ntohs(svc->port),
2067                                           svc->scheduler->name,
2068                                           (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2069                } else {
2070                        seq_printf(seq, "FWM  %08X %s %s",
2071                                   svc->fwmark, svc->scheduler->name,
2072                                   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2073                }
2074
2075                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2076                        seq_printf(seq, "persistent %d %08X\n",
2077                                svc->timeout,
2078                                ntohl(svc->netmask));
2079                else
2080                        seq_putc(seq, '\n');
2081
2082                list_for_each_entry(dest, &svc->destinations, n_list) {
2083#ifdef CONFIG_IP_VS_IPV6
2084                        if (dest->af == AF_INET6)
2085                                seq_printf(seq,
2086                                           "  -> [%pI6]:%04X"
2087                                           "      %-7s %-6d %-10d %-10d\n",
2088                                           &dest->addr.in6,
2089                                           ntohs(dest->port),
2090                                           ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2091                                           atomic_read(&dest->weight),
2092                                           atomic_read(&dest->activeconns),
2093                                           atomic_read(&dest->inactconns));
2094                        else
2095#endif
2096                                seq_printf(seq,
2097                                           "  -> %08X:%04X      "
2098                                           "%-7s %-6d %-10d %-10d\n",
2099                                           ntohl(dest->addr.ip),
2100                                           ntohs(dest->port),
2101                                           ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2102                                           atomic_read(&dest->weight),
2103                                           atomic_read(&dest->activeconns),
2104                                           atomic_read(&dest->inactconns));
2105
2106                }
2107        }
2108        return 0;
2109}
2110
2111static const struct seq_operations ip_vs_info_seq_ops = {
2112        .start = ip_vs_info_seq_start,
2113        .next  = ip_vs_info_seq_next,
2114        .stop  = ip_vs_info_seq_stop,
2115        .show  = ip_vs_info_seq_show,
2116};
2117
2118static int ip_vs_info_open(struct inode *inode, struct file *file)
2119{
2120        return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2121                        sizeof(struct ip_vs_iter));
2122}
2123
2124static const struct file_operations ip_vs_info_fops = {
2125        .owner   = THIS_MODULE,
2126        .open    = ip_vs_info_open,
2127        .read    = seq_read,
2128        .llseek  = seq_lseek,
2129        .release = seq_release_net,
2130};
2131
2132static int ip_vs_stats_show(struct seq_file *seq, void *v)
2133{
2134        struct net *net = seq_file_single_net(seq);
2135        struct ip_vs_stats_user show;
2136
2137/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2138        seq_puts(seq,
2139                 "   Total Incoming Outgoing         Incoming         Outgoing\n");
2140        seq_printf(seq,
2141                   "   Conns  Packets  Packets            Bytes            Bytes\n");
2142
2143        ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2144        seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2145                   show.inpkts, show.outpkts,
2146                   (unsigned long long) show.inbytes,
2147                   (unsigned long long) show.outbytes);
2148
2149/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2150        seq_puts(seq,
2151                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2152        seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2153                        show.cps, show.inpps, show.outpps,
2154                        show.inbps, show.outbps);
2155
2156        return 0;
2157}
2158
2159static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2160{
2161        return single_open_net(inode, file, ip_vs_stats_show);
2162}
2163
2164static const struct file_operations ip_vs_stats_fops = {
2165        .owner = THIS_MODULE,
2166        .open = ip_vs_stats_seq_open,
2167        .read = seq_read,
2168        .llseek = seq_lseek,
2169        .release = single_release_net,
2170};
2171
2172static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2173{
2174        struct net *net = seq_file_single_net(seq);
2175        struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2176        struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2177        struct ip_vs_stats_user rates;
2178        int i;
2179
2180/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2181        seq_puts(seq,
2182                 "       Total Incoming Outgoing         Incoming         Outgoing\n");
2183        seq_printf(seq,
2184                   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2185
2186        for_each_possible_cpu(i) {
2187                struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2188                unsigned int start;
2189                __u64 inbytes, outbytes;
2190
2191                do {
2192                        start = u64_stats_fetch_begin_bh(&u->syncp);
2193                        inbytes = u->ustats.inbytes;
2194                        outbytes = u->ustats.outbytes;
2195                } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2196
2197                seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2198                           i, u->ustats.conns, u->ustats.inpkts,
2199                           u->ustats.outpkts, (__u64)inbytes,
2200                           (__u64)outbytes);
2201        }
2202
2203        spin_lock_bh(&tot_stats->lock);
2204
2205        seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2206                   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2207                   tot_stats->ustats.outpkts,
2208                   (unsigned long long) tot_stats->ustats.inbytes,
2209                   (unsigned long long) tot_stats->ustats.outbytes);
2210
2211        ip_vs_read_estimator(&rates, tot_stats);
2212
2213        spin_unlock_bh(&tot_stats->lock);
2214
2215/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2216        seq_puts(seq,
2217                   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2218        seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2219                        rates.cps,
2220                        rates.inpps,
2221                        rates.outpps,
2222                        rates.inbps,
2223                        rates.outbps);
2224
2225        return 0;
2226}
2227
2228static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2229{
2230        return single_open_net(inode, file, ip_vs_stats_percpu_show);
2231}
2232
2233static const struct file_operations ip_vs_stats_percpu_fops = {
2234        .owner = THIS_MODULE,
2235        .open = ip_vs_stats_percpu_seq_open,
2236        .read = seq_read,
2237        .llseek = seq_lseek,
2238        .release = single_release_net,
2239};
2240#endif
2241
2242/*
2243 *      Set timeout values for tcp tcpfin udp in the timeout_table.
2244 */
2245static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2246{
2247#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2248        struct ip_vs_proto_data *pd;
2249#endif
2250
2251        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2252                  u->tcp_timeout,
2253                  u->tcp_fin_timeout,
2254                  u->udp_timeout);
2255
2256#ifdef CONFIG_IP_VS_PROTO_TCP
2257        if (u->tcp_timeout) {
2258                pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2259                pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2260                        = u->tcp_timeout * HZ;
2261        }
2262
2263        if (u->tcp_fin_timeout) {
2264                pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2265                pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2266                        = u->tcp_fin_timeout * HZ;
2267        }
2268#endif
2269
2270#ifdef CONFIG_IP_VS_PROTO_UDP
2271        if (u->udp_timeout) {
2272                pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2273                pd->timeout_table[IP_VS_UDP_S_NORMAL]
2274                        = u->udp_timeout * HZ;
2275        }
2276#endif
2277        return 0;
2278}
2279
2280
2281#define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2282#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2283#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2284                                 sizeof(struct ip_vs_dest_user))
2285#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2286#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2287#define MAX_ARG_LEN             SVCDEST_ARG_LEN
2288
2289static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2290        [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2291        [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2292        [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2293        [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2294        [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2295        [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2296        [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2297        [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2298        [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2299        [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2300        [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2301};
2302
2303static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2304                                  struct ip_vs_service_user *usvc_compat)
2305{
2306        memset(usvc, 0, sizeof(*usvc));
2307
2308        usvc->af                = AF_INET;
2309        usvc->protocol          = usvc_compat->protocol;
2310        usvc->addr.ip           = usvc_compat->addr;
2311        usvc->port              = usvc_compat->port;
2312        usvc->fwmark            = usvc_compat->fwmark;
2313
2314        /* Deep copy of sched_name is not needed here */
2315        usvc->sched_name        = usvc_compat->sched_name;
2316
2317        usvc->flags             = usvc_compat->flags;
2318        usvc->timeout           = usvc_compat->timeout;
2319        usvc->netmask           = usvc_compat->netmask;
2320}
2321
2322static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2323                                   struct ip_vs_dest_user *udest_compat)
2324{
2325        memset(udest, 0, sizeof(*udest));
2326
2327        udest->addr.ip          = udest_compat->addr;
2328        udest->port             = udest_compat->port;
2329        udest->conn_flags       = udest_compat->conn_flags;
2330        udest->weight           = udest_compat->weight;
2331        udest->u_threshold      = udest_compat->u_threshold;
2332        udest->l_threshold      = udest_compat->l_threshold;
2333}
2334
2335static int
2336do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2337{
2338        struct net *net = sock_net(sk);
2339        int ret;
2340        unsigned char arg[MAX_ARG_LEN];
2341        struct ip_vs_service_user *usvc_compat;
2342        struct ip_vs_service_user_kern usvc;
2343        struct ip_vs_service *svc;
2344        struct ip_vs_dest_user *udest_compat;
2345        struct ip_vs_dest_user_kern udest;
2346        struct netns_ipvs *ipvs = net_ipvs(net);
2347
2348        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2349                return -EPERM;
2350
2351        if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2352                return -EINVAL;
2353        if (len < 0 || len >  MAX_ARG_LEN)
2354                return -EINVAL;
2355        if (len != set_arglen[SET_CMDID(cmd)]) {
2356                pr_err("set_ctl: len %u != %u\n",
2357                       len, set_arglen[SET_CMDID(cmd)]);
2358                return -EINVAL;
2359        }
2360
2361        if (copy_from_user(arg, user, len) != 0)
2362                return -EFAULT;
2363
2364        /* increase the module use count */
2365        ip_vs_use_count_inc();
2366
2367        /* Handle daemons since they have another lock */
2368        if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2369            cmd == IP_VS_SO_SET_STOPDAEMON) {
2370                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2371
2372                if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2373                        ret = -ERESTARTSYS;
2374                        goto out_dec;
2375                }
2376                if (cmd == IP_VS_SO_SET_STARTDAEMON)
2377                        ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2378                                                dm->syncid);
2379                else
2380                        ret = stop_sync_thread(net, dm->state);
2381                mutex_unlock(&ipvs->sync_mutex);
2382                goto out_dec;
2383        }
2384
2385        if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2386                ret = -ERESTARTSYS;
2387                goto out_dec;
2388        }
2389
2390        if (cmd == IP_VS_SO_SET_FLUSH) {
2391                /* Flush the virtual service */
2392                ret = ip_vs_flush(net);
2393                goto out_unlock;
2394        } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2395                /* Set timeout values for (tcp tcpfin udp) */
2396                ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2397                goto out_unlock;
2398        }
2399
2400        usvc_compat = (struct ip_vs_service_user *)arg;
2401        udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2402
2403        /* We only use the new structs internally, so copy userspace compat
2404         * structs to extended internal versions */
2405        ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2406        ip_vs_copy_udest_compat(&udest, udest_compat);
2407
2408        if (cmd == IP_VS_SO_SET_ZERO) {
2409                /* if no service address is set, zero counters in all */
2410                if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2411                        ret = ip_vs_zero_all(net);
2412                        goto out_unlock;
2413                }
2414        }
2415
2416        /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2417        if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2418            usvc.protocol != IPPROTO_SCTP) {
2419                pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2420                       usvc.protocol, &usvc.addr.ip,
2421                       ntohs(usvc.port), usvc.sched_name);
2422                ret = -EFAULT;
2423                goto out_unlock;
2424        }
2425
2426        /* Lookup the exact service by <protocol, addr, port> or fwmark */
2427        if (usvc.fwmark == 0)
2428                svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2429                                           &usvc.addr, usvc.port);
2430        else
2431                svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2432
2433        if (cmd != IP_VS_SO_SET_ADD
2434            && (svc == NULL || svc->protocol != usvc.protocol)) {
2435                ret = -ESRCH;
2436                goto out_unlock;
2437        }
2438
2439        switch (cmd) {
2440        case IP_VS_SO_SET_ADD:
2441                if (svc != NULL)
2442                        ret = -EEXIST;
2443                else
2444                        ret = ip_vs_add_service(net, &usvc, &svc);
2445                break;
2446        case IP_VS_SO_SET_EDIT:
2447                ret = ip_vs_edit_service(svc, &usvc);
2448                break;
2449        case IP_VS_SO_SET_DEL:
2450                ret = ip_vs_del_service(svc);
2451                if (!ret)
2452                        goto out_unlock;
2453                break;
2454        case IP_VS_SO_SET_ZERO:
2455                ret = ip_vs_zero_service(svc);
2456                break;
2457        case IP_VS_SO_SET_ADDDEST:
2458                ret = ip_vs_add_dest(svc, &udest);
2459                break;
2460        case IP_VS_SO_SET_EDITDEST:
2461                ret = ip_vs_edit_dest(svc, &udest);
2462                break;
2463        case IP_VS_SO_SET_DELDEST:
2464                ret = ip_vs_del_dest(svc, &udest);
2465                break;
2466        default:
2467                ret = -EINVAL;
2468        }
2469
2470  out_unlock:
2471        mutex_unlock(&__ip_vs_mutex);
2472  out_dec:
2473        /* decrease the module use count */
2474        ip_vs_use_count_dec();
2475
2476        return ret;
2477}
2478
2479
2480static void
2481ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2482{
2483        dst->protocol = src->protocol;
2484        dst->addr = src->addr.ip;
2485        dst->port = src->port;
2486        dst->fwmark = src->fwmark;
2487        strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2488        dst->flags = src->flags;
2489        dst->timeout = src->timeout / HZ;
2490        dst->netmask = src->netmask;
2491        dst->num_dests = src->num_dests;
2492        ip_vs_copy_stats(&dst->stats, &src->stats);
2493}
2494
2495static inline int
2496__ip_vs_get_service_entries(struct net *net,
2497                            const struct ip_vs_get_services *get,
2498                            struct ip_vs_get_services __user *uptr)
2499{
2500        int idx, count=0;
2501        struct ip_vs_service *svc;
2502        struct ip_vs_service_entry entry;
2503        int ret = 0;
2504
2505        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2506                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2507                        /* Only expose IPv4 entries to old interface */
2508                        if (svc->af != AF_INET || !net_eq(svc->net, net))
2509                                continue;
2510
2511                        if (count >= get->num_services)
2512                                goto out;
2513                        memset(&entry, 0, sizeof(entry));
2514                        ip_vs_copy_service(&entry, svc);
2515                        if (copy_to_user(&uptr->entrytable[count],
2516                                         &entry, sizeof(entry))) {
2517                                ret = -EFAULT;
2518                                goto out;
2519                        }
2520                        count++;
2521                }
2522        }
2523
2524        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2525                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2526                        /* Only expose IPv4 entries to old interface */
2527                        if (svc->af != AF_INET || !net_eq(svc->net, net))
2528                                continue;
2529
2530                        if (count >= get->num_services)
2531                                goto out;
2532                        memset(&entry, 0, sizeof(entry));
2533                        ip_vs_copy_service(&entry, svc);
2534                        if (copy_to_user(&uptr->entrytable[count],
2535                                         &entry, sizeof(entry))) {
2536                                ret = -EFAULT;
2537                                goto out;
2538                        }
2539                        count++;
2540                }
2541        }
2542out:
2543        return ret;
2544}
2545
2546static inline int
2547__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2548                         struct ip_vs_get_dests __user *uptr)
2549{
2550        struct ip_vs_service *svc;
2551        union nf_inet_addr addr = { .ip = get->addr };
2552        int ret = 0;
2553
2554        if (get->fwmark)
2555                svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2556        else
2557                svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2558                                           get->port);
2559
2560        if (svc) {
2561                int count = 0;
2562                struct ip_vs_dest *dest;
2563                struct ip_vs_dest_entry entry;
2564
2565                list_for_each_entry(dest, &svc->destinations, n_list) {
2566                        if (count >= get->num_dests)
2567                                break;
2568
2569                        entry.addr = dest->addr.ip;
2570                        entry.port = dest->port;
2571                        entry.conn_flags = atomic_read(&dest->conn_flags);
2572                        entry.weight = atomic_read(&dest->weight);
2573                        entry.u_threshold = dest->u_threshold;
2574                        entry.l_threshold = dest->l_threshold;
2575                        entry.activeconns = atomic_read(&dest->activeconns);
2576                        entry.inactconns = atomic_read(&dest->inactconns);
2577                        entry.persistconns = atomic_read(&dest->persistconns);
2578                        ip_vs_copy_stats(&entry.stats, &dest->stats);
2579                        if (copy_to_user(&uptr->entrytable[count],
2580                                         &entry, sizeof(entry))) {
2581                                ret = -EFAULT;
2582                                break;
2583                        }
2584                        count++;
2585                }
2586        } else
2587                ret = -ESRCH;
2588        return ret;
2589}
2590
2591static inline void
2592__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2593{
2594#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2595        struct ip_vs_proto_data *pd;
2596#endif
2597
2598        memset(u, 0, sizeof (*u));
2599
2600#ifdef CONFIG_IP_VS_PROTO_TCP
2601        pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2602        u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2603        u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2604#endif
2605#ifdef CONFIG_IP_VS_PROTO_UDP
2606        pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2607        u->udp_timeout =
2608                        pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2609#endif
2610}
2611
2612
2613#define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2614#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2615#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2616#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2617#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2618#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2619#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2620
2621static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2622        [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2623        [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2624        [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2625        [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2626        [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2627        [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2628        [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2629};
2630
2631static int
2632do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2633{
2634        unsigned char arg[128];
2635        int ret = 0;
2636        unsigned int copylen;
2637        struct net *net = sock_net(sk);
2638        struct netns_ipvs *ipvs = net_ipvs(net);
2639
2640        BUG_ON(!net);
2641        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2642                return -EPERM;
2643
2644        if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2645                return -EINVAL;
2646
2647        if (*len < get_arglen[GET_CMDID(cmd)]) {
2648                pr_err("get_ctl: len %u < %u\n",
2649                       *len, get_arglen[GET_CMDID(cmd)]);
2650                return -EINVAL;
2651        }
2652
2653        copylen = get_arglen[GET_CMDID(cmd)];
2654        if (copylen > 128)
2655                return -EINVAL;
2656
2657        if (copy_from_user(arg, user, copylen) != 0)
2658                return -EFAULT;
2659        /*
2660         * Handle daemons first since it has its own locking
2661         */
2662        if (cmd == IP_VS_SO_GET_DAEMON) {
2663                struct ip_vs_daemon_user d[2];
2664
2665                memset(&d, 0, sizeof(d));
2666                if (mutex_lock_interruptible(&ipvs->sync_mutex))
2667                        return -ERESTARTSYS;
2668
2669                if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2670                        d[0].state = IP_VS_STATE_MASTER;
2671                        strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2672                                sizeof(d[0].mcast_ifn));
2673                        d[0].syncid = ipvs->master_syncid;
2674                }
2675                if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2676                        d[1].state = IP_VS_STATE_BACKUP;
2677                        strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2678                                sizeof(d[1].mcast_ifn));
2679                        d[1].syncid = ipvs->backup_syncid;
2680                }
2681                if (copy_to_user(user, &d, sizeof(d)) != 0)
2682                        ret = -EFAULT;
2683                mutex_unlock(&ipvs->sync_mutex);
2684                return ret;
2685        }
2686
2687        if (mutex_lock_interruptible(&__ip_vs_mutex))
2688                return -ERESTARTSYS;
2689
2690        switch (cmd) {
2691        case IP_VS_SO_GET_VERSION:
2692        {
2693                char buf[64];
2694
2695                sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2696                        NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2697                if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2698                        ret = -EFAULT;
2699                        goto out;
2700                }
2701                *len = strlen(buf)+1;
2702        }
2703        break;
2704
2705        case IP_VS_SO_GET_INFO:
2706        {
2707                struct ip_vs_getinfo info;
2708                info.version = IP_VS_VERSION_CODE;
2709                info.size = ip_vs_conn_tab_size;
2710                info.num_services = ipvs->num_services;
2711                if (copy_to_user(user, &info, sizeof(info)) != 0)
2712                        ret = -EFAULT;
2713        }
2714        break;
2715
2716        case IP_VS_SO_GET_SERVICES:
2717        {
2718                struct ip_vs_get_services *get;
2719                int size;
2720
2721                get = (struct ip_vs_get_services *)arg;
2722                size = sizeof(*get) +
2723                        sizeof(struct ip_vs_service_entry) * get->num_services;
2724                if (*len != size) {
2725                        pr_err("length: %u != %u\n", *len, size);
2726                        ret = -EINVAL;
2727                        goto out;
2728                }
2729                ret = __ip_vs_get_service_entries(net, get, user);
2730        }
2731        break;
2732
2733        case IP_VS_SO_GET_SERVICE:
2734        {
2735                struct ip_vs_service_entry *entry;
2736                struct ip_vs_service *svc;
2737                union nf_inet_addr addr;
2738
2739                entry = (struct ip_vs_service_entry *)arg;
2740                addr.ip = entry->addr;
2741                if (entry->fwmark)
2742                        svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2743                else
2744                        svc = __ip_vs_service_find(net, AF_INET,
2745                                                   entry->protocol, &addr,
2746                                                   entry->port);
2747                if (svc) {
2748                        ip_vs_copy_service(entry, svc);
2749                        if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2750                                ret = -EFAULT;
2751                } else
2752                        ret = -ESRCH;
2753        }
2754        break;
2755
2756        case IP_VS_SO_GET_DESTS:
2757        {
2758                struct ip_vs_get_dests *get;
2759                int size;
2760
2761                get = (struct ip_vs_get_dests *)arg;
2762                size = sizeof(*get) +
2763                        sizeof(struct ip_vs_dest_entry) * get->num_dests;
2764                if (*len != size) {
2765                        pr_err("length: %u != %u\n", *len, size);
2766                        ret = -EINVAL;
2767                        goto out;
2768                }
2769                ret = __ip_vs_get_dest_entries(net, get, user);
2770        }
2771        break;
2772
2773        case IP_VS_SO_GET_TIMEOUT:
2774        {
2775                struct ip_vs_timeout_user t;
2776
2777                __ip_vs_get_timeouts(net, &t);
2778                if (copy_to_user(user, &t, sizeof(t)) != 0)
2779                        ret = -EFAULT;
2780        }
2781        break;
2782
2783        default:
2784                ret = -EINVAL;
2785        }
2786
2787out:
2788        mutex_unlock(&__ip_vs_mutex);
2789        return ret;
2790}
2791
2792
2793static struct nf_sockopt_ops ip_vs_sockopts = {
2794        .pf             = PF_INET,
2795        .set_optmin     = IP_VS_BASE_CTL,
2796        .set_optmax     = IP_VS_SO_SET_MAX+1,
2797        .set            = do_ip_vs_set_ctl,
2798        .get_optmin     = IP_VS_BASE_CTL,
2799        .get_optmax     = IP_VS_SO_GET_MAX+1,
2800        .get            = do_ip_vs_get_ctl,
2801        .owner          = THIS_MODULE,
2802};
2803
2804/*
2805 * Generic Netlink interface
2806 */
2807
2808/* IPVS genetlink family */
2809static struct genl_family ip_vs_genl_family = {
2810        .id             = GENL_ID_GENERATE,
2811        .hdrsize        = 0,
2812        .name           = IPVS_GENL_NAME,
2813        .version        = IPVS_GENL_VERSION,
2814        .maxattr        = IPVS_CMD_MAX,
2815        .netnsok        = true,         /* Make ipvsadm to work on netns */
2816};
2817
2818/* Policy used for first-level command attributes */
2819static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2820        [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2821        [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2822        [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2823        [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2824        [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2825        [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2826};
2827
2828/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2829static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2830        [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2831        [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2832                                            .len = IP_VS_IFNAME_MAXLEN },
2833        [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2834};
2835
2836/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2837static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2838        [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2839        [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2840        [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2841                                            .len = sizeof(union nf_inet_addr) },
2842        [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2843        [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2844        [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2845                                            .len = IP_VS_SCHEDNAME_MAXLEN },
2846        [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2847                                            .len = IP_VS_PENAME_MAXLEN },
2848        [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2849                                            .len = sizeof(struct ip_vs_flags) },
2850        [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2851        [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2852        [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2853};
2854
2855/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2856static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2857        [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2858                                            .len = sizeof(union nf_inet_addr) },
2859        [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2860        [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2861        [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2862        [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2863        [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2864        [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2865        [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2866        [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2867        [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2868};
2869
2870static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2871                                 struct ip_vs_stats *stats)
2872{
2873        struct ip_vs_stats_user ustats;
2874        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2875        if (!nl_stats)
2876                return -EMSGSIZE;
2877
2878        ip_vs_copy_stats(&ustats, stats);
2879
2880        if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2881            nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2882            nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2883            nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2884            nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2885            nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2886            nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2887            nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2888            nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2889            nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2890                goto nla_put_failure;
2891        nla_nest_end(skb, nl_stats);
2892
2893        return 0;
2894
2895nla_put_failure:
2896        nla_nest_cancel(skb, nl_stats);
2897        return -EMSGSIZE;
2898}
2899
2900static int ip_vs_genl_fill_service(struct sk_buff *skb,
2901                                   struct ip_vs_service *svc)
2902{
2903        struct nlattr *nl_service;
2904        struct ip_vs_flags flags = { .flags = svc->flags,
2905                                     .mask = ~0 };
2906
2907        nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2908        if (!nl_service)
2909                return -EMSGSIZE;
2910
2911        if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2912                goto nla_put_failure;
2913        if (svc->fwmark) {
2914                if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2915                        goto nla_put_failure;
2916        } else {
2917                if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2918                    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2919                    nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2920                        goto nla_put_failure;
2921        }
2922
2923        if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2924            (svc->pe &&
2925             nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2926            nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2927            nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2928            nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2929                goto nla_put_failure;
2930        if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2931                goto nla_put_failure;
2932
2933        nla_nest_end(skb, nl_service);
2934
2935        return 0;
2936
2937nla_put_failure:
2938        nla_nest_cancel(skb, nl_service);
2939        return -EMSGSIZE;
2940}
2941
2942static int ip_vs_genl_dump_service(struct sk_buff *skb,
2943                                   struct ip_vs_service *svc,
2944                                   struct netlink_callback *cb)
2945{
2946        void *hdr;
2947
2948        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2949                          &ip_vs_genl_family, NLM_F_MULTI,
2950                          IPVS_CMD_NEW_SERVICE);
2951        if (!hdr)
2952                return -EMSGSIZE;
2953
2954        if (ip_vs_genl_fill_service(skb, svc) < 0)
2955                goto nla_put_failure;
2956
2957        return genlmsg_end(skb, hdr);
2958
2959nla_put_failure:
2960        genlmsg_cancel(skb, hdr);
2961        return -EMSGSIZE;
2962}
2963
2964static int ip_vs_genl_dump_services(struct sk_buff *skb,
2965                                    struct netlink_callback *cb)
2966{
2967        int idx = 0, i;
2968        int start = cb->args[0];
2969        struct ip_vs_service *svc;
2970        struct net *net = skb_sknet(skb);
2971
2972        mutex_lock(&__ip_vs_mutex);
2973        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2974                list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2975                        if (++idx <= start || !net_eq(svc->net, net))
2976                                continue;
2977                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2978                                idx--;
2979                                goto nla_put_failure;
2980                        }
2981                }
2982        }
2983
2984        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2985                list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2986                        if (++idx <= start || !net_eq(svc->net, net))
2987                                continue;
2988                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2989                                idx--;
2990                                goto nla_put_failure;
2991                        }
2992                }
2993        }
2994
2995nla_put_failure:
2996        mutex_unlock(&__ip_vs_mutex);
2997        cb->args[0] = idx;
2998
2999        return skb->len;
3000}
3001
3002static int ip_vs_genl_parse_service(struct net *net,
3003                                    struct ip_vs_service_user_kern *usvc,
3004                                    struct nlattr *nla, int full_entry,
3005                                    struct ip_vs_service **ret_svc)
3006{
3007        struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3008        struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3009        struct ip_vs_service *svc;
3010
3011        /* Parse mandatory identifying service fields first */
3012        if (nla == NULL ||
3013            nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3014                return -EINVAL;
3015
3016        nla_af          = attrs[IPVS_SVC_ATTR_AF];
3017        nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3018        nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3019        nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3020        nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3021
3022        if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3023                return -EINVAL;
3024
3025        memset(usvc, 0, sizeof(*usvc));
3026
3027        usvc->af = nla_get_u16(nla_af);
3028#ifdef CONFIG_IP_VS_IPV6
3029        if (usvc->af != AF_INET && usvc->af != AF_INET6)
3030#else
3031        if (usvc->af != AF_INET)
3032#endif
3033                return -EAFNOSUPPORT;
3034
3035        if (nla_fwmark) {
3036                usvc->protocol = IPPROTO_TCP;
3037                usvc->fwmark = nla_get_u32(nla_fwmark);
3038        } else {
3039                usvc->protocol = nla_get_u16(nla_protocol);
3040                nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3041                usvc->port = nla_get_u16(nla_port);
3042                usvc->fwmark = 0;
3043        }
3044
3045        if (usvc->fwmark)
3046                svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3047        else
3048                svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3049                                           &usvc->addr, usvc->port);
3050        *ret_svc = svc;
3051
3052        /* If a full entry was requested, check for the additional fields */
3053        if (full_entry) {
3054                struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3055                              *nla_netmask;
3056                struct ip_vs_flags flags;
3057
3058                nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3059                nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3060                nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3061                nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3062                nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3063
3064                if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3065                        return -EINVAL;
3066
3067                nla_memcpy(&flags, nla_flags, sizeof(flags));
3068
3069                /* prefill flags from service if it already exists */
3070                if (svc)
3071                        usvc->flags = svc->flags;
3072
3073                /* set new flags from userland */
3074                usvc->flags = (usvc->flags & ~flags.mask) |
3075                              (flags.flags & flags.mask);
3076                usvc->sched_name = nla_data(nla_sched);
3077                usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3078                usvc->timeout = nla_get_u32(nla_timeout);
3079                usvc->netmask = nla_get_u32(nla_netmask);
3080        }
3081
3082        return 0;
3083}
3084
3085static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3086                                                     struct nlattr *nla)
3087{
3088        struct ip_vs_service_user_kern usvc;
3089        struct ip_vs_service *svc;
3090        int ret;
3091
3092        ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3093        return ret ? ERR_PTR(ret) : svc;
3094}
3095
3096static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3097{
3098        struct nlattr *nl_dest;
3099
3100        nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3101        if (!nl_dest)
3102                return -EMSGSIZE;
3103
3104        if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3105            nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3106            nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3107                        (atomic_read(&dest->conn_flags) &
3108                         IP_VS_CONN_F_FWD_MASK)) ||
3109            nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3110                        atomic_read(&dest->weight)) ||
3111            nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3112            nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3113            nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3114                        atomic_read(&dest->activeconns)) ||
3115            nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3116                        atomic_read(&dest->inactconns)) ||
3117            nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3118                        atomic_read(&dest->persistconns)))
3119                goto nla_put_failure;
3120        if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3121                goto nla_put_failure;
3122
3123        nla_nest_end(skb, nl_dest);
3124
3125        return 0;
3126
3127nla_put_failure:
3128        nla_nest_cancel(skb, nl_dest);
3129        return -EMSGSIZE;
3130}
3131
3132static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3133                                struct netlink_callback *cb)
3134{
3135        void *hdr;
3136
3137        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3138                          &ip_vs_genl_family, NLM_F_MULTI,
3139                          IPVS_CMD_NEW_DEST);
3140        if (!hdr)
3141                return -EMSGSIZE;
3142
3143        if (ip_vs_genl_fill_dest(skb, dest) < 0)
3144                goto nla_put_failure;
3145
3146        return genlmsg_end(skb, hdr);
3147
3148nla_put_failure:
3149        genlmsg_cancel(skb, hdr);
3150        return -EMSGSIZE;
3151}
3152
3153static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3154                                 struct netlink_callback *cb)
3155{
3156        int idx = 0;
3157        int start = cb->args[0];
3158        struct ip_vs_service *svc;
3159        struct ip_vs_dest *dest;
3160        struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3161        struct net *net = skb_sknet(skb);
3162
3163        mutex_lock(&__ip_vs_mutex);
3164
3165        /* Try to find the service for which to dump destinations */
3166        if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3167                        IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3168                goto out_err;
3169
3170
3171        svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3172        if (IS_ERR(svc) || svc == NULL)
3173                goto out_err;
3174
3175        /* Dump the destinations */
3176        list_for_each_entry(dest, &svc->destinations, n_list) {
3177                if (++idx <= start)
3178                        continue;
3179                if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3180                        idx--;
3181                        goto nla_put_failure;
3182                }
3183        }
3184
3185nla_put_failure:
3186        cb->args[0] = idx;
3187
3188out_err:
3189        mutex_unlock(&__ip_vs_mutex);
3190
3191        return skb->len;
3192}
3193
3194static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3195                                 struct nlattr *nla, int full_entry)
3196{
3197        struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3198        struct nlattr *nla_addr, *nla_port;
3199
3200        /* Parse mandatory identifying destination fields first */
3201        if (nla == NULL ||
3202            nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3203                return -EINVAL;
3204
3205        nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3206        nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3207
3208        if (!(nla_addr && nla_port))
3209                return -EINVAL;
3210
3211        memset(udest, 0, sizeof(*udest));
3212
3213        nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3214        udest->port = nla_get_u16(nla_port);
3215
3216        /* If a full entry was requested, check for the additional fields */
3217        if (full_entry) {
3218                struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3219                              *nla_l_thresh;
3220
3221                nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3222                nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3223                nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3224                nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3225
3226                if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3227                        return -EINVAL;
3228
3229                udest->conn_flags = nla_get_u32(nla_fwd)
3230                                    & IP_VS_CONN_F_FWD_MASK;
3231                udest->weight = nla_get_u32(nla_weight);
3232                udest->u_threshold = nla_get_u32(nla_u_thresh);
3233                udest->l_threshold = nla_get_u32(nla_l_thresh);
3234        }
3235
3236        return 0;
3237}
3238
3239static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3240                                  const char *mcast_ifn, __be32 syncid)
3241{
3242        struct nlattr *nl_daemon;
3243
3244        nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3245        if (!nl_daemon)
3246                return -EMSGSIZE;
3247
3248        if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3249            nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3250            nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3251                goto nla_put_failure;
3252        nla_nest_end(skb, nl_daemon);
3253
3254        return 0;
3255
3256nla_put_failure:
3257        nla_nest_cancel(skb, nl_daemon);
3258        return -EMSGSIZE;
3259}
3260
3261static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3262                                  const char *mcast_ifn, __be32 syncid,
3263                                  struct netlink_callback *cb)
3264{
3265        void *hdr;
3266        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3267                          &ip_vs_genl_family, NLM_F_MULTI,
3268                          IPVS_CMD_NEW_DAEMON);
3269        if (!hdr)
3270                return -EMSGSIZE;
3271
3272        if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3273                goto nla_put_failure;
3274
3275        return genlmsg_end(skb, hdr);
3276
3277nla_put_failure:
3278        genlmsg_cancel(skb, hdr);
3279        return -EMSGSIZE;
3280}
3281
3282static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3283                                   struct netlink_callback *cb)
3284{
3285        struct net *net = skb_sknet(skb);
3286        struct netns_ipvs *ipvs = net_ipvs(net);
3287
3288        mutex_lock(&ipvs->sync_mutex);
3289        if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3290                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3291                                           ipvs->master_mcast_ifn,
3292                                           ipvs->master_syncid, cb) < 0)
3293                        goto nla_put_failure;
3294
3295                cb->args[0] = 1;
3296        }
3297
3298        if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3299                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3300                                           ipvs->backup_mcast_ifn,
3301                                           ipvs->backup_syncid, cb) < 0)
3302                        goto nla_put_failure;
3303
3304                cb->args[1] = 1;
3305        }
3306
3307nla_put_failure:
3308        mutex_unlock(&ipvs->sync_mutex);
3309
3310        return skb->len;
3311}
3312
3313static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3314{
3315        if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3316              attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3317              attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3318                return -EINVAL;
3319
3320        return start_sync_thread(net,
3321                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3322                                 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3323                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3324}
3325
3326static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3327{
3328        if (!attrs[IPVS_DAEMON_ATTR_STATE])
3329                return -EINVAL;
3330
3331        return stop_sync_thread(net,
3332                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3333}
3334
3335static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3336{
3337        struct ip_vs_timeout_user t;
3338
3339        __ip_vs_get_timeouts(net, &t);
3340
3341        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3342                t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3343
3344        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3345                t.tcp_fin_timeout =
3346                        nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3347
3348        if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3349                t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3350
3351        return ip_vs_set_timeout(net, &t);
3352}
3353
3354static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3355{
3356        int ret = 0, cmd;
3357        struct net *net;
3358        struct netns_ipvs *ipvs;
3359
3360        net = skb_sknet(skb);
3361        ipvs = net_ipvs(net);
3362        cmd = info->genlhdr->cmd;
3363
3364        if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3365                struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3366
3367                mutex_lock(&ipvs->sync_mutex);
3368                if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3369                    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3370                                     info->attrs[IPVS_CMD_ATTR_DAEMON],
3371                                     ip_vs_daemon_policy)) {
3372                        ret = -EINVAL;
3373                        goto out;
3374                }
3375
3376                if (cmd == IPVS_CMD_NEW_DAEMON)
3377                        ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3378                else
3379                        ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3380out:
3381                mutex_unlock(&ipvs->sync_mutex);
3382        }
3383        return ret;
3384}
3385
3386static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3387{
3388        struct ip_vs_service *svc = NULL;
3389        struct ip_vs_service_user_kern usvc;
3390        struct ip_vs_dest_user_kern udest;
3391        int ret = 0, cmd;
3392        int need_full_svc = 0, need_full_dest = 0;
3393        struct net *net;
3394
3395        net = skb_sknet(skb);
3396        cmd = info->genlhdr->cmd;
3397
3398        mutex_lock(&__ip_vs_mutex);
3399
3400        if (cmd == IPVS_CMD_FLUSH) {
3401                ret = ip_vs_flush(net);
3402                goto out;
3403        } else if (cmd == IPVS_CMD_SET_CONFIG) {
3404                ret = ip_vs_genl_set_config(net, info->attrs);
3405                goto out;
3406        } else if (cmd == IPVS_CMD_ZERO &&
3407                   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3408                ret = ip_vs_zero_all(net);
3409                goto out;
3410        }
3411
3412        /* All following commands require a service argument, so check if we
3413         * received a valid one. We need a full service specification when
3414         * adding / editing a service. Only identifying members otherwise. */
3415        if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3416                need_full_svc = 1;
3417
3418        ret = ip_vs_genl_parse_service(net, &usvc,
3419                                       info->attrs[IPVS_CMD_ATTR_SERVICE],
3420                                       need_full_svc, &svc);
3421        if (ret)
3422                goto out;
3423
3424        /* Unless we're adding a new service, the service must already exist */
3425        if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3426                ret = -ESRCH;
3427                goto out;
3428        }
3429
3430        /* Destination commands require a valid destination argument. For
3431         * adding / editing a destination, we need a full destination
3432         * specification. */
3433        if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3434            cmd == IPVS_CMD_DEL_DEST) {
3435                if (cmd != IPVS_CMD_DEL_DEST)
3436                        need_full_dest = 1;
3437
3438                ret = ip_vs_genl_parse_dest(&udest,
3439                                            info->attrs[IPVS_CMD_ATTR_DEST],
3440                                            need_full_dest);
3441                if (ret)
3442                        goto out;
3443        }
3444
3445        switch (cmd) {
3446        case IPVS_CMD_NEW_SERVICE:
3447                if (svc == NULL)
3448                        ret = ip_vs_add_service(net, &usvc, &svc);
3449                else
3450                        ret = -EEXIST;
3451                break;
3452        case IPVS_CMD_SET_SERVICE:
3453                ret = ip_vs_edit_service(svc, &usvc);
3454                break;
3455        case IPVS_CMD_DEL_SERVICE:
3456                ret = ip_vs_del_service(svc);
3457                /* do not use svc, it can be freed */
3458                break;
3459        case IPVS_CMD_NEW_DEST:
3460                ret = ip_vs_add_dest(svc, &udest);
3461                break;
3462        case IPVS_CMD_SET_DEST:
3463                ret = ip_vs_edit_dest(svc, &udest);
3464                break;
3465        case IPVS_CMD_DEL_DEST:
3466                ret = ip_vs_del_dest(svc, &udest);
3467                break;
3468        case IPVS_CMD_ZERO:
3469                ret = ip_vs_zero_service(svc);
3470                break;
3471        default:
3472                ret = -EINVAL;
3473        }
3474
3475out:
3476        mutex_unlock(&__ip_vs_mutex);
3477
3478        return ret;
3479}
3480
3481static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3482{
3483        struct sk_buff *msg;
3484        void *reply;
3485        int ret, cmd, reply_cmd;
3486        struct net *net;
3487
3488        net = skb_sknet(skb);
3489        cmd = info->genlhdr->cmd;
3490
3491        if (cmd == IPVS_CMD_GET_SERVICE)
3492                reply_cmd = IPVS_CMD_NEW_SERVICE;
3493        else if (cmd == IPVS_CMD_GET_INFO)
3494                reply_cmd = IPVS_CMD_SET_INFO;
3495        else if (cmd == IPVS_CMD_GET_CONFIG)
3496                reply_cmd = IPVS_CMD_SET_CONFIG;
3497        else {
3498                pr_err("unknown Generic Netlink command\n");
3499                return -EINVAL;
3500        }
3501
3502        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3503        if (!msg)
3504                return -ENOMEM;
3505
3506        mutex_lock(&__ip_vs_mutex);
3507
3508        reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3509        if (reply == NULL)
3510                goto nla_put_failure;
3511
3512        switch (cmd) {
3513        case IPVS_CMD_GET_SERVICE:
3514        {
3515                struct ip_vs_service *svc;
3516
3517                svc = ip_vs_genl_find_service(net,
3518                                              info->attrs[IPVS_CMD_ATTR_SERVICE]);
3519                if (IS_ERR(svc)) {
3520                        ret = PTR_ERR(svc);
3521                        goto out_err;
3522                } else if (svc) {
3523                        ret = ip_vs_genl_fill_service(msg, svc);
3524                        if (ret)
3525                                goto nla_put_failure;
3526                } else {
3527                        ret = -ESRCH;
3528                        goto out_err;
3529                }
3530
3531                break;
3532        }
3533
3534        case IPVS_CMD_GET_CONFIG:
3535        {
3536                struct ip_vs_timeout_user t;
3537
3538                __ip_vs_get_timeouts(net, &t);
3539#ifdef CONFIG_IP_VS_PROTO_TCP
3540                if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3541                                t.tcp_timeout) ||
3542                    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3543                                t.tcp_fin_timeout))
3544                        goto nla_put_failure;
3545#endif
3546#ifdef CONFIG_IP_VS_PROTO_UDP
3547                if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3548                        goto nla_put_failure;
3549#endif
3550
3551                break;
3552        }
3553
3554        case IPVS_CMD_GET_INFO:
3555                if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3556                                IP_VS_VERSION_CODE) ||
3557                    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3558                                ip_vs_conn_tab_size))
3559                        goto nla_put_failure;
3560                break;
3561        }
3562
3563        genlmsg_end(msg, reply);
3564        ret = genlmsg_reply(msg, info);
3565        goto out;
3566
3567nla_put_failure:
3568        pr_err("not enough space in Netlink message\n");
3569        ret = -EMSGSIZE;
3570
3571out_err:
3572        nlmsg_free(msg);
3573out:
3574        mutex_unlock(&__ip_vs_mutex);
3575
3576        return ret;
3577}
3578
3579
3580static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3581        {
3582                .cmd    = IPVS_CMD_NEW_SERVICE,
3583                .flags  = GENL_ADMIN_PERM,
3584                .policy = ip_vs_cmd_policy,
3585                .doit   = ip_vs_genl_set_cmd,
3586        },
3587        {
3588                .cmd    = IPVS_CMD_SET_SERVICE,
3589                .flags  = GENL_ADMIN_PERM,
3590                .policy = ip_vs_cmd_policy,
3591                .doit   = ip_vs_genl_set_cmd,
3592        },
3593        {
3594                .cmd    = IPVS_CMD_DEL_SERVICE,
3595                .flags  = GENL_ADMIN_PERM,
3596                .policy = ip_vs_cmd_policy,
3597                .doit   = ip_vs_genl_set_cmd,
3598        },
3599        {
3600                .cmd    = IPVS_CMD_GET_SERVICE,
3601                .flags  = GENL_ADMIN_PERM,
3602                .doit   = ip_vs_genl_get_cmd,
3603                .dumpit = ip_vs_genl_dump_services,
3604                .policy = ip_vs_cmd_policy,
3605        },
3606        {
3607                .cmd    = IPVS_CMD_NEW_DEST,
3608                .flags  = GENL_ADMIN_PERM,
3609                .policy = ip_vs_cmd_policy,
3610                .doit   = ip_vs_genl_set_cmd,
3611        },
3612        {
3613                .cmd    = IPVS_CMD_SET_DEST,
3614                .flags  = GENL_ADMIN_PERM,
3615                .policy = ip_vs_cmd_policy,
3616                .doit   = ip_vs_genl_set_cmd,
3617        },
3618        {
3619                .cmd    = IPVS_CMD_DEL_DEST,
3620                .flags  = GENL_ADMIN_PERM,
3621                .policy = ip_vs_cmd_policy,
3622                .doit   = ip_vs_genl_set_cmd,
3623        },
3624        {
3625                .cmd    = IPVS_CMD_GET_DEST,
3626                .flags  = GENL_ADMIN_PERM,
3627                .policy = ip_vs_cmd_policy,
3628                .dumpit = ip_vs_genl_dump_dests,
3629        },
3630        {
3631                .cmd    = IPVS_CMD_NEW_DAEMON,
3632                .flags  = GENL_ADMIN_PERM,
3633                .policy = ip_vs_cmd_policy,
3634                .doit   = ip_vs_genl_set_daemon,
3635        },
3636        {
3637                .cmd    = IPVS_CMD_DEL_DAEMON,
3638                .flags  = GENL_ADMIN_PERM,
3639                .policy = ip_vs_cmd_policy,
3640                .doit   = ip_vs_genl_set_daemon,
3641        },
3642        {
3643                .cmd    = IPVS_CMD_GET_DAEMON,
3644                .flags  = GENL_ADMIN_PERM,
3645                .dumpit = ip_vs_genl_dump_daemons,
3646        },
3647        {
3648                .cmd    = IPVS_CMD_SET_CONFIG,
3649                .flags  = GENL_ADMIN_PERM,
3650                .policy = ip_vs_cmd_policy,
3651                .doit   = ip_vs_genl_set_cmd,
3652        },
3653        {
3654                .cmd    = IPVS_CMD_GET_CONFIG,
3655                .flags  = GENL_ADMIN_PERM,
3656                .doit   = ip_vs_genl_get_cmd,
3657        },
3658        {
3659                .cmd    = IPVS_CMD_GET_INFO,
3660                .flags  = GENL_ADMIN_PERM,
3661                .doit   = ip_vs_genl_get_cmd,
3662        },
3663        {
3664                .cmd    = IPVS_CMD_ZERO,
3665                .flags  = GENL_ADMIN_PERM,
3666                .policy = ip_vs_cmd_policy,
3667                .doit   = ip_vs_genl_set_cmd,
3668        },
3669        {
3670                .cmd    = IPVS_CMD_FLUSH,
3671                .flags  = GENL_ADMIN_PERM,
3672                .doit   = ip_vs_genl_set_cmd,
3673        },
3674};
3675
3676static int __init ip_vs_genl_register(void)
3677{
3678        return genl_register_family_with_ops(&ip_vs_genl_family,
3679                ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3680}
3681
3682static void ip_vs_genl_unregister(void)
3683{
3684        genl_unregister_family(&ip_vs_genl_family);
3685}
3686
3687/* End of Generic Netlink interface definitions */
3688
/*
 * Per-netns init/exit functions.
 */
3692#ifdef CONFIG_SYSCTL
3693static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3694{
3695        int idx;
3696        struct netns_ipvs *ipvs = net_ipvs(net);
3697        struct ctl_table *tbl;
3698
3699        atomic_set(&ipvs->dropentry, 0);
3700        spin_lock_init(&ipvs->dropentry_lock);
3701        spin_lock_init(&ipvs->droppacket_lock);
3702        spin_lock_init(&ipvs->securetcp_lock);
3703
3704        if (!net_eq(net, &init_net)) {
3705                tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3706                if (tbl == NULL)
3707                        return -ENOMEM;
3708
3709                /* Don't export sysctls to unprivileged users */
3710                if (net->user_ns != &init_user_ns)
3711                        tbl[0].procname = NULL;
3712        } else
3713                tbl = vs_vars;
3714        /* Initialize sysctl defaults */
3715        idx = 0;
3716        ipvs->sysctl_amemthresh = 1024;
3717        tbl[idx++].data = &ipvs->sysctl_amemthresh;
3718        ipvs->sysctl_am_droprate = 10;
3719        tbl[idx++].data = &ipvs->sysctl_am_droprate;
3720        tbl[idx++].data = &ipvs->sysctl_drop_entry;
3721        tbl[idx++].data = &ipvs->sysctl_drop_packet;
3722#ifdef CONFIG_IP_VS_NFCT
3723        tbl[idx++].data = &ipvs->sysctl_conntrack;
3724#endif
3725        tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3726        ipvs->sysctl_snat_reroute = 1;
3727        tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3728        ipvs->sysctl_sync_ver = 1;
3729        tbl[idx++].data = &ipvs->sysctl_sync_ver;
3730        ipvs->sysctl_sync_ports = 1;
3731        tbl[idx++].data = &ipvs->sysctl_sync_ports;
3732        ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3733        tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3734        ipvs->sysctl_sync_sock_size = 0;
3735        tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3736        tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3737        tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3738        tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3739        ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3740        ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3741        tbl[idx].data = &ipvs->sysctl_sync_threshold;
3742        tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3743        ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3744        tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3745        ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3746        tbl[idx++].data = &ipvs->sysctl_sync_retries;
3747        tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3748        ipvs->sysctl_pmtu_disc = 1;
3749        tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3750        tbl[idx++].data = &ipvs->sysctl_backup_only;
3751
3752
3753        ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3754        if (ipvs->sysctl_hdr == NULL) {
3755                if (!net_eq(net, &init_net))
3756                        kfree(tbl);
3757                return -ENOMEM;
3758        }
3759        ip_vs_start_estimator(net, &ipvs->tot_stats);
3760        ipvs->sysctl_tbl = tbl;
3761        /* Schedule defense work */
3762        INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3763        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3764
3765        return 0;
3766}
3767
3768static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3769{
3770        struct netns_ipvs *ipvs = net_ipvs(net);
3771
3772        cancel_delayed_work_sync(&ipvs->defense_work);
3773        cancel_work_sync(&ipvs->defense_work.work);
3774        unregister_net_sysctl_table(ipvs->sysctl_hdr);
3775}
3776
3777#else
3778
3779static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3780static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3781
3782#endif
3783
3784static struct notifier_block ip_vs_dst_notifier = {
3785        .notifier_call = ip_vs_dst_event,
3786};
3787
3788int __net_init ip_vs_control_net_init(struct net *net)
3789{
3790        int idx;
3791        struct netns_ipvs *ipvs = net_ipvs(net);
3792
3793        rwlock_init(&ipvs->rs_lock);
3794
3795        /* Initialize rs_table */
3796        for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3797                INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3798
3799        INIT_LIST_HEAD(&ipvs->dest_trash);
3800        atomic_set(&ipvs->ftpsvc_counter, 0);
3801        atomic_set(&ipvs->nullsvc_counter, 0);
3802
3803        /* procfs stats */
3804        ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3805        if (!ipvs->tot_stats.cpustats)
3806                return -ENOMEM;
3807
3808        spin_lock_init(&ipvs->tot_stats.lock);
3809
3810        proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
3811        proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops);
3812        proc_create("ip_vs_stats_percpu", 0, net->proc_net,
3813                    &ip_vs_stats_percpu_fops);
3814
3815        if (ip_vs_control_net_init_sysctl(net))
3816                goto err;
3817
3818        return 0;
3819
3820err:
3821        free_percpu(ipvs->tot_stats.cpustats);
3822        return -ENOMEM;
3823}
3824
3825void __net_exit ip_vs_control_net_cleanup(struct net *net)
3826{
3827        struct netns_ipvs *ipvs = net_ipvs(net);
3828
3829        ip_vs_trash_cleanup(net);
3830        ip_vs_stop_estimator(net, &ipvs->tot_stats);
3831        ip_vs_control_net_cleanup_sysctl(net);
3832        remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
3833        remove_proc_entry("ip_vs_stats", net->proc_net);
3834        remove_proc_entry("ip_vs", net->proc_net);
3835        free_percpu(ipvs->tot_stats.cpustats);
3836}
3837
3838int __init ip_vs_register_nl_ioctl(void)
3839{
3840        int ret;
3841
3842        ret = nf_register_sockopt(&ip_vs_sockopts);
3843        if (ret) {
3844                pr_err("cannot register sockopt.\n");
3845                goto err_sock;
3846        }
3847
3848        ret = ip_vs_genl_register();
3849        if (ret) {
3850                pr_err("cannot register Generic Netlink interface.\n");
3851                goto err_genl;
3852        }
3853        return 0;
3854
3855err_genl:
3856        nf_unregister_sockopt(&ip_vs_sockopts);
3857err_sock:
3858        return ret;
3859}
3860
3861void ip_vs_unregister_nl_ioctl(void)
3862{
3863        ip_vs_genl_unregister();
3864        nf_unregister_sockopt(&ip_vs_sockopts);
3865}
3866
3867int __init ip_vs_control_init(void)
3868{
3869        int idx;
3870        int ret;
3871
3872        EnterFunction(2);
3873
3874        /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3875        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3876                INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3877                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3878        }
3879
3880        smp_wmb();      /* Do we really need it now ? */
3881
3882        ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3883        if (ret < 0)
3884                return ret;
3885
3886        LeaveFunction(2);
3887        return 0;
3888}
3889
3890
3891void ip_vs_control_cleanup(void)
3892{
3893        EnterFunction(2);
3894        unregister_netdevice_notifier(&ip_vs_dst_notifier);
3895        LeaveFunction(2);
3896}
3897