linux/net/netfilter/ipvs/ip_vs_ctl.c
<<
>>
Prefs
   1/*
   2 * IPVS         An implementation of the IP virtual server support for the
   3 *              LINUX operating system.  IPVS is now implemented as a module
   4 *              over the NetFilter framework. IPVS can be used to build a
   5 *              high-performance and highly available server based on a
   6 *              cluster of servers.
   7 *
   8 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   9 *              Peter Kese <peter.kese@ijs.si>
  10 *              Julian Anastasov <ja@ssi.bg>
  11 *
  12 *              This program is free software; you can redistribute it and/or
  13 *              modify it under the terms of the GNU General Public License
  14 *              as published by the Free Software Foundation; either version
  15 *              2 of the License, or (at your option) any later version.
  16 *
  17 * Changes:
  18 *
  19 */
  20
  21#define KMSG_COMPONENT "IPVS"	/* component tag used as the log prefix */
  22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt	/* prepends "IPVS: " to a format string; presumably picked up by the pr_*() helpers used below */
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/types.h>
  27#include <linux/capability.h>
  28#include <linux/fs.h>
  29#include <linux/sysctl.h>
  30#include <linux/proc_fs.h>
  31#include <linux/workqueue.h>
  32#include <linux/swap.h>
  33#include <linux/seq_file.h>
  34#include <linux/slab.h>
  35
  36#include <linux/netfilter.h>
  37#include <linux/netfilter_ipv4.h>
  38#include <linux/mutex.h>
  39
  40#include <net/net_namespace.h>
  41#include <linux/nsproxy.h>
  42#include <net/ip.h>
  43#ifdef CONFIG_IP_VS_IPV6
  44#include <net/ipv6.h>
  45#include <net/ip6_route.h>
  46#endif
  47#include <net/route.h>
  48#include <net/sock.h>
  49#include <net/genetlink.h>
  50
  51#include <asm/uaccess.h>
  52
  53#include <net/ip_vs.h>
  54
  55/* Mutex for IPVS sockopts. Note that [gs]etsockopt may sleep. */
  56static DEFINE_MUTEX(__ip_vs_mutex);
  57
  58/* sysctl variables */
  59
  60#ifdef CONFIG_IP_VS_DEBUG
  61static int sysctl_ip_vs_debug_level = 0;
  62
  63int ip_vs_get_debug_level(void)
  64{
  65        return sysctl_ip_vs_debug_level;
  66}
  67#endif
  68
  69
  70/* Forward declarations (static, so the definitions are expected in this file) */
  71static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
  72
  73
  74#ifdef CONFIG_IP_VS_IPV6
  75/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
  76static bool __ip_vs_addr_is_local_v6(struct net *net,
  77                                     const struct in6_addr *addr)
  78{
  79        struct flowi6 fl6 = {
  80                .daddr = *addr,
  81        };
  82        struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
  83        bool is_local;
  84
  85        is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
  86
  87        dst_release(dst);
  88        return is_local;
  89}
  90#endif
  91
  92#ifdef CONFIG_SYSCTL
  93/*
  94 *      update_defense_level is called from keventd and from sysctl,
  95 *      so it needs to protect itself from softirqs
  96 */
  97static void update_defense_level(struct netns_ipvs *ipvs)
  98{
  99        struct sysinfo i;
 100        static int old_secure_tcp = 0;
 101        int availmem;
 102        int nomem;
 103        int to_change = -1;
 104
 105        /* we only count free and buffered memory (in pages) */
 106        si_meminfo(&i);
 107        availmem = i.freeram + i.bufferram;
 108        /* however in linux 2.5 the i.bufferram is total page cache size,
 109           we need adjust it */
 110        /* si_swapinfo(&i); */
 111        /* availmem = availmem - (i.totalswap - i.freeswap); */
 112
 113        nomem = (availmem < ipvs->sysctl_amemthresh);
 114
 115        local_bh_disable();
 116
 117        /* drop_entry */
 118        spin_lock(&ipvs->dropentry_lock);
 119        switch (ipvs->sysctl_drop_entry) {
 120        case 0:
 121                atomic_set(&ipvs->dropentry, 0);
 122                break;
 123        case 1:
 124                if (nomem) {
 125                        atomic_set(&ipvs->dropentry, 1);
 126                        ipvs->sysctl_drop_entry = 2;
 127                } else {
 128                        atomic_set(&ipvs->dropentry, 0);
 129                }
 130                break;
 131        case 2:
 132                if (nomem) {
 133                        atomic_set(&ipvs->dropentry, 1);
 134                } else {
 135                        atomic_set(&ipvs->dropentry, 0);
 136                        ipvs->sysctl_drop_entry = 1;
 137                };
 138                break;
 139        case 3:
 140                atomic_set(&ipvs->dropentry, 1);
 141                break;
 142        }
 143        spin_unlock(&ipvs->dropentry_lock);
 144
 145        /* drop_packet */
 146        spin_lock(&ipvs->droppacket_lock);
 147        switch (ipvs->sysctl_drop_packet) {
 148        case 0:
 149                ipvs->drop_rate = 0;
 150                break;
 151        case 1:
 152                if (nomem) {
 153                        ipvs->drop_rate = ipvs->drop_counter
 154                                = ipvs->sysctl_amemthresh /
 155                                (ipvs->sysctl_amemthresh-availmem);
 156                        ipvs->sysctl_drop_packet = 2;
 157                } else {
 158                        ipvs->drop_rate = 0;
 159                }
 160                break;
 161        case 2:
 162                if (nomem) {
 163                        ipvs->drop_rate = ipvs->drop_counter
 164                                = ipvs->sysctl_amemthresh /
 165                                (ipvs->sysctl_amemthresh-availmem);
 166                } else {
 167                        ipvs->drop_rate = 0;
 168                        ipvs->sysctl_drop_packet = 1;
 169                }
 170                break;
 171        case 3:
 172                ipvs->drop_rate = ipvs->sysctl_am_droprate;
 173                break;
 174        }
 175        spin_unlock(&ipvs->droppacket_lock);
 176
 177        /* secure_tcp */
 178        spin_lock(&ipvs->securetcp_lock);
 179        switch (ipvs->sysctl_secure_tcp) {
 180        case 0:
 181                if (old_secure_tcp >= 2)
 182                        to_change = 0;
 183                break;
 184        case 1:
 185                if (nomem) {
 186                        if (old_secure_tcp < 2)
 187                                to_change = 1;
 188                        ipvs->sysctl_secure_tcp = 2;
 189                } else {
 190                        if (old_secure_tcp >= 2)
 191                                to_change = 0;
 192                }
 193                break;
 194        case 2:
 195                if (nomem) {
 196                        if (old_secure_tcp < 2)
 197                                to_change = 1;
 198                } else {
 199                        if (old_secure_tcp >= 2)
 200                                to_change = 0;
 201                        ipvs->sysctl_secure_tcp = 1;
 202                }
 203                break;
 204        case 3:
 205                if (old_secure_tcp < 2)
 206                        to_change = 1;
 207                break;
 208        }
 209        old_secure_tcp = ipvs->sysctl_secure_tcp;
 210        if (to_change >= 0)
 211                ip_vs_protocol_timeout_change(ipvs,
 212                                              ipvs->sysctl_secure_tcp > 1);
 213        spin_unlock(&ipvs->securetcp_lock);
 214
 215        local_bh_enable();
 216}
 217
 218
 219/*
 220 *      Timer for checking the defense
 221 */
 222#define DEFENSE_TIMER_PERIOD    1*HZ
 223
 224static void defense_work_handler(struct work_struct *work)
 225{
 226        struct netns_ipvs *ipvs =
 227                container_of(work, struct netns_ipvs, defense_work.work);
 228
 229        update_defense_level(ipvs);
 230        if (atomic_read(&ipvs->dropentry))
 231                ip_vs_random_dropentry(ipvs);
 232        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 233}
 234#endif
 235
 236int
 237ip_vs_use_count_inc(void)
 238{
 239        return try_module_get(THIS_MODULE);
 240}
 241
 242void
 243ip_vs_use_count_dec(void)
 244{
 245        module_put(THIS_MODULE);
 246}
 247
 248
 249/*
 250 *      Hash table: for virtual service lookups
 251 */
 252#define IP_VS_SVC_TAB_BITS 8	/* log2 of the number of buckets */
 253#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)	/* 256 buckets per table */
 254#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)	/* reduces a hash value to a bucket index */
 255
 256/* the service table hashed by <netns, protocol, addr, port>; file-static, so shared by all netns */
 257static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
 258/* the service table hashed by <netns, fwmark>; file-static, so shared by all netns */
 259static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 260
 261
 262/*
 263 *      Returns hash value for virtual service
 264 */
 265static inline unsigned int
 266ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
 267                  const union nf_inet_addr *addr, __be16 port)
 268{
 269        register unsigned int porth = ntohs(port);
 270        __be32 addr_fold = addr->ip;
 271        __u32 ahash;
 272
 273#ifdef CONFIG_IP_VS_IPV6
 274        if (af == AF_INET6)
 275                addr_fold = addr->ip6[0]^addr->ip6[1]^
 276                            addr->ip6[2]^addr->ip6[3];
 277#endif
 278        ahash = ntohl(addr_fold);
 279        ahash ^= ((size_t) ipvs >> 8);
 280
 281        return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
 282               IP_VS_SVC_TAB_MASK;
 283}
 284
 285/*
 286 *      Returns hash value of fwmark for virtual service lookup
 287 */
 288static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
 289{
 290        return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 291}
 292
 293/*
 294 *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
 295 *      or in the ip_vs_svc_fwm_table by fwmark.
 296 *      Should be called with locked tables.
 297 */
 298static int ip_vs_svc_hash(struct ip_vs_service *svc)
 299{
 300        unsigned int hash;
 301
 302        if (svc->flags & IP_VS_SVC_F_HASHED) {
 303                pr_err("%s(): request for already hashed, called from %pF\n",
 304                       __func__, __builtin_return_address(0));
 305                return 0;
 306        }
 307
 308        if (svc->fwmark == 0) {
 309                /*
 310                 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
 311                 */
 312                hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
 313                                         &svc->addr, svc->port);
 314                hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
 315        } else {
 316                /*
 317                 *  Hash it by fwmark in svc_fwm_table
 318                 */
 319                hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
 320                hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 321        }
 322
 323        svc->flags |= IP_VS_SVC_F_HASHED;
 324        /* increase its refcnt because it is referenced by the svc table */
 325        atomic_inc(&svc->refcnt);
 326        return 1;
 327}
 328
 329
 330/*
 331 *      Unhashes a service from svc_table / svc_fwm_table.
 332 *      Should be called with locked tables.
 333 */
 334static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 335{
 336        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
 337                pr_err("%s(): request for unhash flagged, called from %pF\n",
 338                       __func__, __builtin_return_address(0));
 339                return 0;
 340        }
 341
 342        if (svc->fwmark == 0) {
 343                /* Remove it from the svc_table table */
 344                hlist_del_rcu(&svc->s_list);
 345        } else {
 346                /* Remove it from the svc_fwm_table table */
 347                hlist_del_rcu(&svc->f_list);
 348        }
 349
 350        svc->flags &= ~IP_VS_SVC_F_HASHED;
 351        atomic_dec(&svc->refcnt);
 352        return 1;
 353}
 354
 355
 356/*
 357 *      Get service by {netns, proto,addr,port} in the service table.
 358 */
 359static inline struct ip_vs_service *
 360__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
 361                     const union nf_inet_addr *vaddr, __be16 vport)
 362{
 363        unsigned int hash;
 364        struct ip_vs_service *svc;
 365
 366        /* Check for "full" addressed entries */
 367        hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
 368
 369        hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
 370                if ((svc->af == af)
 371                    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 372                    && (svc->port == vport)
 373                    && (svc->protocol == protocol)
 374                    && (svc->ipvs == ipvs)) {
 375                        /* HIT */
 376                        return svc;
 377                }
 378        }
 379
 380        return NULL;
 381}
 382
 383
 384/*
 385 *      Get service by {fwmark} in the service table.
 386 */
 387static inline struct ip_vs_service *
 388__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
 389{
 390        unsigned int hash;
 391        struct ip_vs_service *svc;
 392
 393        /* Check for fwmark addressed entries */
 394        hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
 395
 396        hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
 397                if (svc->fwmark == fwmark && svc->af == af
 398                    && (svc->ipvs == ipvs)) {
 399                        /* HIT */
 400                        return svc;
 401                }
 402        }
 403
 404        return NULL;
 405}
 406
 407/* Find service, called under RCU lock */
 408struct ip_vs_service *
 409ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
 410                   const union nf_inet_addr *vaddr, __be16 vport)
 411{
 412        struct ip_vs_service *svc;
 413
 414        /*
 415         *      Check the table hashed by fwmark first
 416         */
 417        if (fwmark) {
 418                svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
 419                if (svc)
 420                        goto out;
 421        }
 422
 423        /*
 424         *      Check the table hashed by <protocol,addr,port>
 425         *      for "full" addressed entries
 426         */
 427        svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
 428
 429        if (svc == NULL
 430            && protocol == IPPROTO_TCP
 431            && atomic_read(&ipvs->ftpsvc_counter)
 432            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 433                /*
 434                 * Check if ftp service entry exists, the packet
 435                 * might belong to FTP data connections.
 436                 */
 437                svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
 438        }
 439
 440        if (svc == NULL
 441            && atomic_read(&ipvs->nullsvc_counter)) {
 442                /*
 443                 * Check if the catch-all port (port zero) exists
 444                 */
 445                svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
 446        }
 447
 448  out:
 449        IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
 450                      fwmark, ip_vs_proto_name(protocol),
 451                      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
 452                      svc ? "hit" : "not hit");
 453
 454        return svc;
 455}
 456
 457
 458static inline void
 459__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 460{
 461        atomic_inc(&svc->refcnt);
 462        rcu_assign_pointer(dest->svc, svc);
 463}
 464
 465static void ip_vs_service_free(struct ip_vs_service *svc)
 466{
 467        free_percpu(svc->stats.cpustats);
 468        kfree(svc);
 469}
 470
 471static void ip_vs_service_rcu_free(struct rcu_head *head)
 472{
 473        struct ip_vs_service *svc;
 474
 475        svc = container_of(head, struct ip_vs_service, rcu_head);
 476        ip_vs_service_free(svc);
 477}
 478
 479static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
 480{
 481        if (atomic_dec_and_test(&svc->refcnt)) {
 482                IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
 483                              svc->fwmark,
 484                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
 485                              ntohs(svc->port));
 486                if (do_delay)
 487                        call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
 488                else
 489                        ip_vs_service_free(svc);
 490        }
 491}
 492
 493
 494/*
 495 *      Returns hash value for real service
 496 */
 497static inline unsigned int ip_vs_rs_hashkey(int af,
 498                                            const union nf_inet_addr *addr,
 499                                            __be16 port)
 500{
 501        register unsigned int porth = ntohs(port);
 502        __be32 addr_fold = addr->ip;
 503
 504#ifdef CONFIG_IP_VS_IPV6
 505        if (af == AF_INET6)
 506                addr_fold = addr->ip6[0]^addr->ip6[1]^
 507                            addr->ip6[2]^addr->ip6[3];
 508#endif
 509
 510        return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
 511                & IP_VS_RTAB_MASK;
 512}
 513
 514/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
 515static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 516{
 517        unsigned int hash;
 518
 519        if (dest->in_rs_table)
 520                return;
 521
 522        /*
 523         *      Hash by proto,addr,port,
 524         *      which are the parameters of the real service.
 525         */
 526        hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 527
 528        hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
 529        dest->in_rs_table = 1;
 530}
 531
 532/* Unhash ip_vs_dest from rs_table. */
 533static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
 534{
 535        /*
 536         * Remove it from the rs_table table.
 537         */
 538        if (dest->in_rs_table) {
 539                hlist_del_rcu(&dest->d_list);
 540                dest->in_rs_table = 0;
 541        }
 542}
 543
 544/* Check if real service by <proto,addr,port> is present */
 545bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
 546                            const union nf_inet_addr *daddr, __be16 dport)
 547{
 548        unsigned int hash;
 549        struct ip_vs_dest *dest;
 550
 551        /* Check for "full" addressed entries */
 552        hash = ip_vs_rs_hashkey(af, daddr, dport);
 553
 554        rcu_read_lock();
 555        hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
 556                if (dest->port == dport &&
 557                    dest->af == af &&
 558                    ip_vs_addr_equal(af, &dest->addr, daddr) &&
 559                    (dest->protocol == protocol || dest->vfwmark)) {
 560                        /* HIT */
 561                        rcu_read_unlock();
 562                        return true;
 563                }
 564        }
 565        rcu_read_unlock();
 566
 567        return false;
 568}
 569
 570/* Lookup destination by {addr,port} in the given service
 571 * Called under RCU lock.
 572 */
 573static struct ip_vs_dest *
 574ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
 575                  const union nf_inet_addr *daddr, __be16 dport)
 576{
 577        struct ip_vs_dest *dest;
 578
 579        /*
 580         * Find the destination for the given service
 581         */
 582        list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
 583                if ((dest->af == dest_af) &&
 584                    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
 585                    (dest->port == dport)) {
 586                        /* HIT */
 587                        return dest;
 588                }
 589        }
 590
 591        return NULL;
 592}
 593
 594/*
 595 * Find destination by {daddr,dport,vaddr,protocol}
 596 * Created to be used in ip_vs_process_message() in
 597 * the backup synchronization daemon. It finds the
 598 * destination to be bound to the received connection
 599 * on the backup.
 600 * Called under RCU lock, no refcnt is returned.
 601 */
 602struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
 603                                   const union nf_inet_addr *daddr,
 604                                   __be16 dport,
 605                                   const union nf_inet_addr *vaddr,
 606                                   __be16 vport, __u16 protocol, __u32 fwmark,
 607                                   __u32 flags)
 608{
 609        struct ip_vs_dest *dest;
 610        struct ip_vs_service *svc;
 611        __be16 port = dport;
 612
 613        svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
 614        if (!svc)
 615                return NULL;
 616        if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
 617                port = 0;
 618        dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
 619        if (!dest)
 620                dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
 621        return dest;
 622}
 623
 624void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
 625{
 626        struct ip_vs_dest_dst *dest_dst = container_of(head,
 627                                                       struct ip_vs_dest_dst,
 628                                                       rcu_head);
 629
 630        dst_release(dest_dst->dst_cache);
 631        kfree(dest_dst);
 632}
 633
 634/* Release dest_dst and dst_cache for dest in user context */
 635static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
 636{
 637        struct ip_vs_dest_dst *old;
 638
 639        old = rcu_dereference_protected(dest->dest_dst, 1);
 640        if (old) {
 641                RCU_INIT_POINTER(dest->dest_dst, NULL);
 642                call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
 643        }
 644}
 645
 646/*
 647 *  Lookup dest by {svc,addr,port} in the destination trash.
 648 *  The destination trash is used to hold the destinations that are removed
 649 *  from the service table but are still referenced by some conn entries.
 650 *  The reason to add the destination trash is when the dest is temporary
 651 *  down (either by administrator or by monitor program), the dest can be
 652 *  picked back from the trash, the remaining connections to the dest can
 653 *  continue, and the counting information of the dest is also useful for
 654 *  scheduling.
 655 */
 656static struct ip_vs_dest *
 657ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
 658                     const union nf_inet_addr *daddr, __be16 dport)
 659{
 660        struct ip_vs_dest *dest;
 661        struct netns_ipvs *ipvs = svc->ipvs;
 662
 663        /*
 664         * Find the destination in trash
 665         */
 666        spin_lock_bh(&ipvs->dest_trash_lock);
 667        list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
 668                IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
 669                              "dest->refcnt=%d\n",
 670                              dest->vfwmark,
 671                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
 672                              ntohs(dest->port),
 673                              atomic_read(&dest->refcnt));
 674                if (dest->af == dest_af &&
 675                    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
 676                    dest->port == dport &&
 677                    dest->vfwmark == svc->fwmark &&
 678                    dest->protocol == svc->protocol &&
 679                    (svc->fwmark ||
 680                     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
 681                      dest->vport == svc->port))) {
 682                        /* HIT */
 683                        list_del(&dest->t_list);
 684                        ip_vs_dest_hold(dest);
 685                        goto out;
 686                }
 687        }
 688
 689        dest = NULL;
 690
 691out:
 692        spin_unlock_bh(&ipvs->dest_trash_lock);
 693
 694        return dest;
 695}
 696
 697static void ip_vs_dest_free(struct ip_vs_dest *dest)
 698{
 699        struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
 700
 701        __ip_vs_dst_cache_reset(dest);
 702        __ip_vs_svc_put(svc, false);
 703        free_percpu(dest->stats.cpustats);
 704        ip_vs_dest_put_and_free(dest);
 705}
 706
 707/*
 708 *  Clean up all the destinations in the trash
 709 *  Called by the ip_vs_control_cleanup()
 710 *
 711 *  When the ip_vs_control_clearup is activated by ipvs module exit,
 712 *  the service tables must have been flushed and all the connections
 713 *  are expired, and the refcnt of each destination in the trash must
 714 *  be 0, so we simply release them here.
 715 */
 716static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
 717{
 718        struct ip_vs_dest *dest, *nxt;
 719
 720        del_timer_sync(&ipvs->dest_trash_timer);
 721        /* No need to use dest_trash_lock */
 722        list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
 723                list_del(&dest->t_list);
 724                ip_vs_dest_free(dest);
 725        }
 726}
 727
 728static void
 729ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
 730{
 731#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
 732
 733        spin_lock_bh(&src->lock);
 734
 735        IP_VS_SHOW_STATS_COUNTER(conns);
 736        IP_VS_SHOW_STATS_COUNTER(inpkts);
 737        IP_VS_SHOW_STATS_COUNTER(outpkts);
 738        IP_VS_SHOW_STATS_COUNTER(inbytes);
 739        IP_VS_SHOW_STATS_COUNTER(outbytes);
 740
 741        ip_vs_read_estimator(dst, src);
 742
 743        spin_unlock_bh(&src->lock);
 744}
 745
 746static void
 747ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
 748{
 749        dst->conns = (u32)src->conns;
 750        dst->inpkts = (u32)src->inpkts;
 751        dst->outpkts = (u32)src->outpkts;
 752        dst->inbytes = src->inbytes;
 753        dst->outbytes = src->outbytes;
 754        dst->cps = (u32)src->cps;
 755        dst->inpps = (u32)src->inpps;
 756        dst->outpps = (u32)src->outpps;
 757        dst->inbps = (u32)src->inbps;
 758        dst->outbps = (u32)src->outbps;
 759}
 760
 761static void
 762ip_vs_zero_stats(struct ip_vs_stats *stats)
 763{
 764        spin_lock_bh(&stats->lock);
 765
 766        /* get current counters as zero point, rates are zeroed */
 767
 768#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
 769
 770        IP_VS_ZERO_STATS_COUNTER(conns);
 771        IP_VS_ZERO_STATS_COUNTER(inpkts);
 772        IP_VS_ZERO_STATS_COUNTER(outpkts);
 773        IP_VS_ZERO_STATS_COUNTER(inbytes);
 774        IP_VS_ZERO_STATS_COUNTER(outbytes);
 775
 776        ip_vs_zero_estimator(stats);
 777
 778        spin_unlock_bh(&stats->lock);
 779}
 780
 781/*
 782 *      Update a destination in the given service
 783 */
 784static void
 785__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 786                    struct ip_vs_dest_user_kern *udest, int add)
 787{
 788        struct netns_ipvs *ipvs = svc->ipvs;
 789        struct ip_vs_service *old_svc;
 790        struct ip_vs_scheduler *sched;
 791        int conn_flags;
 792
 793        /* We cannot modify an address and change the address family */
 794        BUG_ON(!add && udest->af != dest->af);
 795
 796        if (add && udest->af != svc->af)
 797                ipvs->mixed_address_family_dests++;
 798
 799        /* set the weight and the flags */
 800        atomic_set(&dest->weight, udest->weight);
 801        conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
 802        conn_flags |= IP_VS_CONN_F_INACTIVE;
 803
 804        /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
 805        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
 806                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 807        } else {
 808                /*
 809                 *    Put the real service in rs_table if not present.
 810                 *    For now only for NAT!
 811                 */
 812                ip_vs_rs_hash(ipvs, dest);
 813        }
 814        atomic_set(&dest->conn_flags, conn_flags);
 815
 816        /* bind the service */
 817        old_svc = rcu_dereference_protected(dest->svc, 1);
 818        if (!old_svc) {
 819                __ip_vs_bind_svc(dest, svc);
 820        } else {
 821                if (old_svc != svc) {
 822                        ip_vs_zero_stats(&dest->stats);
 823                        __ip_vs_bind_svc(dest, svc);
 824                        __ip_vs_svc_put(old_svc, true);
 825                }
 826        }
 827
 828        /* set the dest status flags */
 829        dest->flags |= IP_VS_DEST_F_AVAILABLE;
 830
 831        if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
 832                dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
 833        dest->u_threshold = udest->u_threshold;
 834        dest->l_threshold = udest->l_threshold;
 835
 836        dest->af = udest->af;
 837
 838        spin_lock_bh(&dest->dst_lock);
 839        __ip_vs_dst_cache_reset(dest);
 840        spin_unlock_bh(&dest->dst_lock);
 841
 842        if (add) {
 843                ip_vs_start_estimator(svc->ipvs, &dest->stats);
 844                list_add_rcu(&dest->n_list, &svc->destinations);
 845                svc->num_dests++;
 846                sched = rcu_dereference_protected(svc->scheduler, 1);
 847                if (sched && sched->add_dest)
 848                        sched->add_dest(svc, dest);
 849        } else {
 850                sched = rcu_dereference_protected(svc->scheduler, 1);
 851                if (sched && sched->upd_dest)
 852                        sched->upd_dest(svc, dest);
 853        }
 854}
 855
 856
 857/*
 858 *      Create a destination for the given service
 859 */
 860static int
 861ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 862               struct ip_vs_dest **dest_p)
 863{
 864        struct ip_vs_dest *dest;
 865        unsigned int atype, i;
 866
 867        EnterFunction(2);
 868
 869#ifdef CONFIG_IP_VS_IPV6
 870        if (udest->af == AF_INET6) {
 871                atype = ipv6_addr_type(&udest->addr.in6);
 872                if ((!(atype & IPV6_ADDR_UNICAST) ||
 873                        atype & IPV6_ADDR_LINKLOCAL) &&
 874                        !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
 875                        return -EINVAL;
 876        } else
 877#endif
 878        {
 879                atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
 880                if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 881                        return -EINVAL;
 882        }
 883
 884        dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
 885        if (dest == NULL)
 886                return -ENOMEM;
 887
 888        dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
 889        if (!dest->stats.cpustats)
 890                goto err_alloc;
 891
 892        for_each_possible_cpu(i) {
 893                struct ip_vs_cpu_stats *ip_vs_dest_stats;
 894                ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
 895                u64_stats_init(&ip_vs_dest_stats->syncp);
 896        }
 897
 898        dest->af = udest->af;
 899        dest->protocol = svc->protocol;
 900        dest->vaddr = svc->addr;
 901        dest->vport = svc->port;
 902        dest->vfwmark = svc->fwmark;
 903        ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
 904        dest->port = udest->port;
 905
 906        atomic_set(&dest->activeconns, 0);
 907        atomic_set(&dest->inactconns, 0);
 908        atomic_set(&dest->persistconns, 0);
 909        atomic_set(&dest->refcnt, 1);
 910
 911        INIT_HLIST_NODE(&dest->d_list);
 912        spin_lock_init(&dest->dst_lock);
 913        spin_lock_init(&dest->stats.lock);
 914        __ip_vs_update_dest(svc, dest, udest, 1);
 915
 916        *dest_p = dest;
 917
 918        LeaveFunction(2);
 919        return 0;
 920
 921err_alloc:
 922        kfree(dest);
 923        return -ENOMEM;
 924}
 925
 926
 927/*
 928 *      Add a destination into an existing service
 929 */
 930static int
 931ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 932{
 933        struct ip_vs_dest *dest;
 934        union nf_inet_addr daddr;
 935        __be16 dport = udest->port;
 936        int ret;
 937
 938        EnterFunction(2);
 939
 940        if (udest->weight < 0) {
 941                pr_err("%s(): server weight less than zero\n", __func__);
 942                return -ERANGE;
 943        }
 944
 945        if (udest->l_threshold > udest->u_threshold) {
 946                pr_err("%s(): lower threshold is higher than upper threshold\n",
 947                        __func__);
 948                return -ERANGE;
 949        }
 950
 951        ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
 952
 953        /* We use function that requires RCU lock */
 954        rcu_read_lock();
 955        dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
 956        rcu_read_unlock();
 957
 958        if (dest != NULL) {
 959                IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
 960                return -EEXIST;
 961        }
 962
 963        /*
 964         * Check if the dest already exists in the trash and
 965         * is from the same service
 966         */
 967        dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
 968
 969        if (dest != NULL) {
 970                IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
 971                              "dest->refcnt=%d, service %u/%s:%u\n",
 972                              IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
 973                              atomic_read(&dest->refcnt),
 974                              dest->vfwmark,
 975                              IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
 976                              ntohs(dest->vport));
 977
 978                __ip_vs_update_dest(svc, dest, udest, 1);
 979                ret = 0;
 980        } else {
 981                /*
 982                 * Allocate and initialize the dest structure
 983                 */
 984                ret = ip_vs_new_dest(svc, udest, &dest);
 985        }
 986        LeaveFunction(2);
 987
 988        return ret;
 989}
 990
 991
 992/*
 993 *      Edit a destination in the given service
 994 */
 995static int
 996ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 997{
 998        struct ip_vs_dest *dest;
 999        union nf_inet_addr daddr;
1000        __be16 dport = udest->port;
1001
1002        EnterFunction(2);
1003
1004        if (udest->weight < 0) {
1005                pr_err("%s(): server weight less than zero\n", __func__);
1006                return -ERANGE;
1007        }
1008
1009        if (udest->l_threshold > udest->u_threshold) {
1010                pr_err("%s(): lower threshold is higher than upper threshold\n",
1011                        __func__);
1012                return -ERANGE;
1013        }
1014
1015        ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1016
1017        /* We use function that requires RCU lock */
1018        rcu_read_lock();
1019        dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1020        rcu_read_unlock();
1021
1022        if (dest == NULL) {
1023                IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1024                return -ENOENT;
1025        }
1026
1027        __ip_vs_update_dest(svc, dest, udest, 0);
1028        LeaveFunction(2);
1029
1030        return 0;
1031}
1032
/*
 *      Delete a destination (must be already unlinked from the service)
 *
 *      The dest is not freed here: its estimator is stopped, it is
 *      unhashed via ip_vs_rs_unhash() and then parked on ipvs->dest_trash.
 *      @cleanup: when true the trash timer is not armed by this call.
 */
static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
                             bool cleanup)
{
        ip_vs_stop_estimator(ipvs, &dest->stats);

        /*
         *  Remove it from the d-linked list with the real services.
         */
        ip_vs_rs_unhash(dest);

        spin_lock_bh(&ipvs->dest_trash_lock);
        IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
                      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
                      atomic_read(&dest->refcnt));
        /* First entry going into an empty trash: start the expiry timer */
        if (list_empty(&ipvs->dest_trash) && !cleanup)
                mod_timer(&ipvs->dest_trash_timer,
                          jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
        /* dest lives in trash without reference */
        list_add(&dest->t_list, &ipvs->dest_trash);
        /* 0 means "not yet seen idle"; ip_vs_dest_trash_expire() stamps it */
        dest->idle_start = 0;
        spin_unlock_bh(&ipvs->dest_trash_lock);
        ip_vs_dest_put(dest);
}
1059
1060
1061/*
1062 *      Unlink a destination from the given service
1063 */
1064static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1065                                struct ip_vs_dest *dest,
1066                                int svcupd)
1067{
1068        dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1069
1070        /*
1071         *  Remove it from the d-linked destination list.
1072         */
1073        list_del_rcu(&dest->n_list);
1074        svc->num_dests--;
1075
1076        if (dest->af != svc->af)
1077                svc->ipvs->mixed_address_family_dests--;
1078
1079        if (svcupd) {
1080                struct ip_vs_scheduler *sched;
1081
1082                sched = rcu_dereference_protected(svc->scheduler, 1);
1083                if (sched && sched->del_dest)
1084                        sched->del_dest(svc, dest);
1085        }
1086}
1087
1088
1089/*
1090 *      Delete a destination server in the given service
1091 */
1092static int
1093ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1094{
1095        struct ip_vs_dest *dest;
1096        __be16 dport = udest->port;
1097
1098        EnterFunction(2);
1099
1100        /* We use function that requires RCU lock */
1101        rcu_read_lock();
1102        dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
1103        rcu_read_unlock();
1104
1105        if (dest == NULL) {
1106                IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1107                return -ENOENT;
1108        }
1109
1110        /*
1111         *      Unlink dest from the service
1112         */
1113        __ip_vs_unlink_dest(svc, dest, 1);
1114
1115        /*
1116         *      Delete the destination
1117         */
1118        __ip_vs_del_dest(svc->ipvs, dest, false);
1119
1120        LeaveFunction(2);
1121
1122        return 0;
1123}
1124
1125static void ip_vs_dest_trash_expire(unsigned long data)
1126{
1127        struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
1128        struct ip_vs_dest *dest, *next;
1129        unsigned long now = jiffies;
1130
1131        spin_lock(&ipvs->dest_trash_lock);
1132        list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1133                if (atomic_read(&dest->refcnt) > 0)
1134                        continue;
1135                if (dest->idle_start) {
1136                        if (time_before(now, dest->idle_start +
1137                                             IP_VS_DEST_TRASH_PERIOD))
1138                                continue;
1139                } else {
1140                        dest->idle_start = max(1UL, now);
1141                        continue;
1142                }
1143                IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1144                              dest->vfwmark,
1145                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1146                              ntohs(dest->port));
1147                list_del(&dest->t_list);
1148                ip_vs_dest_free(dest);
1149        }
1150        if (!list_empty(&ipvs->dest_trash))
1151                mod_timer(&ipvs->dest_trash_timer,
1152                          jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1153        spin_unlock(&ipvs->dest_trash_lock);
1154}
1155
/*
 *      Add a service into the service hash table
 *
 *      @ipvs:  per-netns IPVS state the service is attached to
 *      @u:     user-supplied description of the service
 *      @svc_p: receives the new service on success
 *
 *      Returns 0 on success.  On failure returns -ENOENT (scheduler or
 *      persistence engine not found), -EINVAL (IPv6 netmask outside
 *      1..128), -ENOMEM, or the error from ip_vs_bind_scheduler(); in all
 *      failure cases everything acquired so far is released via out_err.
 */
static int
ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
                  struct ip_vs_service **svc_p)
{
        int ret = 0, i;
        struct ip_vs_scheduler *sched = NULL;
        struct ip_vs_pe *pe = NULL;
        struct ip_vs_service *svc = NULL;

        /* increase the module use count */
        ip_vs_use_count_inc();

        /* Lookup the scheduler by 'u->sched_name' */
        /* The name "none" means: create the service without a scheduler */
        if (strcmp(u->sched_name, "none")) {
                sched = ip_vs_scheduler_get(u->sched_name);
                if (!sched) {
                        pr_info("Scheduler module ip_vs_%s not found\n",
                                u->sched_name);
                        ret = -ENOENT;
                        goto out_err;
                }
        }

        /* A persistence engine is optional: only looked up if named */
        if (u->pe_name && *u->pe_name) {
                pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
                        ret = -ENOENT;
                        goto out_err;
                }
        }

#ifdef CONFIG_IP_VS_IPV6
        if (u->af == AF_INET6) {
                /* For IPv6 the netmask field is checked as a prefix length */
                __u32 plen = (__force __u32) u->netmask;

                if (plen < 1 || plen > 128) {
                        ret = -EINVAL;
                        goto out_err;
                }
        }
#endif

        svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
        if (svc == NULL) {
                IP_VS_DBG(1, "%s(): no memory\n", __func__);
                ret = -ENOMEM;
                goto out_err;
        }
        svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
        if (!svc->stats.cpustats) {
                ret = -ENOMEM;
                goto out_err;
        }

        for_each_possible_cpu(i) {
                struct ip_vs_cpu_stats *ip_vs_stats;
                ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
                u64_stats_init(&ip_vs_stats->syncp);
        }


        /* I'm the first user of the service */
        /* NOTE(review): refcnt starts at 0 here, not 1 - confirm intent */
        atomic_set(&svc->refcnt, 0);

        svc->af = u->af;
        svc->protocol = u->protocol;
        ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
        svc->port = u->port;
        svc->fwmark = u->fwmark;
        svc->flags = u->flags;
        svc->timeout = u->timeout * HZ;
        svc->netmask = u->netmask;
        svc->ipvs = ipvs;

        INIT_LIST_HEAD(&svc->destinations);
        spin_lock_init(&svc->sched_lock);
        spin_lock_init(&svc->stats.lock);

        /* Bind the scheduler */
        if (sched) {
                ret = ip_vs_bind_scheduler(svc, sched);
                if (ret)
                        goto out_err;
                /* Bound to svc now: out_err must not put it again */
                sched = NULL;
        }

        /* Bind the ct retriever */
        RCU_INIT_POINTER(svc->pe, pe);
        /* svc->pe holds the pointer now; nothing past here jumps to out_err */
        pe = NULL;

        /* Update the virtual service counters */
        if (svc->port == FTPPORT)
                atomic_inc(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
                atomic_inc(&ipvs->nullsvc_counter);

        ip_vs_start_estimator(ipvs, &svc->stats);

        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
                ipvs->num_services++;

        /* Hash the service into the service table */
        ip_vs_svc_hash(svc);

        *svc_p = svc;
        /* Now there is a service - full throttle */
        ipvs->enable = 1;
        return 0;


 out_err:
        /* Undo in reverse: the service first, then the looked-up modules */
        if (svc != NULL) {
                ip_vs_unbind_scheduler(svc, sched);
                ip_vs_service_free(svc);
        }
        ip_vs_scheduler_put(sched);
        ip_vs_pe_put(pe);

        /* decrease the module use count */
        ip_vs_use_count_dec();

        return ret;
}
1285
1286
/*
 *      Edit a service and bind it with a new scheduler
 *
 *      Also updates the flags, timeout, netmask and persistence engine of
 *      @svc from @u.  Returns 0 on success, -ENOENT when the named
 *      scheduler or persistence engine is not found, -EINVAL for an IPv6
 *      netmask outside 1..128, or the error from ip_vs_bind_scheduler().
 *
 *      old_sched/old_pe always name the object to be put at "out": first
 *      the freshly looked-up one (so early failures release it), later
 *      the one that was previously attached to the service.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
        struct ip_vs_scheduler *sched = NULL, *old_sched;
        struct ip_vs_pe *pe = NULL, *old_pe = NULL;
        int ret = 0;

        /*
         * Lookup the scheduler, by 'u->sched_name'
         * (the name "none" leaves sched NULL)
         */
        if (strcmp(u->sched_name, "none")) {
                sched = ip_vs_scheduler_get(u->sched_name);
                if (!sched) {
                        pr_info("Scheduler module ip_vs_%s not found\n",
                                u->sched_name);
                        return -ENOENT;
                }
        }
        /* Until the swap below, an error exit must put the new scheduler */
        old_sched = sched;

        if (u->pe_name && *u->pe_name) {
                pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
                        ret = -ENOENT;
                        goto out;
                }
                /* Same idea as old_sched: error exits put the new pe */
                old_pe = pe;
        }

#ifdef CONFIG_IP_VS_IPV6
        if (u->af == AF_INET6) {
                /* For IPv6 the netmask field is checked as a prefix length */
                __u32 plen = (__force __u32) u->netmask;

                if (plen < 1 || plen > 128) {
                        ret = -EINVAL;
                        goto out;
                }
        }
#endif

        /* From here on old_sched is the scheduler currently bound to svc */
        old_sched = rcu_dereference_protected(svc->scheduler, 1);
        if (sched != old_sched) {
                if (old_sched) {
                        ip_vs_unbind_scheduler(svc, old_sched);
                        RCU_INIT_POINTER(svc->scheduler, NULL);
                        /* Wait all svc->sched_data users */
                        synchronize_rcu();
                }
                /* Bind the new scheduler */
                if (sched) {
                        ret = ip_vs_bind_scheduler(svc, sched);
                        if (ret) {
                                /*
                                 * Bind failed: drop the new scheduler here;
                                 * "out" still puts the already unbound old
                                 * one, leaving svc without a scheduler.
                                 */
                                ip_vs_scheduler_put(sched);
                                goto out;
                        }
                }
        }

        /*
         * Set the flags and timeout value
         */
        svc->flags = u->flags | IP_VS_SVC_F_HASHED;
        svc->timeout = u->timeout * HZ;
        svc->netmask = u->netmask;

        /* From here on old_pe is the engine previously attached to svc */
        old_pe = rcu_dereference_protected(svc->pe, 1);
        if (pe != old_pe)
                rcu_assign_pointer(svc->pe, pe);

out:
        ip_vs_scheduler_put(old_sched);
        ip_vs_pe_put(old_pe);
        return ret;
}
1366
/*
 *      Delete a service from the service list
 *      - The service must be unlinked, unlocked and not referenced!
 *      - We are called under _bh lock
 *
 *      @cleanup is passed through to __ip_vs_del_dest() for every
 *      destination of the service.
 */
static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
{
        struct ip_vs_dest *dest, *nxt;
        struct ip_vs_scheduler *old_sched;
        struct ip_vs_pe *old_pe;
        struct netns_ipvs *ipvs = svc->ipvs;

        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
                ipvs->num_services--;

        ip_vs_stop_estimator(svc->ipvs, &svc->stats);

        /* Unbind scheduler */
        old_sched = rcu_dereference_protected(svc->scheduler, 1);
        ip_vs_unbind_scheduler(svc, old_sched);
        ip_vs_scheduler_put(old_sched);

        /* Unbind persistence engine, keep svc->pe */
        old_pe = rcu_dereference_protected(svc->pe, 1);
        ip_vs_pe_put(old_pe);

        /*
         *    Unlink the whole destination list
         *    (svcupd=0: the scheduler was already unbound above)
         */
        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
                __ip_vs_unlink_dest(svc, dest, 0);
                __ip_vs_del_dest(svc->ipvs, dest, cleanup);
        }

        /*
         *    Update the virtual service counters
         */
        if (svc->port == FTPPORT)
                atomic_dec(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
                atomic_dec(&ipvs->nullsvc_counter);

        /*
         *    Free the service if nobody refers to it
         */
        __ip_vs_svc_put(svc, true);

        /* decrease the module use count */
        ip_vs_use_count_dec();
}
1418
/*
 * Unlink a service from list and try to delete it if its refcnt reached 0
 *
 * @cleanup is forwarded unchanged to __ip_vs_del_service().
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{
        /* Hold svc to avoid double release from dest_trash */
        atomic_inc(&svc->refcnt);
        /*
         * Unhash it from the service table
         */
        ip_vs_svc_unhash(svc);

        /* Tear down; this drops the reference taken above */
        __ip_vs_del_service(svc, cleanup);
}
1433
1434/*
1435 *      Delete a service from the service list
1436 */
1437static int ip_vs_del_service(struct ip_vs_service *svc)
1438{
1439        if (svc == NULL)
1440                return -EEXIST;
1441        ip_vs_unlink_service(svc, false);
1442
1443        return 0;
1444}
1445
1446
1447/*
1448 *      Flush all the virtual services
1449 */
1450static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
1451{
1452        int idx;
1453        struct ip_vs_service *svc;
1454        struct hlist_node *n;
1455
1456        /*
1457         * Flush the service table hashed by <netns,protocol,addr,port>
1458         */
1459        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1460                hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
1461                                          s_list) {
1462                        if (svc->ipvs == ipvs)
1463                                ip_vs_unlink_service(svc, cleanup);
1464                }
1465        }
1466
1467        /*
1468         * Flush the service table hashed by fwmark
1469         */
1470        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1471                hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
1472                                          f_list) {
1473                        if (svc->ipvs == ipvs)
1474                                ip_vs_unlink_service(svc, cleanup);
1475                }
1476        }
1477
1478        return 0;
1479}
1480
/*
 *      Delete service by {netns} in the service table.
 *      Called by __ip_vs_cleanup()
 *
 *      Flushes every service of @ipvs in cleanup mode while holding
 *      __ip_vs_mutex.
 */
void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
{
        EnterFunction(2);
        /* Check for "full" addressed entries */
        mutex_lock(&__ip_vs_mutex);
        /* cleanup=true: see __ip_vs_del_dest(), the trash timer is not armed */
        ip_vs_flush(ipvs, true);
        mutex_unlock(&__ip_vs_mutex);
        LeaveFunction(2);
}
1494
1495/* Put all references for device (dst_cache) */
1496static inline void
1497ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1498{
1499        struct ip_vs_dest_dst *dest_dst;
1500
1501        spin_lock_bh(&dest->dst_lock);
1502        dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
1503        if (dest_dst && dest_dst->dst_cache->dev == dev) {
1504                IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1505                              dev->name,
1506                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1507                              ntohs(dest->port),
1508                              atomic_read(&dest->refcnt));
1509                __ip_vs_dst_cache_reset(dest);
1510        }
1511        spin_unlock_bh(&dest->dst_lock);
1512
1513}
1514/* Netdev event receiver
1515 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1516 */
1517static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1518                           void *ptr)
1519{
1520        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1521        struct net *net = dev_net(dev);
1522        struct netns_ipvs *ipvs = net_ipvs(net);
1523        struct ip_vs_service *svc;
1524        struct ip_vs_dest *dest;
1525        unsigned int idx;
1526
1527        if (event != NETDEV_DOWN || !ipvs)
1528                return NOTIFY_DONE;
1529        IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1530        EnterFunction(2);
1531        mutex_lock(&__ip_vs_mutex);
1532        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1533                hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1534                        if (svc->ipvs == ipvs) {
1535                                list_for_each_entry(dest, &svc->destinations,
1536                                                    n_list) {
1537                                        ip_vs_forget_dev(dest, dev);
1538                                }
1539                        }
1540                }
1541
1542                hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1543                        if (svc->ipvs == ipvs) {
1544                                list_for_each_entry(dest, &svc->destinations,
1545                                                    n_list) {
1546                                        ip_vs_forget_dev(dest, dev);
1547                                }
1548                        }
1549
1550                }
1551        }
1552
1553        spin_lock_bh(&ipvs->dest_trash_lock);
1554        list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1555                ip_vs_forget_dev(dest, dev);
1556        }
1557        spin_unlock_bh(&ipvs->dest_trash_lock);
1558        mutex_unlock(&__ip_vs_mutex);
1559        LeaveFunction(2);
1560        return NOTIFY_DONE;
1561}
1562
1563/*
1564 *      Zero counters in a service or all services
1565 */
1566static int ip_vs_zero_service(struct ip_vs_service *svc)
1567{
1568        struct ip_vs_dest *dest;
1569
1570        list_for_each_entry(dest, &svc->destinations, n_list) {
1571                ip_vs_zero_stats(&dest->stats);
1572        }
1573        ip_vs_zero_stats(&svc->stats);
1574        return 0;
1575}
1576
1577static int ip_vs_zero_all(struct netns_ipvs *ipvs)
1578{
1579        int idx;
1580        struct ip_vs_service *svc;
1581
1582        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1583                hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1584                        if (svc->ipvs == ipvs)
1585                                ip_vs_zero_service(svc);
1586                }
1587        }
1588
1589        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1590                hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1591                        if (svc->ipvs == ipvs)
1592                                ip_vs_zero_service(svc);
1593                }
1594        }
1595
1596        ip_vs_zero_stats(&ipvs->tot_stats);
1597        return 0;
1598}
1599
1600#ifdef CONFIG_SYSCTL
1601
/* Lower/upper bounds referenced via .extra1/.extra2 in vs_vars[] */
static int zero;
static int three = 3;
1604
1605static int
1606proc_do_defense_mode(struct ctl_table *table, int write,
1607                     void __user *buffer, size_t *lenp, loff_t *ppos)
1608{
1609        struct netns_ipvs *ipvs = table->extra2;
1610        int *valp = table->data;
1611        int val = *valp;
1612        int rc;
1613
1614        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1615        if (write && (*valp != val)) {
1616                if ((*valp < 0) || (*valp > 3)) {
1617                        /* Restore the correct value */
1618                        *valp = val;
1619                } else {
1620                        update_defense_level(ipvs);
1621                }
1622        }
1623        return rc;
1624}
1625
1626static int
1627proc_do_sync_threshold(struct ctl_table *table, int write,
1628                       void __user *buffer, size_t *lenp, loff_t *ppos)
1629{
1630        int *valp = table->data;
1631        int val[2];
1632        int rc;
1633
1634        /* backup the value first */
1635        memcpy(val, valp, sizeof(val));
1636
1637        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1638        if (write && (valp[0] < 0 || valp[1] < 0 ||
1639            (valp[0] >= valp[1] && valp[1]))) {
1640                /* Restore the correct value */
1641                memcpy(valp, val, sizeof(val));
1642        }
1643        return rc;
1644}
1645
1646static int
1647proc_do_sync_mode(struct ctl_table *table, int write,
1648                     void __user *buffer, size_t *lenp, loff_t *ppos)
1649{
1650        int *valp = table->data;
1651        int val = *valp;
1652        int rc;
1653
1654        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1655        if (write && (*valp != val)) {
1656                if ((*valp < 0) || (*valp > 1)) {
1657                        /* Restore the correct value */
1658                        *valp = val;
1659                }
1660        }
1661        return rc;
1662}
1663
1664static int
1665proc_do_sync_ports(struct ctl_table *table, int write,
1666                   void __user *buffer, size_t *lenp, loff_t *ppos)
1667{
1668        int *valp = table->data;
1669        int val = *valp;
1670        int rc;
1671
1672        rc = proc_dointvec(table, write, buffer, lenp, ppos);
1673        if (write && (*valp != val)) {
1674                if (*valp < 1 || !is_power_of_2(*valp)) {
1675                        /* Restore the correct value */
1676                        *valp = val;
1677                }
1678        }
1679        return rc;
1680}
1681
/*
 *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *      Do not reorder or insert entries without making the matching
 *      change to the netns init in ip_vs_control_net_init().
 *
 *      Except for debug_level, no entry sets .data here; presumably it
 *      is filled in per netns by that init code - confirm there.
 */

static struct ctl_table vs_vars[] = {
        {
                .procname       = "amemthresh",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "am_droprate",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                /* defense mode knob, validated to 0..3 by its handler */
                .procname       = "drop_entry",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
        {
                /* defense mode knob, validated to 0..3 by its handler */
                .procname       = "drop_packet",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
#ifdef CONFIG_IP_VS_NFCT
        {
                .procname       = "conntrack",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
#endif
        {
                /* defense mode knob, validated to 0..3 by its handler */
                .procname       = "secure_tcp",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
        {
                .procname       = "snat_reroute",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        {
                /* handler only accepts 0 or 1 */
                .procname       = "sync_version",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_do_sync_mode,
        },
        {
                /* handler only accepts powers of two >= 1 */
                .procname       = "sync_ports",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_do_sync_ports,
        },
        {
                .procname       = "sync_persist_mode",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                /* the only unsigned long entry in this table */
                .procname       = "sync_qlen_max",
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = proc_doulongvec_minmax,
        },
        {
                .procname       = "sync_sock_size",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "cache_bypass",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "expire_nodest_conn",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "sloppy_tcp",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "sloppy_sctp",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "expire_quiescent_template",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                /* sized from the netns field; handler validates the pair */
                .procname       = "sync_threshold",
                .maxlen         =
                        sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
                .mode           = 0644,
                .proc_handler   = proc_do_sync_threshold,
        },
        {
                .procname       = "sync_refresh_period",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                /* bounded to [zero, three], i.e. 0..3 */
                .procname       = "sync_retries",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &zero,
                .extra2         = &three,
        },
        {
                .procname       = "nat_icmp_send",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "pmtu_disc",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "backup_only",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "conn_reuse_mode",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "schedule_icmp",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "ignore_tunneled",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
#ifdef CONFIG_IP_VS_DEBUG
        {
                /* backed by the file-scope sysctl_ip_vs_debug_level */
                .procname       = "debug_level",
                .data           = &sysctl_ip_vs_debug_level,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
#endif
        /* empty entry terminates the table */
        { }
};
1861
1862#endif
1863
1864#ifdef CONFIG_PROC_FS
1865
1866struct ip_vs_iter {
1867        struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1868        struct hlist_head *table;
1869        int bucket;
1870};
1871
1872/*
1873 *      Write the contents of the VS rule table to a PROCfs file.
1874 *      (It is kept just for backward compatibility)
1875 */
1876static inline const char *ip_vs_fwd_name(unsigned int flags)
1877{
1878        switch (flags & IP_VS_CONN_F_FWD_MASK) {
1879        case IP_VS_CONN_F_LOCALNODE:
1880                return "Local";
1881        case IP_VS_CONN_F_TUNNEL:
1882                return "Tunnel";
1883        case IP_VS_CONN_F_DROUTE:
1884                return "Route";
1885        default:
1886                return "Masq";
1887        }
1888}
1889
1890
1891/* Get the Nth entry in the two lists */
1892static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1893{
1894        struct net *net = seq_file_net(seq);
1895        struct netns_ipvs *ipvs = net_ipvs(net);
1896        struct ip_vs_iter *iter = seq->private;
1897        int idx;
1898        struct ip_vs_service *svc;
1899
1900        /* look in hash by protocol */
1901        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1902                hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
1903                        if ((svc->ipvs == ipvs) && pos-- == 0) {
1904                                iter->table = ip_vs_svc_table;
1905                                iter->bucket = idx;
1906                                return svc;
1907                        }
1908                }
1909        }
1910
1911        /* keep looking in fwmark */
1912        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1913                hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
1914                                         f_list) {
1915                        if ((svc->ipvs == ipvs) && pos-- == 0) {
1916                                iter->table = ip_vs_svc_fwm_table;
1917                                iter->bucket = idx;
1918                                return svc;
1919                        }
1920                }
1921        }
1922
1923        return NULL;
1924}
1925
1926static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1927        __acquires(RCU)
1928{
1929        rcu_read_lock();
1930        return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1931}
1932
1933
1934static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1935{
1936        struct hlist_node *e;
1937        struct ip_vs_iter *iter;
1938        struct ip_vs_service *svc;
1939
1940        ++*pos;
1941        if (v == SEQ_START_TOKEN)
1942                return ip_vs_info_array(seq,0);
1943
1944        svc = v;
1945        iter = seq->private;
1946
1947        if (iter->table == ip_vs_svc_table) {
1948                /* next service in table hashed by protocol */
1949                e = rcu_dereference(hlist_next_rcu(&svc->s_list));
1950                if (e)
1951                        return hlist_entry(e, struct ip_vs_service, s_list);
1952
1953                while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1954                        hlist_for_each_entry_rcu(svc,
1955                                                 &ip_vs_svc_table[iter->bucket],
1956                                                 s_list) {
1957                                return svc;
1958                        }
1959                }
1960
1961                iter->table = ip_vs_svc_fwm_table;
1962                iter->bucket = -1;
1963                goto scan_fwmark;
1964        }
1965
1966        /* next service in hashed by fwmark */
1967        e = rcu_dereference(hlist_next_rcu(&svc->f_list));
1968        if (e)
1969                return hlist_entry(e, struct ip_vs_service, f_list);
1970
1971 scan_fwmark:
1972        while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1973                hlist_for_each_entry_rcu(svc,
1974                                         &ip_vs_svc_fwm_table[iter->bucket],
1975                                         f_list)
1976                        return svc;
1977        }
1978
1979        return NULL;
1980}
1981
1982static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1983        __releases(RCU)
1984{
1985        rcu_read_unlock();
1986}
1987
1988
1989static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1990{
1991        if (v == SEQ_START_TOKEN) {
1992                seq_printf(seq,
1993                        "IP Virtual Server version %d.%d.%d (size=%d)\n",
1994                        NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1995                seq_puts(seq,
1996                         "Prot LocalAddress:Port Scheduler Flags\n");
1997                seq_puts(seq,
1998                         "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1999        } else {
2000                const struct ip_vs_service *svc = v;
2001                const struct ip_vs_iter *iter = seq->private;
2002                const struct ip_vs_dest *dest;
2003                struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2004                char *sched_name = sched ? sched->name : "none";
2005
2006                if (iter->table == ip_vs_svc_table) {
2007#ifdef CONFIG_IP_VS_IPV6
2008                        if (svc->af == AF_INET6)
2009                                seq_printf(seq, "%s  [%pI6]:%04X %s ",
2010                                           ip_vs_proto_name(svc->protocol),
2011                                           &svc->addr.in6,
2012                                           ntohs(svc->port),
2013                                           sched_name);
2014                        else
2015#endif
2016                                seq_printf(seq, "%s  %08X:%04X %s %s ",
2017                                           ip_vs_proto_name(svc->protocol),
2018                                           ntohl(svc->addr.ip),
2019                                           ntohs(svc->port),
2020                                           sched_name,
2021                                           (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2022                } else {
2023                        seq_printf(seq, "FWM  %08X %s %s",
2024                                   svc->fwmark, sched_name,
2025                                   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2026                }
2027
2028                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2029                        seq_printf(seq, "persistent %d %08X\n",
2030                                svc->timeout,
2031                                ntohl(svc->netmask));
2032                else
2033                        seq_putc(seq, '\n');
2034
2035                list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
2036#ifdef CONFIG_IP_VS_IPV6
2037                        if (dest->af == AF_INET6)
2038                                seq_printf(seq,
2039                                           "  -> [%pI6]:%04X"
2040                                           "      %-7s %-6d %-10d %-10d\n",
2041                                           &dest->addr.in6,
2042                                           ntohs(dest->port),
2043                                           ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2044                                           atomic_read(&dest->weight),
2045                                           atomic_read(&dest->activeconns),
2046                                           atomic_read(&dest->inactconns));
2047                        else
2048#endif
2049                                seq_printf(seq,
2050                                           "  -> %08X:%04X      "
2051                                           "%-7s %-6d %-10d %-10d\n",
2052                                           ntohl(dest->addr.ip),
2053                                           ntohs(dest->port),
2054                                           ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2055                                           atomic_read(&dest->weight),
2056                                           atomic_read(&dest->activeconns),
2057                                           atomic_read(&dest->inactconns));
2058
2059                }
2060        }
2061        return 0;
2062}
2063
2064static const struct seq_operations ip_vs_info_seq_ops = {
2065        .start = ip_vs_info_seq_start,
2066        .next  = ip_vs_info_seq_next,
2067        .stop  = ip_vs_info_seq_stop,
2068        .show  = ip_vs_info_seq_show,
2069};
2070
2071static int ip_vs_info_open(struct inode *inode, struct file *file)
2072{
2073        return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2074                        sizeof(struct ip_vs_iter));
2075}
2076
2077static const struct file_operations ip_vs_info_fops = {
2078        .owner   = THIS_MODULE,
2079        .open    = ip_vs_info_open,
2080        .read    = seq_read,
2081        .llseek  = seq_lseek,
2082        .release = seq_release_net,
2083};
2084
2085static int ip_vs_stats_show(struct seq_file *seq, void *v)
2086{
2087        struct net *net = seq_file_single_net(seq);
2088        struct ip_vs_kstats show;
2089
2090/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2091        seq_puts(seq,
2092                 "   Total Incoming Outgoing         Incoming         Outgoing\n");
2093        seq_printf(seq,
2094                   "   Conns  Packets  Packets            Bytes            Bytes\n");
2095
2096        ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2097        seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
2098                   (unsigned long long)show.conns,
2099                   (unsigned long long)show.inpkts,
2100                   (unsigned long long)show.outpkts,
2101                   (unsigned long long)show.inbytes,
2102                   (unsigned long long)show.outbytes);
2103
2104/*                01234567 01234567 01234567 0123456701234567 0123456701234567*/
2105        seq_puts(seq,
2106                 " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2107        seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
2108                   (unsigned long long)show.cps,
2109                   (unsigned long long)show.inpps,
2110                   (unsigned long long)show.outpps,
2111                   (unsigned long long)show.inbps,
2112                   (unsigned long long)show.outbps);
2113
2114        return 0;
2115}
2116
2117static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2118{
2119        return single_open_net(inode, file, ip_vs_stats_show);
2120}
2121
2122static const struct file_operations ip_vs_stats_fops = {
2123        .owner = THIS_MODULE,
2124        .open = ip_vs_stats_seq_open,
2125        .read = seq_read,
2126        .llseek = seq_lseek,
2127        .release = single_release_net,
2128};
2129
2130static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2131{
2132        struct net *net = seq_file_single_net(seq);
2133        struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2134        struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
2135        struct ip_vs_kstats kstats;
2136        int i;
2137
2138/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2139        seq_puts(seq,
2140                 "       Total Incoming Outgoing         Incoming         Outgoing\n");
2141        seq_printf(seq,
2142                   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2143
2144        for_each_possible_cpu(i) {
2145                struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2146                unsigned int start;
2147                u64 conns, inpkts, outpkts, inbytes, outbytes;
2148
2149                do {
2150                        start = u64_stats_fetch_begin_irq(&u->syncp);
2151                        conns = u->cnt.conns;
2152                        inpkts = u->cnt.inpkts;
2153                        outpkts = u->cnt.outpkts;
2154                        inbytes = u->cnt.inbytes;
2155                        outbytes = u->cnt.outbytes;
2156                } while (u64_stats_fetch_retry_irq(&u->syncp, start));
2157
2158                seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
2159                           i, (u64)conns, (u64)inpkts,
2160                           (u64)outpkts, (u64)inbytes,
2161                           (u64)outbytes);
2162        }
2163
2164        ip_vs_copy_stats(&kstats, tot_stats);
2165
2166        seq_printf(seq, "  ~ %8LX %8LX %8LX %16LX %16LX\n\n",
2167                   (unsigned long long)kstats.conns,
2168                   (unsigned long long)kstats.inpkts,
2169                   (unsigned long long)kstats.outpkts,
2170                   (unsigned long long)kstats.inbytes,
2171                   (unsigned long long)kstats.outbytes);
2172
2173/*                ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2174        seq_puts(seq,
2175                 "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2176        seq_printf(seq, "    %8LX %8LX %8LX %16LX %16LX\n",
2177                   kstats.cps,
2178                   kstats.inpps,
2179                   kstats.outpps,
2180                   kstats.inbps,
2181                   kstats.outbps);
2182
2183        return 0;
2184}
2185
2186static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2187{
2188        return single_open_net(inode, file, ip_vs_stats_percpu_show);
2189}
2190
2191static const struct file_operations ip_vs_stats_percpu_fops = {
2192        .owner = THIS_MODULE,
2193        .open = ip_vs_stats_percpu_seq_open,
2194        .read = seq_read,
2195        .llseek = seq_lseek,
2196        .release = single_release_net,
2197};
2198#endif
2199
2200/*
2201 *      Set timeout values for tcp tcpfin udp in the timeout_table.
2202 */
2203static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2204{
2205#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2206        struct ip_vs_proto_data *pd;
2207#endif
2208
2209        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2210                  u->tcp_timeout,
2211                  u->tcp_fin_timeout,
2212                  u->udp_timeout);
2213
2214#ifdef CONFIG_IP_VS_PROTO_TCP
2215        if (u->tcp_timeout) {
2216                pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2217                pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2218                        = u->tcp_timeout * HZ;
2219        }
2220
2221        if (u->tcp_fin_timeout) {
2222                pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2223                pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2224                        = u->tcp_fin_timeout * HZ;
2225        }
2226#endif
2227
2228#ifdef CONFIG_IP_VS_PROTO_UDP
2229        if (u->udp_timeout) {
2230                pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2231                pd->timeout_table[IP_VS_UDP_S_NORMAL]
2232                        = u->udp_timeout * HZ;
2233        }
2234#endif
2235        return 0;
2236}
2237
/*
 * Zero-based index of sockopt command @cmd, relative to IP_VS_BASE_CTL.
 * The argument is parenthesized so that expression arguments are safe.
 */
#define CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
2239
2240struct ip_vs_svcdest_user {
2241        struct ip_vs_service_user       s;
2242        struct ip_vs_dest_user          d;
2243};
2244
2245static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
2246        [CMDID(IP_VS_SO_SET_ADD)]         = sizeof(struct ip_vs_service_user),
2247        [CMDID(IP_VS_SO_SET_EDIT)]        = sizeof(struct ip_vs_service_user),
2248        [CMDID(IP_VS_SO_SET_DEL)]         = sizeof(struct ip_vs_service_user),
2249        [CMDID(IP_VS_SO_SET_ADDDEST)]     = sizeof(struct ip_vs_svcdest_user),
2250        [CMDID(IP_VS_SO_SET_DELDEST)]     = sizeof(struct ip_vs_svcdest_user),
2251        [CMDID(IP_VS_SO_SET_EDITDEST)]    = sizeof(struct ip_vs_svcdest_user),
2252        [CMDID(IP_VS_SO_SET_TIMEOUT)]     = sizeof(struct ip_vs_timeout_user),
2253        [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
2254        [CMDID(IP_VS_SO_SET_STOPDAEMON)]  = sizeof(struct ip_vs_daemon_user),
2255        [CMDID(IP_VS_SO_SET_ZERO)]        = sizeof(struct ip_vs_service_user),
2256};
2257
2258union ip_vs_set_arglen {
2259        struct ip_vs_service_user       field_IP_VS_SO_SET_ADD;
2260        struct ip_vs_service_user       field_IP_VS_SO_SET_EDIT;
2261        struct ip_vs_service_user       field_IP_VS_SO_SET_DEL;
2262        struct ip_vs_svcdest_user       field_IP_VS_SO_SET_ADDDEST;
2263        struct ip_vs_svcdest_user       field_IP_VS_SO_SET_DELDEST;
2264        struct ip_vs_svcdest_user       field_IP_VS_SO_SET_EDITDEST;
2265        struct ip_vs_timeout_user       field_IP_VS_SO_SET_TIMEOUT;
2266        struct ip_vs_daemon_user        field_IP_VS_SO_SET_STARTDAEMON;
2267        struct ip_vs_daemon_user        field_IP_VS_SO_SET_STOPDAEMON;
2268        struct ip_vs_service_user       field_IP_VS_SO_SET_ZERO;
2269};
2270
2271#define MAX_SET_ARGLEN  sizeof(union ip_vs_set_arglen)
2272
2273static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2274                                  struct ip_vs_service_user *usvc_compat)
2275{
2276        memset(usvc, 0, sizeof(*usvc));
2277
2278        usvc->af                = AF_INET;
2279        usvc->protocol          = usvc_compat->protocol;
2280        usvc->addr.ip           = usvc_compat->addr;
2281        usvc->port              = usvc_compat->port;
2282        usvc->fwmark            = usvc_compat->fwmark;
2283
2284        /* Deep copy of sched_name is not needed here */
2285        usvc->sched_name        = usvc_compat->sched_name;
2286
2287        usvc->flags             = usvc_compat->flags;
2288        usvc->timeout           = usvc_compat->timeout;
2289        usvc->netmask           = usvc_compat->netmask;
2290}
2291
2292static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2293                                   struct ip_vs_dest_user *udest_compat)
2294{
2295        memset(udest, 0, sizeof(*udest));
2296
2297        udest->addr.ip          = udest_compat->addr;
2298        udest->port             = udest_compat->port;
2299        udest->conn_flags       = udest_compat->conn_flags;
2300        udest->weight           = udest_compat->weight;
2301        udest->u_threshold      = udest_compat->u_threshold;
2302        udest->l_threshold      = udest_compat->l_threshold;
2303        udest->af               = AF_INET;
2304}
2305
2306static int
2307do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2308{
2309        struct net *net = sock_net(sk);
2310        int ret;
2311        unsigned char arg[MAX_SET_ARGLEN];
2312        struct ip_vs_service_user *usvc_compat;
2313        struct ip_vs_service_user_kern usvc;
2314        struct ip_vs_service *svc;
2315        struct ip_vs_dest_user *udest_compat;
2316        struct ip_vs_dest_user_kern udest;
2317        struct netns_ipvs *ipvs = net_ipvs(net);
2318
2319        BUILD_BUG_ON(sizeof(arg) > 255);
2320        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2321                return -EPERM;
2322
2323        if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2324                return -EINVAL;
2325        if (len != set_arglen[CMDID(cmd)]) {
2326                IP_VS_DBG(1, "set_ctl: len %u != %u\n",
2327                          len, set_arglen[CMDID(cmd)]);
2328                return -EINVAL;
2329        }
2330
2331        if (copy_from_user(arg, user, len) != 0)
2332                return -EFAULT;
2333
2334        /* increase the module use count */
2335        ip_vs_use_count_inc();
2336
2337        /* Handle daemons since they have another lock */
2338        if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2339            cmd == IP_VS_SO_SET_STOPDAEMON) {
2340                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2341
2342                if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2343                        struct ipvs_sync_daemon_cfg cfg;
2344
2345                        memset(&cfg, 0, sizeof(cfg));
2346                        strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
2347                                sizeof(cfg.mcast_ifn));
2348                        cfg.syncid = dm->syncid;
2349                        rtnl_lock();
2350                        mutex_lock(&ipvs->sync_mutex);
2351                        ret = start_sync_thread(ipvs, &cfg, dm->state);
2352                        mutex_unlock(&ipvs->sync_mutex);
2353                        rtnl_unlock();
2354                } else {
2355                        mutex_lock(&ipvs->sync_mutex);
2356                        ret = stop_sync_thread(ipvs, dm->state);
2357                        mutex_unlock(&ipvs->sync_mutex);
2358                }
2359                goto out_dec;
2360        }
2361
2362        mutex_lock(&__ip_vs_mutex);
2363        if (cmd == IP_VS_SO_SET_FLUSH) {
2364                /* Flush the virtual service */
2365                ret = ip_vs_flush(ipvs, false);
2366                goto out_unlock;
2367        } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2368                /* Set timeout values for (tcp tcpfin udp) */
2369                ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
2370                goto out_unlock;
2371        }
2372
2373        usvc_compat = (struct ip_vs_service_user *)arg;
2374        udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2375
2376        /* We only use the new structs internally, so copy userspace compat
2377         * structs to extended internal versions */
2378        ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2379        ip_vs_copy_udest_compat(&udest, udest_compat);
2380
2381        if (cmd == IP_VS_SO_SET_ZERO) {
2382                /* if no service address is set, zero counters in all */
2383                if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2384                        ret = ip_vs_zero_all(ipvs);
2385                        goto out_unlock;
2386                }
2387        }
2388
2389        /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2390        if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2391            usvc.protocol != IPPROTO_SCTP) {
2392                pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2393                       usvc.protocol, &usvc.addr.ip,
2394                       ntohs(usvc.port), usvc.sched_name);
2395                ret = -EFAULT;
2396                goto out_unlock;
2397        }
2398
2399        /* Lookup the exact service by <protocol, addr, port> or fwmark */
2400        rcu_read_lock();
2401        if (usvc.fwmark == 0)
2402                svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
2403                                           &usvc.addr, usvc.port);
2404        else
2405                svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
2406        rcu_read_unlock();
2407
2408        if (cmd != IP_VS_SO_SET_ADD
2409            && (svc == NULL || svc->protocol != usvc.protocol)) {
2410                ret = -ESRCH;
2411                goto out_unlock;
2412        }
2413
2414        switch (cmd) {
2415        case IP_VS_SO_SET_ADD:
2416                if (svc != NULL)
2417                        ret = -EEXIST;
2418                else
2419                        ret = ip_vs_add_service(ipvs, &usvc, &svc);
2420                break;
2421        case IP_VS_SO_SET_EDIT:
2422                ret = ip_vs_edit_service(svc, &usvc);
2423                break;
2424        case IP_VS_SO_SET_DEL:
2425                ret = ip_vs_del_service(svc);
2426                if (!ret)
2427                        goto out_unlock;
2428                break;
2429        case IP_VS_SO_SET_ZERO:
2430                ret = ip_vs_zero_service(svc);
2431                break;
2432        case IP_VS_SO_SET_ADDDEST:
2433                ret = ip_vs_add_dest(svc, &udest);
2434                break;
2435        case IP_VS_SO_SET_EDITDEST:
2436                ret = ip_vs_edit_dest(svc, &udest);
2437                break;
2438        case IP_VS_SO_SET_DELDEST:
2439                ret = ip_vs_del_dest(svc, &udest);
2440                break;
2441        default:
2442                ret = -EINVAL;
2443        }
2444
2445  out_unlock:
2446        mutex_unlock(&__ip_vs_mutex);
2447  out_dec:
2448        /* decrease the module use count */
2449        ip_vs_use_count_dec();
2450
2451        return ret;
2452}
2453
2454
2455static void
2456ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2457{
2458        struct ip_vs_scheduler *sched;
2459        struct ip_vs_kstats kstats;
2460        char *sched_name;
2461
2462        sched = rcu_dereference_protected(src->scheduler, 1);
2463        sched_name = sched ? sched->name : "none";
2464        dst->protocol = src->protocol;
2465        dst->addr = src->addr.ip;
2466        dst->port = src->port;
2467        dst->fwmark = src->fwmark;
2468        strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
2469        dst->flags = src->flags;
2470        dst->timeout = src->timeout / HZ;
2471        dst->netmask = src->netmask;
2472        dst->num_dests = src->num_dests;
2473        ip_vs_copy_stats(&kstats, &src->stats);
2474        ip_vs_export_stats_user(&dst->stats, &kstats);
2475}
2476
2477static inline int
2478__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
2479                            const struct ip_vs_get_services *get,
2480                            struct ip_vs_get_services __user *uptr)
2481{
2482        int idx, count=0;
2483        struct ip_vs_service *svc;
2484        struct ip_vs_service_entry entry;
2485        int ret = 0;
2486
2487        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2488                hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2489                        /* Only expose IPv4 entries to old interface */
2490                        if (svc->af != AF_INET || (svc->ipvs != ipvs))
2491                                continue;
2492
2493                        if (count >= get->num_services)
2494                                goto out;
2495                        memset(&entry, 0, sizeof(entry));
2496                        ip_vs_copy_service(&entry, svc);
2497                        if (copy_to_user(&uptr->entrytable[count],
2498                                         &entry, sizeof(entry))) {
2499                                ret = -EFAULT;
2500                                goto out;
2501                        }
2502                        count++;
2503                }
2504        }
2505
2506        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2507                hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2508                        /* Only expose IPv4 entries to old interface */
2509                        if (svc->af != AF_INET || (svc->ipvs != ipvs))
2510                                continue;
2511
2512                        if (count >= get->num_services)
2513                                goto out;
2514                        memset(&entry, 0, sizeof(entry));
2515                        ip_vs_copy_service(&entry, svc);
2516                        if (copy_to_user(&uptr->entrytable[count],
2517                                         &entry, sizeof(entry))) {
2518                                ret = -EFAULT;
2519                                goto out;
2520                        }
2521                        count++;
2522                }
2523        }
2524out:
2525        return ret;
2526}
2527
2528static inline int
2529__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
2530                         struct ip_vs_get_dests __user *uptr)
2531{
2532        struct ip_vs_service *svc;
2533        union nf_inet_addr addr = { .ip = get->addr };
2534        int ret = 0;
2535
2536        rcu_read_lock();
2537        if (get->fwmark)
2538                svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
2539        else
2540                svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
2541                                           get->port);
2542        rcu_read_unlock();
2543
2544        if (svc) {
2545                int count = 0;
2546                struct ip_vs_dest *dest;
2547                struct ip_vs_dest_entry entry;
2548                struct ip_vs_kstats kstats;
2549
2550                memset(&entry, 0, sizeof(entry));
2551                list_for_each_entry(dest, &svc->destinations, n_list) {
2552                        if (count >= get->num_dests)
2553                                break;
2554
2555                        /* Cannot expose heterogeneous members via sockopt
2556                         * interface
2557                         */
2558                        if (dest->af != svc->af)
2559                                continue;
2560
2561                        entry.addr = dest->addr.ip;
2562                        entry.port = dest->port;
2563                        entry.conn_flags = atomic_read(&dest->conn_flags);
2564                        entry.weight = atomic_read(&dest->weight);
2565                        entry.u_threshold = dest->u_threshold;
2566                        entry.l_threshold = dest->l_threshold;
2567                        entry.activeconns = atomic_read(&dest->activeconns);
2568                        entry.inactconns = atomic_read(&dest->inactconns);
2569                        entry.persistconns = atomic_read(&dest->persistconns);
2570                        ip_vs_copy_stats(&kstats, &dest->stats);
2571                        ip_vs_export_stats_user(&entry.stats, &kstats);
2572                        if (copy_to_user(&uptr->entrytable[count],
2573                                         &entry, sizeof(entry))) {
2574                                ret = -EFAULT;
2575                                break;
2576                        }
2577                        count++;
2578                }
2579        } else
2580                ret = -ESRCH;
2581        return ret;
2582}
2583
2584static inline void
2585__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2586{
2587#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2588        struct ip_vs_proto_data *pd;
2589#endif
2590
2591        memset(u, 0, sizeof (*u));
2592
2593#ifdef CONFIG_IP_VS_PROTO_TCP
2594        pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2595        u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2596        u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2597#endif
2598#ifdef CONFIG_IP_VS_PROTO_UDP
2599        pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2600        u->udp_timeout =
2601                        pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2602#endif
2603}
2604
2605static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
2606        [CMDID(IP_VS_SO_GET_VERSION)]  = 64,
2607        [CMDID(IP_VS_SO_GET_INFO)]     = sizeof(struct ip_vs_getinfo),
2608        [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
2609        [CMDID(IP_VS_SO_GET_SERVICE)]  = sizeof(struct ip_vs_service_entry),
2610        [CMDID(IP_VS_SO_GET_DESTS)]    = sizeof(struct ip_vs_get_dests),
2611        [CMDID(IP_VS_SO_GET_TIMEOUT)]  = sizeof(struct ip_vs_timeout_user),
2612        [CMDID(IP_VS_SO_GET_DAEMON)]   = 2 * sizeof(struct ip_vs_daemon_user),
2613};
2614
2615union ip_vs_get_arglen {
2616        char                            field_IP_VS_SO_GET_VERSION[64];
2617        struct ip_vs_getinfo            field_IP_VS_SO_GET_INFO;
2618        struct ip_vs_get_services       field_IP_VS_SO_GET_SERVICES;
2619        struct ip_vs_service_entry      field_IP_VS_SO_GET_SERVICE;
2620        struct ip_vs_get_dests          field_IP_VS_SO_GET_DESTS;
2621        struct ip_vs_timeout_user       field_IP_VS_SO_GET_TIMEOUT;
2622        struct ip_vs_daemon_user        field_IP_VS_SO_GET_DAEMON[2];
2623};
2624
2625#define MAX_GET_ARGLEN  sizeof(union ip_vs_get_arglen)
2626
2627static int
2628do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2629{
2630        unsigned char arg[MAX_GET_ARGLEN];
2631        int ret = 0;
2632        unsigned int copylen;
2633        struct net *net = sock_net(sk);
2634        struct netns_ipvs *ipvs = net_ipvs(net);
2635
2636        BUG_ON(!net);
2637        BUILD_BUG_ON(sizeof(arg) > 255);
2638        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2639                return -EPERM;
2640
2641        if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2642                return -EINVAL;
2643
2644        copylen = get_arglen[CMDID(cmd)];
2645        if (*len < (int) copylen) {
2646                IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
2647                return -EINVAL;
2648        }
2649
2650        if (copy_from_user(arg, user, copylen) != 0)
2651                return -EFAULT;
2652        /*
2653         * Handle daemons first since it has its own locking
2654         */
2655        if (cmd == IP_VS_SO_GET_DAEMON) {
2656                struct ip_vs_daemon_user d[2];
2657
2658                memset(&d, 0, sizeof(d));
2659                mutex_lock(&ipvs->sync_mutex);
2660                if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2661                        d[0].state = IP_VS_STATE_MASTER;
2662                        strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
2663                                sizeof(d[0].mcast_ifn));
2664                        d[0].syncid = ipvs->mcfg.syncid;
2665                }
2666                if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2667                        d[1].state = IP_VS_STATE_BACKUP;
2668                        strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
2669                                sizeof(d[1].mcast_ifn));
2670                        d[1].syncid = ipvs->bcfg.syncid;
2671                }
2672                if (copy_to_user(user, &d, sizeof(d)) != 0)
2673                        ret = -EFAULT;
2674                mutex_unlock(&ipvs->sync_mutex);
2675                return ret;
2676        }
2677
2678        mutex_lock(&__ip_vs_mutex);
2679        switch (cmd) {
2680        case IP_VS_SO_GET_VERSION:
2681        {
2682                char buf[64];
2683
2684                sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2685                        NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2686                if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2687                        ret = -EFAULT;
2688                        goto out;
2689                }
2690                *len = strlen(buf)+1;
2691        }
2692        break;
2693
2694        case IP_VS_SO_GET_INFO:
2695        {
2696                struct ip_vs_getinfo info;
2697                info.version = IP_VS_VERSION_CODE;
2698                info.size = ip_vs_conn_tab_size;
2699                info.num_services = ipvs->num_services;
2700                if (copy_to_user(user, &info, sizeof(info)) != 0)
2701                        ret = -EFAULT;
2702        }
2703        break;
2704
2705        case IP_VS_SO_GET_SERVICES:
2706        {
2707                struct ip_vs_get_services *get;
2708                int size;
2709
2710                get = (struct ip_vs_get_services *)arg;
2711                size = sizeof(*get) +
2712                        sizeof(struct ip_vs_service_entry) * get->num_services;
2713                if (*len != size) {
2714                        pr_err("length: %u != %u\n", *len, size);
2715                        ret = -EINVAL;
2716                        goto out;
2717                }
2718                ret = __ip_vs_get_service_entries(ipvs, get, user);
2719        }
2720        break;
2721
2722        case IP_VS_SO_GET_SERVICE:
2723        {
2724                struct ip_vs_service_entry *entry;
2725                struct ip_vs_service *svc;
2726                union nf_inet_addr addr;
2727
2728                entry = (struct ip_vs_service_entry *)arg;
2729                addr.ip = entry->addr;
2730                rcu_read_lock();
2731                if (entry->fwmark)
2732                        svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
2733                else
2734                        svc = __ip_vs_service_find(ipvs, AF_INET,
2735                                                   entry->protocol, &addr,
2736                                                   entry->port);
2737                rcu_read_unlock();
2738                if (svc) {
2739                        ip_vs_copy_service(entry, svc);
2740                        if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2741                                ret = -EFAULT;
2742                } else
2743                        ret = -ESRCH;
2744        }
2745        break;
2746
2747        case IP_VS_SO_GET_DESTS:
2748        {
2749                struct ip_vs_get_dests *get;
2750                int size;
2751
2752                get = (struct ip_vs_get_dests *)arg;
2753                size = sizeof(*get) +
2754                        sizeof(struct ip_vs_dest_entry) * get->num_dests;
2755                if (*len != size) {
2756                        pr_err("length: %u != %u\n", *len, size);
2757                        ret = -EINVAL;
2758                        goto out;
2759                }
2760                ret = __ip_vs_get_dest_entries(ipvs, get, user);
2761        }
2762        break;
2763
2764        case IP_VS_SO_GET_TIMEOUT:
2765        {
2766                struct ip_vs_timeout_user t;
2767
2768                __ip_vs_get_timeouts(ipvs, &t);
2769                if (copy_to_user(user, &t, sizeof(t)) != 0)
2770                        ret = -EFAULT;
2771        }
2772        break;
2773
2774        default:
2775                ret = -EINVAL;
2776        }
2777
2778out:
2779        mutex_unlock(&__ip_vs_mutex);
2780        return ret;
2781}
2782
2783
2784static struct nf_sockopt_ops ip_vs_sockopts = {
2785        .pf             = PF_INET,
2786        .set_optmin     = IP_VS_BASE_CTL,
2787        .set_optmax     = IP_VS_SO_SET_MAX+1,
2788        .set            = do_ip_vs_set_ctl,
2789        .get_optmin     = IP_VS_BASE_CTL,
2790        .get_optmax     = IP_VS_SO_GET_MAX+1,
2791        .get            = do_ip_vs_get_ctl,
2792        .owner          = THIS_MODULE,
2793};
2794
2795/*
2796 * Generic Netlink interface
2797 */
2798
2799/* IPVS genetlink family */
2800static struct genl_family ip_vs_genl_family = {
2801        .id             = GENL_ID_GENERATE,
2802        .hdrsize        = 0,
2803        .name           = IPVS_GENL_NAME,
2804        .version        = IPVS_GENL_VERSION,
2805        .maxattr        = IPVS_CMD_MAX,
2806        .netnsok        = true,         /* Make ipvsadm to work on netns */
2807};
2808
2809/* Policy used for first-level command attributes */
2810static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2811        [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2812        [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2813        [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2814        [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2815        [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2816        [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2817};
2818
2819/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2820static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2821        [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2822        [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2823                                            .len = IP_VS_IFNAME_MAXLEN },
2824        [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2825        [IPVS_DAEMON_ATTR_SYNC_MAXLEN]  = { .type = NLA_U16 },
2826        [IPVS_DAEMON_ATTR_MCAST_GROUP]  = { .type = NLA_U32 },
2827        [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
2828        [IPVS_DAEMON_ATTR_MCAST_PORT]   = { .type = NLA_U16 },
2829        [IPVS_DAEMON_ATTR_MCAST_TTL]    = { .type = NLA_U8 },
2830};
2831
2832/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2833static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2834        [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2835        [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2836        [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2837                                            .len = sizeof(union nf_inet_addr) },
2838        [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2839        [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2840        [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2841                                            .len = IP_VS_SCHEDNAME_MAXLEN },
2842        [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2843                                            .len = IP_VS_PENAME_MAXLEN },
2844        [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2845                                            .len = sizeof(struct ip_vs_flags) },
2846        [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2847        [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2848        [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2849};
2850
2851/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2852static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2853        [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2854                                            .len = sizeof(union nf_inet_addr) },
2855        [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2856        [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2857        [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2858        [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2859        [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2860        [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2861        [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2862        [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2863        [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2864        [IPVS_DEST_ATTR_ADDR_FAMILY]    = { .type = NLA_U16 },
2865};
2866
2867static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2868                                 struct ip_vs_kstats *kstats)
2869{
2870        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2871
2872        if (!nl_stats)
2873                return -EMSGSIZE;
2874
2875        if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
2876            nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
2877            nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
2878            nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
2879            nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
2880            nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
2881            nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
2882            nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
2883            nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
2884            nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
2885                goto nla_put_failure;
2886        nla_nest_end(skb, nl_stats);
2887
2888        return 0;
2889
2890nla_put_failure:
2891        nla_nest_cancel(skb, nl_stats);
2892        return -EMSGSIZE;
2893}
2894
2895static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
2896                                   struct ip_vs_kstats *kstats)
2897{
2898        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2899
2900        if (!nl_stats)
2901                return -EMSGSIZE;
2902
2903        if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
2904            nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
2905            nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
2906            nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
2907            nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
2908            nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
2909            nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
2910            nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
2911            nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
2912            nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
2913                goto nla_put_failure;
2914        nla_nest_end(skb, nl_stats);
2915
2916        return 0;
2917
2918nla_put_failure:
2919        nla_nest_cancel(skb, nl_stats);
2920        return -EMSGSIZE;
2921}
2922
2923static int ip_vs_genl_fill_service(struct sk_buff *skb,
2924                                   struct ip_vs_service *svc)
2925{
2926        struct ip_vs_scheduler *sched;
2927        struct ip_vs_pe *pe;
2928        struct nlattr *nl_service;
2929        struct ip_vs_flags flags = { .flags = svc->flags,
2930                                     .mask = ~0 };
2931        struct ip_vs_kstats kstats;
2932        char *sched_name;
2933
2934        nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2935        if (!nl_service)
2936                return -EMSGSIZE;
2937
2938        if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2939                goto nla_put_failure;
2940        if (svc->fwmark) {
2941                if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2942                        goto nla_put_failure;
2943        } else {
2944                if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2945                    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2946                    nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2947                        goto nla_put_failure;
2948        }
2949
2950        sched = rcu_dereference_protected(svc->scheduler, 1);
2951        sched_name = sched ? sched->name : "none";
2952        pe = rcu_dereference_protected(svc->pe, 1);
2953        if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
2954            (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
2955            nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2956            nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2957            nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2958                goto nla_put_failure;
2959        ip_vs_copy_stats(&kstats, &svc->stats);
2960        if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
2961                goto nla_put_failure;
2962        if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
2963                goto nla_put_failure;
2964
2965        nla_nest_end(skb, nl_service);
2966
2967        return 0;
2968
2969nla_put_failure:
2970        nla_nest_cancel(skb, nl_service);
2971        return -EMSGSIZE;
2972}
2973
2974static int ip_vs_genl_dump_service(struct sk_buff *skb,
2975                                   struct ip_vs_service *svc,
2976                                   struct netlink_callback *cb)
2977{
2978        void *hdr;
2979
2980        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2981                          &ip_vs_genl_family, NLM_F_MULTI,
2982                          IPVS_CMD_NEW_SERVICE);
2983        if (!hdr)
2984                return -EMSGSIZE;
2985
2986        if (ip_vs_genl_fill_service(skb, svc) < 0)
2987                goto nla_put_failure;
2988
2989        genlmsg_end(skb, hdr);
2990        return 0;
2991
2992nla_put_failure:
2993        genlmsg_cancel(skb, hdr);
2994        return -EMSGSIZE;
2995}
2996
2997static int ip_vs_genl_dump_services(struct sk_buff *skb,
2998                                    struct netlink_callback *cb)
2999{
3000        int idx = 0, i;
3001        int start = cb->args[0];
3002        struct ip_vs_service *svc;
3003        struct net *net = sock_net(skb->sk);
3004        struct netns_ipvs *ipvs = net_ipvs(net);
3005
3006        mutex_lock(&__ip_vs_mutex);
3007        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3008                hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
3009                        if (++idx <= start || (svc->ipvs != ipvs))
3010                                continue;
3011                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3012                                idx--;
3013                                goto nla_put_failure;
3014                        }
3015                }
3016        }
3017
3018        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3019                hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
3020                        if (++idx <= start || (svc->ipvs != ipvs))
3021                                continue;
3022                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3023                                idx--;
3024                                goto nla_put_failure;
3025                        }
3026                }
3027        }
3028
3029nla_put_failure:
3030        mutex_unlock(&__ip_vs_mutex);
3031        cb->args[0] = idx;
3032
3033        return skb->len;
3034}
3035
3036static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
3037                                    struct ip_vs_service_user_kern *usvc,
3038                                    struct nlattr *nla, int full_entry,
3039                                    struct ip_vs_service **ret_svc)
3040{
3041        struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3042        struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3043        struct ip_vs_service *svc;
3044
3045        /* Parse mandatory identifying service fields first */
3046        if (nla == NULL ||
3047            nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3048                return -EINVAL;
3049
3050        nla_af          = attrs[IPVS_SVC_ATTR_AF];
3051        nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3052        nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3053        nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3054        nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3055
3056        if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3057                return -EINVAL;
3058
3059        memset(usvc, 0, sizeof(*usvc));
3060
3061        usvc->af = nla_get_u16(nla_af);
3062#ifdef CONFIG_IP_VS_IPV6
3063        if (usvc->af != AF_INET && usvc->af != AF_INET6)
3064#else
3065        if (usvc->af != AF_INET)
3066#endif
3067                return -EAFNOSUPPORT;
3068
3069        if (nla_fwmark) {
3070                usvc->protocol = IPPROTO_TCP;
3071                usvc->fwmark = nla_get_u32(nla_fwmark);
3072        } else {
3073                usvc->protocol = nla_get_u16(nla_protocol);
3074                nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3075                usvc->port = nla_get_be16(nla_port);
3076                usvc->fwmark = 0;
3077        }
3078
3079        rcu_read_lock();
3080        if (usvc->fwmark)
3081                svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
3082        else
3083                svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
3084                                           &usvc->addr, usvc->port);
3085        rcu_read_unlock();
3086        *ret_svc = svc;
3087
3088        /* If a full entry was requested, check for the additional fields */
3089        if (full_entry) {
3090                struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3091                              *nla_netmask;
3092                struct ip_vs_flags flags;
3093
3094                nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3095                nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3096                nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3097                nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3098                nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3099
3100                if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3101                        return -EINVAL;
3102
3103                nla_memcpy(&flags, nla_flags, sizeof(flags));
3104
3105                /* prefill flags from service if it already exists */
3106                if (svc)
3107                        usvc->flags = svc->flags;
3108
3109                /* set new flags from userland */
3110                usvc->flags = (usvc->flags & ~flags.mask) |
3111                              (flags.flags & flags.mask);
3112                usvc->sched_name = nla_data(nla_sched);
3113                usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3114                usvc->timeout = nla_get_u32(nla_timeout);
3115                usvc->netmask = nla_get_be32(nla_netmask);
3116        }
3117
3118        return 0;
3119}
3120
3121static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
3122                                                     struct nlattr *nla)
3123{
3124        struct ip_vs_service_user_kern usvc;
3125        struct ip_vs_service *svc;
3126        int ret;
3127
3128        ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
3129        return ret ? ERR_PTR(ret) : svc;
3130}
3131
3132static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3133{
3134        struct nlattr *nl_dest;
3135        struct ip_vs_kstats kstats;
3136
3137        nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3138        if (!nl_dest)
3139                return -EMSGSIZE;
3140
3141        if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3142            nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3143            nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3144                        (atomic_read(&dest->conn_flags) &
3145                         IP_VS_CONN_F_FWD_MASK)) ||
3146            nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3147                        atomic_read(&dest->weight)) ||
3148            nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3149            nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3150            nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3151                        atomic_read(&dest->activeconns)) ||
3152            nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3153                        atomic_read(&dest->inactconns)) ||
3154            nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3155                        atomic_read(&dest->persistconns)) ||
3156            nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
3157                goto nla_put_failure;
3158        ip_vs_copy_stats(&kstats, &dest->stats);
3159        if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
3160                goto nla_put_failure;
3161        if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
3162                goto nla_put_failure;
3163
3164        nla_nest_end(skb, nl_dest);
3165
3166        return 0;
3167
3168nla_put_failure:
3169        nla_nest_cancel(skb, nl_dest);
3170        return -EMSGSIZE;
3171}
3172
3173static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3174                                struct netlink_callback *cb)
3175{
3176        void *hdr;
3177
3178        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3179                          &ip_vs_genl_family, NLM_F_MULTI,
3180                          IPVS_CMD_NEW_DEST);
3181        if (!hdr)
3182                return -EMSGSIZE;
3183
3184        if (ip_vs_genl_fill_dest(skb, dest) < 0)
3185                goto nla_put_failure;
3186
3187        genlmsg_end(skb, hdr);
3188        return 0;
3189
3190nla_put_failure:
3191        genlmsg_cancel(skb, hdr);
3192        return -EMSGSIZE;
3193}
3194
3195static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3196                                 struct netlink_callback *cb)
3197{
3198        int idx = 0;
3199        int start = cb->args[0];
3200        struct ip_vs_service *svc;
3201        struct ip_vs_dest *dest;
3202        struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3203        struct net *net = sock_net(skb->sk);
3204        struct netns_ipvs *ipvs = net_ipvs(net);
3205
3206        mutex_lock(&__ip_vs_mutex);
3207
3208        /* Try to find the service for which to dump destinations */
3209        if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3210                        IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3211                goto out_err;
3212
3213
3214        svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
3215        if (IS_ERR(svc) || svc == NULL)
3216                goto out_err;
3217
3218        /* Dump the destinations */
3219        list_for_each_entry(dest, &svc->destinations, n_list) {
3220                if (++idx <= start)
3221                        continue;
3222                if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3223                        idx--;
3224                        goto nla_put_failure;
3225                }
3226        }
3227
3228nla_put_failure:
3229        cb->args[0] = idx;
3230
3231out_err:
3232        mutex_unlock(&__ip_vs_mutex);
3233
3234        return skb->len;
3235}
3236
3237static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3238                                 struct nlattr *nla, int full_entry)
3239{
3240        struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3241        struct nlattr *nla_addr, *nla_port;
3242        struct nlattr *nla_addr_family;
3243
3244        /* Parse mandatory identifying destination fields first */
3245        if (nla == NULL ||
3246            nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3247                return -EINVAL;
3248
3249        nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3250        nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3251        nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
3252
3253        if (!(nla_addr && nla_port))
3254                return -EINVAL;
3255
3256        memset(udest, 0, sizeof(*udest));
3257
3258        nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3259        udest->port = nla_get_be16(nla_port);
3260
3261        if (nla_addr_family)
3262                udest->af = nla_get_u16(nla_addr_family);
3263        else
3264                udest->af = 0;
3265
3266        /* If a full entry was requested, check for the additional fields */
3267        if (full_entry) {
3268                struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3269                              *nla_l_thresh;
3270
3271                nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3272                nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3273                nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3274                nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3275
3276                if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3277                        return -EINVAL;
3278
3279                udest->conn_flags = nla_get_u32(nla_fwd)
3280                                    & IP_VS_CONN_F_FWD_MASK;
3281                udest->weight = nla_get_u32(nla_weight);
3282                udest->u_threshold = nla_get_u32(nla_u_thresh);
3283                udest->l_threshold = nla_get_u32(nla_l_thresh);
3284        }
3285
3286        return 0;
3287}
3288
3289static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
3290                                  struct ipvs_sync_daemon_cfg *c)
3291{
3292        struct nlattr *nl_daemon;
3293
3294        nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3295        if (!nl_daemon)
3296                return -EMSGSIZE;
3297
3298        if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3299            nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
3300            nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
3301            nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
3302            nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
3303            nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
3304                goto nla_put_failure;
3305#ifdef CONFIG_IP_VS_IPV6
3306        if (c->mcast_af == AF_INET6) {
3307                if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
3308                                     &c->mcast_group.in6))
3309                        goto nla_put_failure;
3310        } else
3311#endif
3312                if (c->mcast_af == AF_INET &&
3313                    nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
3314                                    c->mcast_group.ip))
3315                        goto nla_put_failure;
3316        nla_nest_end(skb, nl_daemon);
3317
3318        return 0;
3319
3320nla_put_failure:
3321        nla_nest_cancel(skb, nl_daemon);
3322        return -EMSGSIZE;
3323}
3324
3325static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
3326                                  struct ipvs_sync_daemon_cfg *c,
3327                                  struct netlink_callback *cb)
3328{
3329        void *hdr;
3330        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3331                          &ip_vs_genl_family, NLM_F_MULTI,
3332                          IPVS_CMD_NEW_DAEMON);
3333        if (!hdr)
3334                return -EMSGSIZE;
3335
3336        if (ip_vs_genl_fill_daemon(skb, state, c))
3337                goto nla_put_failure;
3338
3339        genlmsg_end(skb, hdr);
3340        return 0;
3341
3342nla_put_failure:
3343        genlmsg_cancel(skb, hdr);
3344        return -EMSGSIZE;
3345}
3346
3347static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3348                                   struct netlink_callback *cb)
3349{
3350        struct net *net = sock_net(skb->sk);
3351        struct netns_ipvs *ipvs = net_ipvs(net);
3352
3353        mutex_lock(&ipvs->sync_mutex);
3354        if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3355                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3356                                           &ipvs->mcfg, cb) < 0)
3357                        goto nla_put_failure;
3358
3359                cb->args[0] = 1;
3360        }
3361
3362        if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3363                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3364                                           &ipvs->bcfg, cb) < 0)
3365                        goto nla_put_failure;
3366
3367                cb->args[1] = 1;
3368        }
3369
3370nla_put_failure:
3371        mutex_unlock(&ipvs->sync_mutex);
3372
3373        return skb->len;
3374}
3375
3376static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3377{
3378        struct ipvs_sync_daemon_cfg c;
3379        struct nlattr *a;
3380        int ret;
3381
3382        memset(&c, 0, sizeof(c));
3383        if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3384              attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3385              attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3386                return -EINVAL;
3387        strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3388                sizeof(c.mcast_ifn));
3389        c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
3390
3391        a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
3392        if (a)
3393                c.sync_maxlen = nla_get_u16(a);
3394
3395        a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
3396        if (a) {
3397                c.mcast_af = AF_INET;
3398                c.mcast_group.ip = nla_get_in_addr(a);
3399                if (!ipv4_is_multicast(c.mcast_group.ip))
3400                        return -EINVAL;
3401        } else {
3402                a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
3403                if (a) {
3404#ifdef CONFIG_IP_VS_IPV6
3405                        int addr_type;
3406
3407                        c.mcast_af = AF_INET6;
3408                        c.mcast_group.in6 = nla_get_in6_addr(a);
3409                        addr_type = ipv6_addr_type(&c.mcast_group.in6);
3410                        if (!(addr_type & IPV6_ADDR_MULTICAST))
3411                                return -EINVAL;
3412#else
3413                        return -EAFNOSUPPORT;
3414#endif
3415                }
3416        }
3417
3418        a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
3419        if (a)
3420                c.mcast_port = nla_get_u16(a);
3421
3422        a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
3423        if (a)
3424                c.mcast_ttl = nla_get_u8(a);
3425
3426        /* The synchronization protocol is incompatible with mixed family
3427         * services
3428         */
3429        if (ipvs->mixed_address_family_dests > 0)
3430                return -EINVAL;
3431
3432        rtnl_lock();
3433        mutex_lock(&ipvs->sync_mutex);
3434        ret = start_sync_thread(ipvs, &c,
3435                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3436        mutex_unlock(&ipvs->sync_mutex);
3437        rtnl_unlock();
3438        return ret;
3439}
3440
3441static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3442{
3443        int ret;
3444
3445        if (!attrs[IPVS_DAEMON_ATTR_STATE])
3446                return -EINVAL;
3447
3448        mutex_lock(&ipvs->sync_mutex);
3449        ret = stop_sync_thread(ipvs,
3450                               nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3451        mutex_unlock(&ipvs->sync_mutex);
3452        return ret;
3453}
3454
3455static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
3456{
3457        struct ip_vs_timeout_user t;
3458
3459        __ip_vs_get_timeouts(ipvs, &t);
3460
3461        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3462                t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3463
3464        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3465                t.tcp_fin_timeout =
3466                        nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3467
3468        if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3469                t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3470
3471        return ip_vs_set_timeout(ipvs, &t);
3472}
3473
3474static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3475{
3476        int ret = -EINVAL, cmd;
3477        struct net *net = sock_net(skb->sk);
3478        struct netns_ipvs *ipvs = net_ipvs(net);
3479
3480        cmd = info->genlhdr->cmd;
3481
3482        if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3483                struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3484
3485                if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3486                    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3487                                     info->attrs[IPVS_CMD_ATTR_DAEMON],
3488                                     ip_vs_daemon_policy))
3489                        goto out;
3490
3491                if (cmd == IPVS_CMD_NEW_DAEMON)
3492                        ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
3493                else
3494                        ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
3495        }
3496
3497out:
3498        return ret;
3499}
3500
3501static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3502{
3503        struct ip_vs_service *svc = NULL;
3504        struct ip_vs_service_user_kern usvc;
3505        struct ip_vs_dest_user_kern udest;
3506        int ret = 0, cmd;
3507        int need_full_svc = 0, need_full_dest = 0;
3508        struct net *net = sock_net(skb->sk);
3509        struct netns_ipvs *ipvs = net_ipvs(net);
3510
3511        cmd = info->genlhdr->cmd;
3512
3513        mutex_lock(&__ip_vs_mutex);
3514
3515        if (cmd == IPVS_CMD_FLUSH) {
3516                ret = ip_vs_flush(ipvs, false);
3517                goto out;
3518        } else if (cmd == IPVS_CMD_SET_CONFIG) {
3519                ret = ip_vs_genl_set_config(ipvs, info->attrs);
3520                goto out;
3521        } else if (cmd == IPVS_CMD_ZERO &&
3522                   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3523                ret = ip_vs_zero_all(ipvs);
3524                goto out;
3525        }
3526
3527        /* All following commands require a service argument, so check if we
3528         * received a valid one. We need a full service specification when
3529         * adding / editing a service. Only identifying members otherwise. */
3530        if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3531                need_full_svc = 1;
3532
3533        ret = ip_vs_genl_parse_service(ipvs, &usvc,
3534                                       info->attrs[IPVS_CMD_ATTR_SERVICE],
3535                                       need_full_svc, &svc);
3536        if (ret)
3537                goto out;
3538
3539        /* Unless we're adding a new service, the service must already exist */
3540        if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3541                ret = -ESRCH;
3542                goto out;
3543        }
3544
3545        /* Destination commands require a valid destination argument. For
3546         * adding / editing a destination, we need a full destination
3547         * specification. */
3548        if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3549            cmd == IPVS_CMD_DEL_DEST) {
3550                if (cmd != IPVS_CMD_DEL_DEST)
3551                        need_full_dest = 1;
3552
3553                ret = ip_vs_genl_parse_dest(&udest,
3554                                            info->attrs[IPVS_CMD_ATTR_DEST],
3555                                            need_full_dest);
3556                if (ret)
3557                        goto out;
3558
3559                /* Old protocols did not allow the user to specify address
3560                 * family, so we set it to zero instead.  We also didn't
3561                 * allow heterogeneous pools in the old code, so it's safe
3562                 * to assume that this will have the same address family as
3563                 * the service.
3564                 */
3565                if (udest.af == 0)
3566                        udest.af = svc->af;
3567
3568                if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
3569                        /* The synchronization protocol is incompatible
3570                         * with mixed family services
3571                         */
3572                        if (ipvs->sync_state) {
3573                                ret = -EINVAL;
3574                                goto out;
3575                        }
3576
3577                        /* Which connection types do we support? */
3578                        switch (udest.conn_flags) {
3579                        case IP_VS_CONN_F_TUNNEL:
3580                                /* We are able to forward this */
3581                                break;
3582                        default:
3583                                ret = -EINVAL;
3584                                goto out;
3585                        }
3586                }
3587        }
3588
3589        switch (cmd) {
3590        case IPVS_CMD_NEW_SERVICE:
3591                if (svc == NULL)
3592                        ret = ip_vs_add_service(ipvs, &usvc, &svc);
3593                else
3594                        ret = -EEXIST;
3595                break;
3596        case IPVS_CMD_SET_SERVICE:
3597                ret = ip_vs_edit_service(svc, &usvc);
3598                break;
3599        case IPVS_CMD_DEL_SERVICE:
3600                ret = ip_vs_del_service(svc);
3601                /* do not use svc, it can be freed */
3602                break;
3603        case IPVS_CMD_NEW_DEST:
3604                ret = ip_vs_add_dest(svc, &udest);
3605                break;
3606        case IPVS_CMD_SET_DEST:
3607                ret = ip_vs_edit_dest(svc, &udest);
3608                break;
3609        case IPVS_CMD_DEL_DEST:
3610                ret = ip_vs_del_dest(svc, &udest);
3611                break;
3612        case IPVS_CMD_ZERO:
3613                ret = ip_vs_zero_service(svc);
3614                break;
3615        default:
3616                ret = -EINVAL;
3617        }
3618
3619out:
3620        mutex_unlock(&__ip_vs_mutex);
3621
3622        return ret;
3623}
3624
3625static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3626{
3627        struct sk_buff *msg;
3628        void *reply;
3629        int ret, cmd, reply_cmd;
3630        struct net *net = sock_net(skb->sk);
3631        struct netns_ipvs *ipvs = net_ipvs(net);
3632
3633        cmd = info->genlhdr->cmd;
3634
3635        if (cmd == IPVS_CMD_GET_SERVICE)
3636                reply_cmd = IPVS_CMD_NEW_SERVICE;
3637        else if (cmd == IPVS_CMD_GET_INFO)
3638                reply_cmd = IPVS_CMD_SET_INFO;
3639        else if (cmd == IPVS_CMD_GET_CONFIG)
3640                reply_cmd = IPVS_CMD_SET_CONFIG;
3641        else {
3642                pr_err("unknown Generic Netlink command\n");
3643                return -EINVAL;
3644        }
3645
3646        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3647        if (!msg)
3648                return -ENOMEM;
3649
3650        mutex_lock(&__ip_vs_mutex);
3651
3652        reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3653        if (reply == NULL)
3654                goto nla_put_failure;
3655
3656        switch (cmd) {
3657        case IPVS_CMD_GET_SERVICE:
3658        {
3659                struct ip_vs_service *svc;
3660
3661                svc = ip_vs_genl_find_service(ipvs,
3662                                              info->attrs[IPVS_CMD_ATTR_SERVICE]);
3663                if (IS_ERR(svc)) {
3664                        ret = PTR_ERR(svc);
3665                        goto out_err;
3666                } else if (svc) {
3667                        ret = ip_vs_genl_fill_service(msg, svc);
3668                        if (ret)
3669                                goto nla_put_failure;
3670                } else {
3671                        ret = -ESRCH;
3672                        goto out_err;
3673                }
3674
3675                break;
3676        }
3677
3678        case IPVS_CMD_GET_CONFIG:
3679        {
3680                struct ip_vs_timeout_user t;
3681
3682                __ip_vs_get_timeouts(ipvs, &t);
3683#ifdef CONFIG_IP_VS_PROTO_TCP
3684                if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3685                                t.tcp_timeout) ||
3686                    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3687                                t.tcp_fin_timeout))
3688                        goto nla_put_failure;
3689#endif
3690#ifdef CONFIG_IP_VS_PROTO_UDP
3691                if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3692                        goto nla_put_failure;
3693#endif
3694
3695                break;
3696        }
3697
3698        case IPVS_CMD_GET_INFO:
3699                if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3700                                IP_VS_VERSION_CODE) ||
3701                    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3702                                ip_vs_conn_tab_size))
3703                        goto nla_put_failure;
3704                break;
3705        }
3706
3707        genlmsg_end(msg, reply);
3708        ret = genlmsg_reply(msg, info);
3709        goto out;
3710
3711nla_put_failure:
3712        pr_err("not enough space in Netlink message\n");
3713        ret = -EMSGSIZE;
3714
3715out_err:
3716        nlmsg_free(msg);
3717out:
3718        mutex_unlock(&__ip_vs_mutex);
3719
3720        return ret;
3721}
3722
3723
3724static const struct genl_ops ip_vs_genl_ops[] = {
3725        {
3726                .cmd    = IPVS_CMD_NEW_SERVICE,
3727                .flags  = GENL_ADMIN_PERM,
3728                .policy = ip_vs_cmd_policy,
3729                .doit   = ip_vs_genl_set_cmd,
3730        },
3731        {
3732                .cmd    = IPVS_CMD_SET_SERVICE,
3733                .flags  = GENL_ADMIN_PERM,
3734                .policy = ip_vs_cmd_policy,
3735                .doit   = ip_vs_genl_set_cmd,
3736        },
3737        {
3738                .cmd    = IPVS_CMD_DEL_SERVICE,
3739                .flags  = GENL_ADMIN_PERM,
3740                .policy = ip_vs_cmd_policy,
3741                .doit   = ip_vs_genl_set_cmd,
3742        },
3743        {
3744                .cmd    = IPVS_CMD_GET_SERVICE,
3745                .flags  = GENL_ADMIN_PERM,
3746                .doit   = ip_vs_genl_get_cmd,
3747                .dumpit = ip_vs_genl_dump_services,
3748                .policy = ip_vs_cmd_policy,
3749        },
3750        {
3751                .cmd    = IPVS_CMD_NEW_DEST,
3752                .flags  = GENL_ADMIN_PERM,
3753                .policy = ip_vs_cmd_policy,
3754                .doit   = ip_vs_genl_set_cmd,
3755        },
3756        {
3757                .cmd    = IPVS_CMD_SET_DEST,
3758                .flags  = GENL_ADMIN_PERM,
3759                .policy = ip_vs_cmd_policy,
3760                .doit   = ip_vs_genl_set_cmd,
3761        },
3762        {
3763                .cmd    = IPVS_CMD_DEL_DEST,
3764                .flags  = GENL_ADMIN_PERM,
3765                .policy = ip_vs_cmd_policy,
3766                .doit   = ip_vs_genl_set_cmd,
3767        },
3768        {
3769                .cmd    = IPVS_CMD_GET_DEST,
3770                .flags  = GENL_ADMIN_PERM,
3771                .policy = ip_vs_cmd_policy,
3772                .dumpit = ip_vs_genl_dump_dests,
3773        },
3774        {
3775                .cmd    = IPVS_CMD_NEW_DAEMON,
3776                .flags  = GENL_ADMIN_PERM,
3777                .policy = ip_vs_cmd_policy,
3778                .doit   = ip_vs_genl_set_daemon,
3779        },
3780        {
3781                .cmd    = IPVS_CMD_DEL_DAEMON,
3782                .flags  = GENL_ADMIN_PERM,
3783                .policy = ip_vs_cmd_policy,
3784                .doit   = ip_vs_genl_set_daemon,
3785        },
3786        {
3787                .cmd    = IPVS_CMD_GET_DAEMON,
3788                .flags  = GENL_ADMIN_PERM,
3789                .dumpit = ip_vs_genl_dump_daemons,
3790        },
3791        {
3792                .cmd    = IPVS_CMD_SET_CONFIG,
3793                .flags  = GENL_ADMIN_PERM,
3794                .policy = ip_vs_cmd_policy,
3795                .doit   = ip_vs_genl_set_cmd,
3796        },
3797        {
3798                .cmd    = IPVS_CMD_GET_CONFIG,
3799                .flags  = GENL_ADMIN_PERM,
3800                .doit   = ip_vs_genl_get_cmd,
3801        },
3802        {
3803                .cmd    = IPVS_CMD_GET_INFO,
3804                .flags  = GENL_ADMIN_PERM,
3805                .doit   = ip_vs_genl_get_cmd,
3806        },
3807        {
3808                .cmd    = IPVS_CMD_ZERO,
3809                .flags  = GENL_ADMIN_PERM,
3810                .policy = ip_vs_cmd_policy,
3811                .doit   = ip_vs_genl_set_cmd,
3812        },
3813        {
3814                .cmd    = IPVS_CMD_FLUSH,
3815                .flags  = GENL_ADMIN_PERM,
3816                .doit   = ip_vs_genl_set_cmd,
3817        },
3818};
3819
3820static int __init ip_vs_genl_register(void)
3821{
3822        return genl_register_family_with_ops(&ip_vs_genl_family,
3823                                             ip_vs_genl_ops);
3824}
3825
3826static void ip_vs_genl_unregister(void)
3827{
3828        genl_unregister_family(&ip_vs_genl_family);
3829}
3830
3831/* End of Generic Netlink interface definitions */
3832
/*
 * per netns init/exit func.
 */
3836#ifdef CONFIG_SYSCTL
3837static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
3838{
3839        struct net *net = ipvs->net;
3840        int idx;
3841        struct ctl_table *tbl;
3842
3843        atomic_set(&ipvs->dropentry, 0);
3844        spin_lock_init(&ipvs->dropentry_lock);
3845        spin_lock_init(&ipvs->droppacket_lock);
3846        spin_lock_init(&ipvs->securetcp_lock);
3847
3848        if (!net_eq(net, &init_net)) {
3849                tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3850                if (tbl == NULL)
3851                        return -ENOMEM;
3852
3853                /* Don't export sysctls to unprivileged users */
3854                if (net->user_ns != &init_user_ns)
3855                        tbl[0].procname = NULL;
3856        } else
3857                tbl = vs_vars;
3858        /* Initialize sysctl defaults */
3859        for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
3860                if (tbl[idx].proc_handler == proc_do_defense_mode)
3861                        tbl[idx].extra2 = ipvs;
3862        }
3863        idx = 0;
3864        ipvs->sysctl_amemthresh = 1024;
3865        tbl[idx++].data = &ipvs->sysctl_amemthresh;
3866        ipvs->sysctl_am_droprate = 10;
3867        tbl[idx++].data = &ipvs->sysctl_am_droprate;
3868        tbl[idx++].data = &ipvs->sysctl_drop_entry;
3869        tbl[idx++].data = &ipvs->sysctl_drop_packet;
3870#ifdef CONFIG_IP_VS_NFCT
3871        tbl[idx++].data = &ipvs->sysctl_conntrack;
3872#endif
3873        tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3874        ipvs->sysctl_snat_reroute = 1;
3875        tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3876        ipvs->sysctl_sync_ver = 1;
3877        tbl[idx++].data = &ipvs->sysctl_sync_ver;
3878        ipvs->sysctl_sync_ports = 1;
3879        tbl[idx++].data = &ipvs->sysctl_sync_ports;
3880        tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
3881        ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3882        tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3883        ipvs->sysctl_sync_sock_size = 0;
3884        tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3885        tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3886        tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3887        tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
3888        tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
3889        tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3890        ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3891        ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3892        tbl[idx].data = &ipvs->sysctl_sync_threshold;
3893        tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3894        ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3895        tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3896        ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3897        tbl[idx++].data = &ipvs->sysctl_sync_retries;
3898        tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3899        ipvs->sysctl_pmtu_disc = 1;
3900        tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3901        tbl[idx++].data = &ipvs->sysctl_backup_only;
3902        ipvs->sysctl_conn_reuse_mode = 1;
3903        tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
3904        tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
3905        tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
3906
3907        ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3908        if (ipvs->sysctl_hdr == NULL) {
3909                if (!net_eq(net, &init_net))
3910                        kfree(tbl);
3911                return -ENOMEM;
3912        }
3913        ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
3914        ipvs->sysctl_tbl = tbl;
3915        /* Schedule defense work */
3916        INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3917        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3918
3919        return 0;
3920}
3921
3922static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
3923{
3924        struct net *net = ipvs->net;
3925
3926        cancel_delayed_work_sync(&ipvs->defense_work);
3927        cancel_work_sync(&ipvs->defense_work.work);
3928        unregister_net_sysctl_table(ipvs->sysctl_hdr);
3929        ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
3930
3931        if (!net_eq(net, &init_net))
3932                kfree(ipvs->sysctl_tbl);
3933}
3934
3935#else
3936
3937static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
3938static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
3939
3940#endif
3941
3942static struct notifier_block ip_vs_dst_notifier = {
3943        .notifier_call = ip_vs_dst_event,
3944};
3945
3946int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3947{
3948        int i, idx;
3949
3950        /* Initialize rs_table */
3951        for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3952                INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
3953
3954        INIT_LIST_HEAD(&ipvs->dest_trash);
3955        spin_lock_init(&ipvs->dest_trash_lock);
3956        setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
3957                    (unsigned long) ipvs);
3958        atomic_set(&ipvs->ftpsvc_counter, 0);
3959        atomic_set(&ipvs->nullsvc_counter, 0);
3960
3961        /* procfs stats */
3962        ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3963        if (!ipvs->tot_stats.cpustats)
3964                return -ENOMEM;
3965
3966        for_each_possible_cpu(i) {
3967                struct ip_vs_cpu_stats *ipvs_tot_stats;
3968                ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
3969                u64_stats_init(&ipvs_tot_stats->syncp);
3970        }
3971
3972        spin_lock_init(&ipvs->tot_stats.lock);
3973
3974        proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
3975        proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
3976        proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
3977                    &ip_vs_stats_percpu_fops);
3978
3979        if (ip_vs_control_net_init_sysctl(ipvs))
3980                goto err;
3981
3982        return 0;
3983
3984err:
3985        free_percpu(ipvs->tot_stats.cpustats);
3986        return -ENOMEM;
3987}
3988
3989void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
3990{
3991        ip_vs_trash_cleanup(ipvs);
3992        ip_vs_control_net_cleanup_sysctl(ipvs);
3993        remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
3994        remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
3995        remove_proc_entry("ip_vs", ipvs->net->proc_net);
3996        free_percpu(ipvs->tot_stats.cpustats);
3997}
3998
3999int __init ip_vs_register_nl_ioctl(void)
4000{
4001        int ret;
4002
4003        ret = nf_register_sockopt(&ip_vs_sockopts);
4004        if (ret) {
4005                pr_err("cannot register sockopt.\n");
4006                goto err_sock;
4007        }
4008
4009        ret = ip_vs_genl_register();
4010        if (ret) {
4011                pr_err("cannot register Generic Netlink interface.\n");
4012                goto err_genl;
4013        }
4014        return 0;
4015
4016err_genl:
4017        nf_unregister_sockopt(&ip_vs_sockopts);
4018err_sock:
4019        return ret;
4020}
4021
4022void ip_vs_unregister_nl_ioctl(void)
4023{
4024        ip_vs_genl_unregister();
4025        nf_unregister_sockopt(&ip_vs_sockopts);
4026}
4027
4028int __init ip_vs_control_init(void)
4029{
4030        int idx;
4031        int ret;
4032
4033        EnterFunction(2);
4034
4035        /* Initialize svc_table, ip_vs_svc_fwm_table */
4036        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4037                INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
4038                INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
4039        }
4040
4041        smp_wmb();      /* Do we really need it now ? */
4042
4043        ret = register_netdevice_notifier(&ip_vs_dst_notifier);
4044        if (ret < 0)
4045                return ret;
4046
4047        LeaveFunction(2);
4048        return 0;
4049}
4050
4051
4052void ip_vs_control_cleanup(void)
4053{
4054        EnterFunction(2);
4055        unregister_netdevice_notifier(&ip_vs_dst_notifier);
4056        LeaveFunction(2);
4057}
4058