/* Source file: linux/include/net/ip_vs.h */
   1/*
   2 *      IP Virtual Server
   3 *      data structure and functionality definitions
   4 */
   5
   6#ifndef _NET_IP_VS_H
   7#define _NET_IP_VS_H
   8
   9#include <linux/ip_vs.h>                /* definitions shared with userland */
  10
  11#include <asm/types.h>                  /* for __uXX types */
  12
  13#include <linux/list.h>                 /* for struct list_head */
  14#include <linux/spinlock.h>             /* for struct rwlock_t */
  15#include <linux/atomic.h>                 /* for struct atomic_t */
  16#include <linux/compiler.h>
  17#include <linux/timer.h>
  18#include <linux/bug.h>
  19
  20#include <net/checksum.h>
  21#include <linux/netfilter.h>            /* for union nf_inet_addr */
  22#include <linux/ip.h>
  23#include <linux/ipv6.h>                 /* for struct ipv6hdr */
  24#include <net/ipv6.h>
  25#if IS_ENABLED(CONFIG_IP_VS_IPV6)
  26#include <linux/netfilter_ipv6/ip6_tables.h>
  27#endif
  28#if IS_ENABLED(CONFIG_NF_CONNTRACK)
  29#include <net/netfilter/nf_conntrack.h>
  30#endif
  31#include <net/net_namespace.h>          /* Netw namespace */
  32
  33/*
  34 * Generic access of ipvs struct
  35 */
  36static inline struct netns_ipvs *net_ipvs(struct net* net)
  37{
  38        return net->ipvs;
  39}
  40/*
  41 * Get net ptr from skb in traffic cases
  42 * use skb_sknet when call is from userland (ioctl or netlink)
  43 */
  44static inline struct net *skb_net(const struct sk_buff *skb)
  45{
  46#ifdef CONFIG_NET_NS
  47#ifdef CONFIG_IP_VS_DEBUG
  48        /*
  49         * This is used for debug only.
  50         * Start with the most likely hit
  51         * End with BUG
  52         */
  53        if (likely(skb->dev && skb->dev->nd_net))
  54                return dev_net(skb->dev);
  55        if (skb_dst(skb) && skb_dst(skb)->dev)
  56                return dev_net(skb_dst(skb)->dev);
  57        WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
  58                      __func__, __LINE__);
  59        if (likely(skb->sk && skb->sk->sk_net))
  60                return sock_net(skb->sk);
  61        pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
  62                __func__, __LINE__);
  63        BUG();
  64#else
  65        return dev_net(skb->dev ? : skb_dst(skb)->dev);
  66#endif
  67#else
  68        return &init_net;
  69#endif
  70}
  71
  72static inline struct net *skb_sknet(const struct sk_buff *skb)
  73{
  74#ifdef CONFIG_NET_NS
  75#ifdef CONFIG_IP_VS_DEBUG
  76        /* Start with the most likely hit */
  77        if (likely(skb->sk && skb->sk->sk_net))
  78                return sock_net(skb->sk);
  79        WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
  80                       __func__, __LINE__);
  81        if (likely(skb->dev && skb->dev->nd_net))
  82                return dev_net(skb->dev);
  83        pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
  84                __func__, __LINE__);
  85        BUG();
  86#else
  87        return sock_net(skb->sk);
  88#endif
  89#else
  90        return &init_net;
  91#endif
  92}
  93/*
  94 * This one needed for single_open_net since net is stored directly in
  95 * private not as a struct i.e. seq_file_net can't be used.
  96 */
  97static inline struct net *seq_file_single_net(struct seq_file *seq)
  98{
  99#ifdef CONFIG_NET_NS
 100        return (struct net *)seq->private;
 101#else
 102        return &init_net;
 103#endif
 104}
 105
/* Size of the connection table; needed by ip_vs_ctl.c */
 107extern int ip_vs_conn_tab_size;
 108
 109struct ip_vs_iphdr {
 110        __u32 len;      /* IPv4 simply where L4 starts
 111                           IPv6 where L4 Transport Header starts */
 112        __u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */
 113        __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
 114        __s16 protocol;
 115        __s32 flags;
 116        union nf_inet_addr saddr;
 117        union nf_inet_addr daddr;
 118};
 119
 120/* Dependency to module: nf_defrag_ipv6 */
 121#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
 122static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
 123{
 124        return skb->nfct_reasm;
 125}
 126static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
 127                                      int len, void *buffer,
 128                                      const struct ip_vs_iphdr *ipvsh)
 129{
 130        if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb)))
 131                return skb_header_pointer(skb_nfct_reasm(skb),
 132                                          ipvsh->thoff_reasm, len, buffer);
 133
 134        return skb_header_pointer(skb, offset, len, buffer);
 135}
 136#else
 137static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
 138{
 139        return NULL;
 140}
 141static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
 142                                      int len, void *buffer,
 143                                      const struct ip_vs_iphdr *ipvsh)
 144{
 145        return skb_header_pointer(skb, offset, len, buffer);
 146}
 147#endif
 148
 149static inline void
 150ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
 151{
 152        const struct iphdr *iph = nh;
 153
 154        iphdr->len      = iph->ihl * 4;
 155        iphdr->fragoffs = 0;
 156        iphdr->protocol = iph->protocol;
 157        iphdr->saddr.ip = iph->saddr;
 158        iphdr->daddr.ip = iph->daddr;
 159}
 160
 161/* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6.
 162 * IPv6 requires some extra work, as finding proper header position,
 163 * depend on the IPv6 extension headers.
 164 */
 165static inline void
 166ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
 167{
 168#ifdef CONFIG_IP_VS_IPV6
 169        if (af == AF_INET6) {
 170                const struct ipv6hdr *iph =
 171                        (struct ipv6hdr *)skb_network_header(skb);
 172                iphdr->saddr.in6 = iph->saddr;
 173                iphdr->daddr.in6 = iph->daddr;
 174                /* ipv6_find_hdr() updates len, flags, thoff_reasm */
 175                iphdr->thoff_reasm = 0;
 176                iphdr->len       = 0;
 177                iphdr->flags     = 0;
 178                iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
 179                                                 &iphdr->fragoffs,
 180                                                 &iphdr->flags);
 181                /* get proto from re-assembled packet and it's offset */
 182                if (skb_nfct_reasm(skb))
 183                        iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
 184                                                        &iphdr->thoff_reasm,
 185                                                        -1, NULL, NULL);
 186
 187        } else
 188#endif
 189        {
 190                const struct iphdr *iph =
 191                        (struct iphdr *)skb_network_header(skb);
 192                iphdr->len      = iph->ihl * 4;
 193                iphdr->fragoffs = 0;
 194                iphdr->protocol = iph->protocol;
 195                iphdr->saddr.ip = iph->saddr;
 196                iphdr->daddr.ip = iph->daddr;
 197        }
 198}
 199
 200static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
 201                                   const union nf_inet_addr *src)
 202{
 203#ifdef CONFIG_IP_VS_IPV6
 204        if (af == AF_INET6)
 205                dst->in6 = src->in6;
 206        else
 207#endif
 208        dst->ip = src->ip;
 209}
 210
 211static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
 212                                  const union nf_inet_addr *src)
 213{
 214#ifdef CONFIG_IP_VS_IPV6
 215        if (af == AF_INET6) {
 216                dst->in6 = src->in6;
 217                return;
 218        }
 219#endif
 220        dst->ip = src->ip;
 221        dst->all[1] = 0;
 222        dst->all[2] = 0;
 223        dst->all[3] = 0;
 224}
 225
 226static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
 227                                   const union nf_inet_addr *b)
 228{
 229#ifdef CONFIG_IP_VS_IPV6
 230        if (af == AF_INET6)
 231                return ipv6_addr_equal(&a->in6, &b->in6);
 232#endif
 233        return a->ip == b->ip;
 234}
 235
 236#ifdef CONFIG_IP_VS_DEBUG
 237#include <linux/net.h>
 238
 239extern int ip_vs_get_debug_level(void);
 240
 241static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
 242                                         const union nf_inet_addr *addr,
 243                                         int *idx)
 244{
 245        int len;
 246#ifdef CONFIG_IP_VS_IPV6
 247        if (af == AF_INET6)
 248                len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6c]",
 249                               &addr->in6) + 1;
 250        else
 251#endif
 252                len = snprintf(&buf[*idx], buf_len - *idx, "%pI4",
 253                               &addr->ip) + 1;
 254
 255        *idx += len;
 256        BUG_ON(*idx > buf_len + 1);
 257        return &buf[*idx - len];
 258}
 259
 260#define IP_VS_DBG_BUF(level, msg, ...)                                  \
 261        do {                                                            \
 262                char ip_vs_dbg_buf[160];                                \
 263                int ip_vs_dbg_idx = 0;                                  \
 264                if (level <= ip_vs_get_debug_level())                   \
 265                        printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);  \
 266        } while (0)
 267#define IP_VS_ERR_BUF(msg...)                                           \
 268        do {                                                            \
 269                char ip_vs_dbg_buf[160];                                \
 270                int ip_vs_dbg_idx = 0;                                  \
 271                pr_err(msg);                                            \
 272        } while (0)
 273
 274/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */
 275#define IP_VS_DBG_ADDR(af, addr)                                        \
 276        ip_vs_dbg_addr(af, ip_vs_dbg_buf,                               \
 277                       sizeof(ip_vs_dbg_buf), addr,                     \
 278                       &ip_vs_dbg_idx)
 279
 280#define IP_VS_DBG(level, msg, ...)                                      \
 281        do {                                                            \
 282                if (level <= ip_vs_get_debug_level())                   \
 283                        printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);  \
 284        } while (0)
 285#define IP_VS_DBG_RL(msg, ...)                                          \
 286        do {                                                            \
 287                if (net_ratelimit())                                    \
 288                        printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);  \
 289        } while (0)
 290#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)                     \
 291        do {                                                            \
 292                if (level <= ip_vs_get_debug_level())                   \
 293                        pp->debug_packet(af, pp, skb, ofs, msg);        \
 294        } while (0)
 295#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)                  \
 296        do {                                                            \
 297                if (level <= ip_vs_get_debug_level() &&                 \
 298                    net_ratelimit())                                    \
 299                        pp->debug_packet(af, pp, skb, ofs, msg);        \
 300        } while (0)
 301#else   /* NO DEBUGGING at ALL */
 302#define IP_VS_DBG_BUF(level, msg...)  do {} while (0)
 303#define IP_VS_ERR_BUF(msg...)  do {} while (0)
 304#define IP_VS_DBG(level, msg...)  do {} while (0)
 305#define IP_VS_DBG_RL(msg...)  do {} while (0)
 306#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)     do {} while (0)
 307#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)  do {} while (0)
 308#endif
 309
 310#define IP_VS_BUG() BUG()
 311#define IP_VS_ERR_RL(msg, ...)                                          \
 312        do {                                                            \
 313                if (net_ratelimit())                                    \
 314                        pr_err(msg, ##__VA_ARGS__);                     \
 315        } while (0)
 316
 317#ifdef CONFIG_IP_VS_DEBUG
 318#define EnterFunction(level)                                            \
 319        do {                                                            \
 320                if (level <= ip_vs_get_debug_level())                   \
 321                        printk(KERN_DEBUG                               \
 322                               pr_fmt("Enter: %s, %s line %i\n"),       \
 323                               __func__, __FILE__, __LINE__);           \
 324        } while (0)
 325#define LeaveFunction(level)                                            \
 326        do {                                                            \
 327                if (level <= ip_vs_get_debug_level())                   \
 328                        printk(KERN_DEBUG                               \
 329                               pr_fmt("Leave: %s, %s line %i\n"),       \
 330                               __func__, __FILE__, __LINE__);           \
 331        } while (0)
 332#else
 333#define EnterFunction(level)   do {} while (0)
 334#define LeaveFunction(level)   do {} while (0)
 335#endif
 336
 337
 338/*
 339 *      The port number of FTP service (in network order).
 340 */
 341#define FTPPORT  cpu_to_be16(21)
 342#define FTPDATA  cpu_to_be16(20)
 343
 344/*
 345 *      TCP State Values
 346 */
 347enum {
 348        IP_VS_TCP_S_NONE = 0,
 349        IP_VS_TCP_S_ESTABLISHED,
 350        IP_VS_TCP_S_SYN_SENT,
 351        IP_VS_TCP_S_SYN_RECV,
 352        IP_VS_TCP_S_FIN_WAIT,
 353        IP_VS_TCP_S_TIME_WAIT,
 354        IP_VS_TCP_S_CLOSE,
 355        IP_VS_TCP_S_CLOSE_WAIT,
 356        IP_VS_TCP_S_LAST_ACK,
 357        IP_VS_TCP_S_LISTEN,
 358        IP_VS_TCP_S_SYNACK,
 359        IP_VS_TCP_S_LAST
 360};
 361
 362/*
 363 *      UDP State Values
 364 */
 365enum {
 366        IP_VS_UDP_S_NORMAL,
 367        IP_VS_UDP_S_LAST,
 368};
 369
 370/*
 371 *      ICMP State Values
 372 */
 373enum {
 374        IP_VS_ICMP_S_NORMAL,
 375        IP_VS_ICMP_S_LAST,
 376};
 377
 378/*
 379 *      SCTP State Values
 380 */
 381enum ip_vs_sctp_states {
 382        IP_VS_SCTP_S_NONE,
 383        IP_VS_SCTP_S_INIT1,
 384        IP_VS_SCTP_S_INIT,
 385        IP_VS_SCTP_S_COOKIE_SENT,
 386        IP_VS_SCTP_S_COOKIE_REPLIED,
 387        IP_VS_SCTP_S_COOKIE_WAIT,
 388        IP_VS_SCTP_S_COOKIE,
 389        IP_VS_SCTP_S_COOKIE_ECHOED,
 390        IP_VS_SCTP_S_ESTABLISHED,
 391        IP_VS_SCTP_S_SHUTDOWN_SENT,
 392        IP_VS_SCTP_S_SHUTDOWN_RECEIVED,
 393        IP_VS_SCTP_S_SHUTDOWN_ACK_SENT,
 394        IP_VS_SCTP_S_REJECTED,
 395        IP_VS_SCTP_S_CLOSED,
 396        IP_VS_SCTP_S_LAST
 397};
 398
 399/*
 400 *      Delta sequence info structure
 401 *      Each ip_vs_conn has 2 (output AND input seq. changes).
 402 *      Only used in the VS/NAT.
 403 */
 404struct ip_vs_seq {
 405        __u32                   init_seq;       /* Add delta from this seq */
 406        __u32                   delta;          /* Delta in sequence numbers */
 407        __u32                   previous_delta; /* Delta in sequence numbers
 408                                                   before last resized pkt */
 409};
 410
 411/*
 412 * counters per cpu
 413 */
 414struct ip_vs_counters {
 415        __u32           conns;          /* connections scheduled */
 416        __u32           inpkts;         /* incoming packets */
 417        __u32           outpkts;        /* outgoing packets */
 418        __u64           inbytes;        /* incoming bytes */
 419        __u64           outbytes;       /* outgoing bytes */
 420};
 421/*
 422 * Stats per cpu
 423 */
 424struct ip_vs_cpu_stats {
 425        struct ip_vs_counters   ustats;
 426        struct u64_stats_sync   syncp;
 427};
 428
 429/*
 430 *      IPVS statistics objects
 431 */
 432struct ip_vs_estimator {
 433        struct list_head        list;
 434
 435        u64                     last_inbytes;
 436        u64                     last_outbytes;
 437        u32                     last_conns;
 438        u32                     last_inpkts;
 439        u32                     last_outpkts;
 440
 441        u32                     cps;
 442        u32                     inpps;
 443        u32                     outpps;
 444        u32                     inbps;
 445        u32                     outbps;
 446};
 447
 448struct ip_vs_stats {
 449        struct ip_vs_stats_user ustats;         /* statistics */
 450        struct ip_vs_estimator  est;            /* estimator */
 451        struct ip_vs_cpu_stats __percpu *cpustats;      /* per cpu counters */
 452        spinlock_t              lock;           /* spin lock */
 453        struct ip_vs_stats_user ustats0;        /* reset values */
 454};
 455
 456struct dst_entry;
 457struct iphdr;
 458struct ip_vs_conn;
 459struct ip_vs_app;
 460struct sk_buff;
 461struct ip_vs_proto_data;
 462
 463struct ip_vs_protocol {
 464        struct ip_vs_protocol   *next;
 465        char                    *name;
 466        u16                     protocol;
 467        u16                     num_states;
 468        int                     dont_defrag;
 469
 470        void (*init)(struct ip_vs_protocol *pp);
 471
 472        void (*exit)(struct ip_vs_protocol *pp);
 473
 474        int (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
 475
 476        void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
 477
 478        int (*conn_schedule)(int af, struct sk_buff *skb,
 479                             struct ip_vs_proto_data *pd,
 480                             int *verdict, struct ip_vs_conn **cpp,
 481                             struct ip_vs_iphdr *iph);
 482
 483        struct ip_vs_conn *
 484        (*conn_in_get)(int af,
 485                       const struct sk_buff *skb,
 486                       const struct ip_vs_iphdr *iph,
 487                       int inverse);
 488
 489        struct ip_vs_conn *
 490        (*conn_out_get)(int af,
 491                        const struct sk_buff *skb,
 492                        const struct ip_vs_iphdr *iph,
 493                        int inverse);
 494
 495        int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
 496                            struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
 497
 498        int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
 499                            struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
 500
 501        int (*csum_check)(int af, struct sk_buff *skb,
 502                          struct ip_vs_protocol *pp);
 503
 504        const char *(*state_name)(int state);
 505
 506        void (*state_transition)(struct ip_vs_conn *cp, int direction,
 507                                 const struct sk_buff *skb,
 508                                 struct ip_vs_proto_data *pd);
 509
 510        int (*register_app)(struct net *net, struct ip_vs_app *inc);
 511
 512        void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
 513
 514        int (*app_conn_bind)(struct ip_vs_conn *cp);
 515
 516        void (*debug_packet)(int af, struct ip_vs_protocol *pp,
 517                             const struct sk_buff *skb,
 518                             int offset,
 519                             const char *msg);
 520
 521        void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
 522};
 523
 524/*
 525 * protocol data per netns
 526 */
 527struct ip_vs_proto_data {
 528        struct ip_vs_proto_data *next;
 529        struct ip_vs_protocol   *pp;
 530        int                     *timeout_table; /* protocol timeout table */
 531        atomic_t                appcnt;         /* counter of proto app incs. */
 532        struct tcp_states_t     *tcp_state_table;
 533};
 534
 535extern struct ip_vs_protocol   *ip_vs_proto_get(unsigned short proto);
 536extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
 537                                                     unsigned short proto);
 538
 539struct ip_vs_conn_param {
 540        struct net                      *net;
 541        const union nf_inet_addr        *caddr;
 542        const union nf_inet_addr        *vaddr;
 543        __be16                          cport;
 544        __be16                          vport;
 545        __u16                           protocol;
 546        u16                             af;
 547
 548        const struct ip_vs_pe           *pe;
 549        char                            *pe_data;
 550        __u8                            pe_data_len;
 551};
 552
 553/*
 554 *      IP_VS structure allocated for each dynamically scheduled connection
 555 */
 556struct ip_vs_conn {
 557        struct hlist_node       c_list;         /* hashed list heads */
 558        /* Protocol, addresses and port numbers */
 559        __be16                  cport;
 560        __be16                  dport;
 561        __be16                  vport;
 562        u16                     af;             /* address family */
 563        union nf_inet_addr      caddr;          /* client address */
 564        union nf_inet_addr      vaddr;          /* virtual address */
 565        union nf_inet_addr      daddr;          /* destination address */
 566        volatile __u32          flags;          /* status flags */
 567        __u16                   protocol;       /* Which protocol (TCP/UDP) */
 568#ifdef CONFIG_NET_NS
 569        struct net              *net;           /* Name space */
 570#endif
 571
 572        /* counter and timer */
 573        atomic_t                refcnt;         /* reference count */
 574        struct timer_list       timer;          /* Expiration timer */
 575        volatile unsigned long  timeout;        /* timeout */
 576
 577        /* Flags and state transition */
 578        spinlock_t              lock;           /* lock for state transition */
 579        volatile __u16          state;          /* state info */
 580        volatile __u16          old_state;      /* old state, to be used for
 581                                                 * state transition triggerd
 582                                                 * synchronization
 583                                                 */
 584        __u32                   fwmark;         /* Fire wall mark from skb */
 585        unsigned long           sync_endtime;   /* jiffies + sent_retries */
 586
 587        /* Control members */
 588        struct ip_vs_conn       *control;       /* Master control connection */
 589        atomic_t                n_control;      /* Number of controlled ones */
 590        struct ip_vs_dest       *dest;          /* real server */
 591        atomic_t                in_pkts;        /* incoming packet counter */
 592
 593        /* packet transmitter for different forwarding methods.  If it
 594           mangles the packet, it must return NF_DROP or better NF_STOLEN,
 595           otherwise this must be changed to a sk_buff **.
 596           NF_ACCEPT can be returned when destination is local.
 597         */
 598        int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
 599                           struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
 600
 601        /* Note: we can group the following members into a structure,
 602           in order to save more space, and the following members are
 603           only used in VS/NAT anyway */
 604        struct ip_vs_app        *app;           /* bound ip_vs_app object */
 605        void                    *app_data;      /* Application private data */
 606        struct ip_vs_seq        in_seq;         /* incoming seq. struct */
 607        struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 608
 609        const struct ip_vs_pe   *pe;
 610        char                    *pe_data;
 611        __u8                    pe_data_len;
 612
 613        struct rcu_head         rcu_head;
 614};
 615
 616/*
 617 *  To save some memory in conn table when name space is disabled.
 618 */
 619static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
 620{
 621#ifdef CONFIG_NET_NS
 622        return cp->net;
 623#else
 624        return &init_net;
 625#endif
 626}
 627static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
 628{
 629#ifdef CONFIG_NET_NS
 630        cp->net = net;
 631#endif
 632}
 633
 634static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
 635                                    struct net *net)
 636{
 637#ifdef CONFIG_NET_NS
 638        return cp->net == net;
 639#else
 640        return 1;
 641#endif
 642}
 643
 644/*
 645 *      Extended internal versions of struct ip_vs_service_user and
 646 *      ip_vs_dest_user for IPv6 support.
 647 *
 648 *      We need these to conveniently pass around service and destination
 649 *      options, but unfortunately, we also need to keep the old definitions to
 650 *      maintain userspace backwards compatibility for the setsockopt interface.
 651 */
 652struct ip_vs_service_user_kern {
 653        /* virtual service addresses */
 654        u16                     af;
 655        u16                     protocol;
 656        union nf_inet_addr      addr;           /* virtual ip address */
 657        __be16                  port;
 658        u32                     fwmark;         /* firwall mark of service */
 659
 660        /* virtual service options */
 661        char                    *sched_name;
 662        char                    *pe_name;
 663        unsigned int            flags;          /* virtual service flags */
 664        unsigned int            timeout;        /* persistent timeout in sec */
 665        __be32                  netmask;        /* persistent netmask or plen */
 666};
 667
 668
 669struct ip_vs_dest_user_kern {
 670        /* destination server address */
 671        union nf_inet_addr      addr;
 672        __be16                  port;
 673
 674        /* real server options */
 675        unsigned int            conn_flags;     /* connection flags */
 676        int                     weight;         /* destination weight */
 677
 678        /* thresholds for active connections */
 679        u32                     u_threshold;    /* upper threshold */
 680        u32                     l_threshold;    /* lower threshold */
 681};
 682
 683
 684/*
 685 *      The information about the virtual service offered to the net
 686 *      and the forwarding entries
 687 */
 688struct ip_vs_service {
 689        struct hlist_node       s_list;   /* for normal service table */
 690        struct hlist_node       f_list;   /* for fwmark-based service table */
 691        atomic_t                refcnt;   /* reference counter */
 692
 693        u16                     af;       /* address family */
 694        __u16                   protocol; /* which protocol (TCP/UDP) */
 695        union nf_inet_addr      addr;     /* IP address for virtual service */
 696        __be16                  port;     /* port number for the service */
 697        __u32                   fwmark;   /* firewall mark of the service */
 698        unsigned int            flags;    /* service status flags */
 699        unsigned int            timeout;  /* persistent timeout in ticks */
 700        __be32                  netmask;  /* grouping granularity, mask/plen */
 701        struct net              *net;
 702
 703        struct list_head        destinations;  /* real server d-linked list */
 704        __u32                   num_dests;     /* number of servers */
 705        struct ip_vs_stats      stats;         /* statistics for the service */
 706
 707        /* for scheduling */
 708        struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
 709        spinlock_t              sched_lock;    /* lock sched_data */
 710        void                    *sched_data;   /* scheduler application data */
 711
 712        /* alternate persistence engine */
 713        struct ip_vs_pe __rcu   *pe;
 714
 715        struct rcu_head         rcu_head;
 716};
 717
 718/* Information for cached dst */
 719struct ip_vs_dest_dst {
 720        struct dst_entry        *dst_cache;     /* destination cache entry */
 721        u32                     dst_cookie;
 722        union nf_inet_addr      dst_saddr;
 723        struct rcu_head         rcu_head;
 724};
 725
 726/* In grace period after removing */
 727#define IP_VS_DEST_STATE_REMOVING       0x01
 728/*
 729 *      The real server destination forwarding entry
 730 *      with ip address, port number, and so on.
 731 */
 732struct ip_vs_dest {
 733        struct list_head        n_list;   /* for the dests in the service */
 734        struct hlist_node       d_list;   /* for table with all the dests */
 735
 736        u16                     af;             /* address family */
 737        __be16                  port;           /* port number of the server */
 738        union nf_inet_addr      addr;           /* IP address of the server */
 739        volatile unsigned int   flags;          /* dest status flags */
 740        atomic_t                conn_flags;     /* flags to copy to conn */
 741        atomic_t                weight;         /* server weight */
 742
 743        atomic_t                refcnt;         /* reference counter */
 744        struct ip_vs_stats      stats;          /* statistics */
 745        unsigned long           state;          /* state flags */
 746
 747        /* connection counters and thresholds */
 748        atomic_t                activeconns;    /* active connections */
 749        atomic_t                inactconns;     /* inactive connections */
 750        atomic_t                persistconns;   /* persistent connections */
 751        __u32                   u_threshold;    /* upper threshold */
 752        __u32                   l_threshold;    /* lower threshold */
 753
 754        /* for destination cache */
 755        spinlock_t              dst_lock;       /* lock of dst_cache */
 756        struct ip_vs_dest_dst __rcu *dest_dst;  /* cached dst info */
 757
 758        /* for virtual service */
 759        struct ip_vs_service    *svc;           /* service it belongs to */
 760        __u16                   protocol;       /* which protocol (TCP/UDP) */
 761        __be16                  vport;          /* virtual port number */
 762        union nf_inet_addr      vaddr;          /* virtual IP address */
 763        __u32                   vfwmark;        /* firewall mark of service */
 764
 765        struct list_head        t_list;         /* in dest_trash */
 766        struct rcu_head         rcu_head;
 767        unsigned int            in_rs_table:1;  /* we are in rs_table */
 768};
 769
 770
 771/*
 772 *      The scheduler object
 773 */
 774struct ip_vs_scheduler {
 775        struct list_head        n_list;         /* d-linked list head */
 776        char                    *name;          /* scheduler name */
 777        atomic_t                refcnt;         /* reference counter */
 778        struct module           *module;        /* THIS_MODULE/NULL */
 779
 780        /* scheduler initializing service */
 781        int (*init_service)(struct ip_vs_service *svc);
 782        /* scheduling service finish */
 783        void (*done_service)(struct ip_vs_service *svc);
 784        /* dest is linked */
 785        int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
 786        /* dest is unlinked */
 787        int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
 788        /* dest is updated */
 789        int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
 790
 791        /* selecting a server from the given service */
 792        struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
 793                                       const struct sk_buff *skb,
 794                                       struct ip_vs_iphdr *iph);
 795};
 796
 797/* The persistence engine object */
 798struct ip_vs_pe {
 799        struct list_head        n_list;         /* d-linked list head */
 800        char                    *name;          /* scheduler name */
 801        atomic_t                refcnt;         /* reference counter */
 802        struct module           *module;        /* THIS_MODULE/NULL */
 803
 804        /* get the connection template, if any */
 805        int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb);
 806        bool (*ct_match)(const struct ip_vs_conn_param *p,
 807                         struct ip_vs_conn *ct);
 808        u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
 809                           bool inverse);
 810        int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
 811};
 812
 813/*
 814 *      The application module object (a.k.a. app incarnation)
 815 */
 816struct ip_vs_app {
 817        struct list_head        a_list;         /* member in app list */
 818        int                     type;           /* IP_VS_APP_TYPE_xxx */
 819        char                    *name;          /* application module name */
 820        __u16                   protocol;
 821        struct module           *module;        /* THIS_MODULE/NULL */
 822        struct list_head        incs_list;      /* list of incarnations */
 823
 824        /* members for application incarnations */
 825        struct list_head        p_list;         /* member in proto app list */
 826        struct ip_vs_app        *app;           /* its real application */
 827        __be16                  port;           /* port number in net order */
 828        atomic_t                usecnt;         /* usage counter */
 829        struct rcu_head         rcu_head;
 830
 831        /*
 832         * output hook: Process packet in inout direction, diff set for TCP.
 833         * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
 834         *         2=Mangled but checksum was not updated
 835         */
 836        int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
 837                       struct sk_buff *, int *diff);
 838
 839        /*
 840         * input hook: Process packet in outin direction, diff set for TCP.
 841         * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
 842         *         2=Mangled but checksum was not updated
 843         */
 844        int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
 845                      struct sk_buff *, int *diff);
 846
 847        /* ip_vs_app initializer */
 848        int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
 849
 850        /* ip_vs_app finish */
 851        int (*done_conn)(struct ip_vs_app *, struct ip_vs_conn *);
 852
 853
 854        /* not used now */
 855        int (*bind_conn)(struct ip_vs_app *, struct ip_vs_conn *,
 856                         struct ip_vs_protocol *);
 857
 858        void (*unbind_conn)(struct ip_vs_app *, struct ip_vs_conn *);
 859
 860        int *                   timeout_table;
 861        int *                   timeouts;
 862        int                     timeouts_size;
 863
 864        int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app,
 865                             int *verdict, struct ip_vs_conn **cpp);
 866
 867        struct ip_vs_conn *
 868        (*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app,
 869                       const struct iphdr *iph, int inverse);
 870
 871        struct ip_vs_conn *
 872        (*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app,
 873                        const struct iphdr *iph, int inverse);
 874
 875        int (*state_transition)(struct ip_vs_conn *cp, int direction,
 876                                const struct sk_buff *skb,
 877                                struct ip_vs_app *app);
 878
 879        void (*timeout_change)(struct ip_vs_app *app, int flags);
 880};
 881
 882struct ipvs_master_sync_state {
 883        struct list_head        sync_queue;
 884        struct ip_vs_sync_buff  *sync_buff;
 885        unsigned long           sync_queue_len;
 886        unsigned int            sync_queue_delay;
 887        struct task_struct      *master_thread;
 888        struct delayed_work     master_wakeup_work;
 889        struct netns_ipvs       *ipvs;
 890};
 891
 892/* How much time to keep dests in trash */
 893#define IP_VS_DEST_TRASH_PERIOD         (120 * HZ)
 894
 895/* IPVS in network namespace */
 896struct netns_ipvs {
 897        int                     gen;            /* Generation */
 898        int                     enable;         /* enable like nf_hooks do */
 899        /*
 900         *      Hash table: for real service lookups
 901         */
 902        #define IP_VS_RTAB_BITS 4
 903        #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
 904        #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
 905
 906        struct hlist_head       rs_table[IP_VS_RTAB_SIZE];
 907        /* ip_vs_app */
 908        struct list_head        app_list;
 909        /* ip_vs_proto */
 910        #define IP_VS_PROTO_TAB_SIZE    32      /* must be power of 2 */
 911        struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
 912        /* ip_vs_proto_tcp */
 913#ifdef CONFIG_IP_VS_PROTO_TCP
 914        #define TCP_APP_TAB_BITS        4
 915        #define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
 916        #define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
 917        struct list_head        tcp_apps[TCP_APP_TAB_SIZE];
 918#endif
 919        /* ip_vs_proto_udp */
 920#ifdef CONFIG_IP_VS_PROTO_UDP
 921        #define UDP_APP_TAB_BITS        4
 922        #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
 923        #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
 924        struct list_head        udp_apps[UDP_APP_TAB_SIZE];
 925#endif
 926        /* ip_vs_proto_sctp */
 927#ifdef CONFIG_IP_VS_PROTO_SCTP
 928        #define SCTP_APP_TAB_BITS       4
 929        #define SCTP_APP_TAB_SIZE       (1 << SCTP_APP_TAB_BITS)
 930        #define SCTP_APP_TAB_MASK       (SCTP_APP_TAB_SIZE - 1)
 931        /* Hash table for SCTP application incarnations  */
 932        struct list_head        sctp_apps[SCTP_APP_TAB_SIZE];
 933#endif
 934        /* ip_vs_conn */
 935        atomic_t                conn_count;      /*  connection counter */
 936
 937        /* ip_vs_ctl */
 938        struct ip_vs_stats              tot_stats;  /* Statistics & est. */
 939
 940        int                     num_services;    /* no of virtual services */
 941
 942        /* Trash for destinations */
 943        struct list_head        dest_trash;
 944        spinlock_t              dest_trash_lock;
 945        struct timer_list       dest_trash_timer; /* expiration timer */
 946        /* Service counters */
 947        atomic_t                ftpsvc_counter;
 948        atomic_t                nullsvc_counter;
 949
 950#ifdef CONFIG_SYSCTL
 951        /* 1/rate drop and drop-entry variables */
 952        struct delayed_work     defense_work;   /* Work handler */
 953        int                     drop_rate;
 954        int                     drop_counter;
 955        atomic_t                dropentry;
 956        /* locks in ctl.c */
 957        spinlock_t              dropentry_lock;  /* drop entry handling */
 958        spinlock_t              droppacket_lock; /* drop packet handling */
 959        spinlock_t              securetcp_lock;  /* state and timeout tables */
 960
 961        /* sys-ctl struct */
 962        struct ctl_table_header *sysctl_hdr;
 963        struct ctl_table        *sysctl_tbl;
 964#endif
 965
 966        /* sysctl variables */
 967        int                     sysctl_amemthresh;
 968        int                     sysctl_am_droprate;
 969        int                     sysctl_drop_entry;
 970        int                     sysctl_drop_packet;
 971        int                     sysctl_secure_tcp;
 972#ifdef CONFIG_IP_VS_NFCT
 973        int                     sysctl_conntrack;
 974#endif
 975        int                     sysctl_snat_reroute;
 976        int                     sysctl_sync_ver;
 977        int                     sysctl_sync_ports;
 978        int                     sysctl_sync_persist_mode;
 979        unsigned long           sysctl_sync_qlen_max;
 980        int                     sysctl_sync_sock_size;
 981        int                     sysctl_cache_bypass;
 982        int                     sysctl_expire_nodest_conn;
 983        int                     sysctl_sloppy_tcp;
 984        int                     sysctl_sloppy_sctp;
 985        int                     sysctl_expire_quiescent_template;
 986        int                     sysctl_sync_threshold[2];
 987        unsigned int            sysctl_sync_refresh_period;
 988        int                     sysctl_sync_retries;
 989        int                     sysctl_nat_icmp_send;
 990        int                     sysctl_pmtu_disc;
 991        int                     sysctl_backup_only;
 992
 993        /* ip_vs_lblc */
 994        int                     sysctl_lblc_expiration;
 995        struct ctl_table_header *lblc_ctl_header;
 996        struct ctl_table        *lblc_ctl_table;
 997        /* ip_vs_lblcr */
 998        int                     sysctl_lblcr_expiration;
 999        struct ctl_table_header *lblcr_ctl_header;
1000        struct ctl_table        *lblcr_ctl_table;
1001        /* ip_vs_est */
1002        struct list_head        est_list;       /* estimator list */
1003        spinlock_t              est_lock;
1004        struct timer_list       est_timer;      /* Estimation timer */
1005        /* ip_vs_sync */
1006        spinlock_t              sync_lock;
1007        struct ipvs_master_sync_state *ms;
1008        spinlock_t              sync_buff_lock;
1009        struct task_struct      **backup_threads;
1010        int                     threads_mask;
1011        int                     send_mesg_maxlen;
1012        int                     recv_mesg_maxlen;
1013        volatile int            sync_state;
1014        volatile int            master_syncid;
1015        volatile int            backup_syncid;
1016        struct mutex            sync_mutex;
1017        /* multicast interface name */
1018        char                    master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
1019        char                    backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
1020        /* net name space ptr */
1021        struct net              *net;            /* Needed by timer routines */
1022};
1023
/* Defaults returned by the sysctl_*() accessors when CONFIG_SYSCTL is off */
#define DEFAULT_SYNC_THRESHOLD		3
#define DEFAULT_SYNC_PERIOD		50
#define DEFAULT_SYNC_VER		1
#define DEFAULT_SLOPPY_TCP		0
#define DEFAULT_SLOPPY_SCTP		0
#define DEFAULT_SYNC_REFRESH_PERIOD	(0U * HZ)
#define DEFAULT_SYNC_RETRIES		0

/* Sync tuning constants; the HZ-based ones are in jiffies */
#define IPVS_SYNC_WAKEUP_RATE		8
#define IPVS_SYNC_QLEN_MAX		(IPVS_SYNC_WAKEUP_RATE * 4)
#define IPVS_SYNC_SEND_DELAY		(HZ / 50)
#define IPVS_SYNC_CHECK_PERIOD		HZ
#define IPVS_SYNC_FLUSH_TIME		(HZ * 2)
#define IPVS_SYNC_PORTS_MAX		(1 << 6)
1037
1038#ifdef CONFIG_SYSCTL
1039
1040static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
1041{
1042        return ipvs->sysctl_sync_threshold[0];
1043}
1044
1045static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
1046{
1047        return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
1048}
1049
1050static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1051{
1052        return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
1053}
1054
1055static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1056{
1057        return ipvs->sysctl_sync_retries;
1058}
1059
1060static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1061{
1062        return ipvs->sysctl_sync_ver;
1063}
1064
1065static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
1066{
1067        return ipvs->sysctl_sloppy_tcp;
1068}
1069
1070static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
1071{
1072        return ipvs->sysctl_sloppy_sctp;
1073}
1074
1075static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
1076{
1077        return ACCESS_ONCE(ipvs->sysctl_sync_ports);
1078}
1079
1080static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
1081{
1082        return ipvs->sysctl_sync_persist_mode;
1083}
1084
1085static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1086{
1087        return ipvs->sysctl_sync_qlen_max;
1088}
1089
1090static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
1091{
1092        return ipvs->sysctl_sync_sock_size;
1093}
1094
1095static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
1096{
1097        return ipvs->sysctl_pmtu_disc;
1098}
1099
1100static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
1101{
1102        return ipvs->sync_state & IP_VS_STATE_BACKUP &&
1103               ipvs->sysctl_backup_only;
1104}
1105
1106#else
1107
1108static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
1109{
1110        return DEFAULT_SYNC_THRESHOLD;
1111}
1112
1113static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
1114{
1115        return DEFAULT_SYNC_PERIOD;
1116}
1117
1118static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1119{
1120        return DEFAULT_SYNC_REFRESH_PERIOD;
1121}
1122
1123static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1124{
1125        return DEFAULT_SYNC_RETRIES & 3;
1126}
1127
1128static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1129{
1130        return DEFAULT_SYNC_VER;
1131}
1132
1133static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
1134{
1135        return DEFAULT_SLOPPY_TCP;
1136}
1137
1138static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
1139{
1140        return DEFAULT_SLOPPY_SCTP;
1141}
1142
1143static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
1144{
1145        return 1;
1146}
1147
1148static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
1149{
1150        return 0;
1151}
1152
1153static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1154{
1155        return IPVS_SYNC_QLEN_MAX;
1156}
1157
1158static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
1159{
1160        return 0;
1161}
1162
1163static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
1164{
1165        return 1;
1166}
1167
1168static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
1169{
1170        return 0;
1171}
1172
1173#endif
1174
/*
 *	IPVS core functions
 *	(from ip_vs_core.c)
 */
const char *ip_vs_proto_name(unsigned int proto);
void ip_vs_init_hash_table(struct list_head *table, int rows);

/* Initialize a table declared as an array; the row count comes from the array */
#define IP_VS_INIT_HASH_TABLE(t)	ip_vs_init_hash_table((t), ARRAY_SIZE((t)))

/* Value for the type field of struct ip_vs_app */
#define IP_VS_APP_TYPE_FTP	1
1184
1185/*
1186 *     ip_vs_conn handling functions
1187 *     (from ip_vs_conn.c)
1188 */
1189
/* Direction constants; IP_VS_DIR_LAST equals the number of entries before it */
enum {
	IP_VS_DIR_INPUT		= 0,
	IP_VS_DIR_OUTPUT	= 1,
	IP_VS_DIR_INPUT_ONLY	= 2,
	IP_VS_DIR_LAST		= 3,
};
1196
1197static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
1198                                         const union nf_inet_addr *caddr,
1199                                         __be16 cport,
1200                                         const union nf_inet_addr *vaddr,
1201                                         __be16 vport,
1202                                         struct ip_vs_conn_param *p)
1203{
1204        p->net = net;
1205        p->af = af;
1206        p->protocol = protocol;
1207        p->caddr = caddr;
1208        p->cport = cport;
1209        p->vaddr = vaddr;
1210        p->vport = vport;
1211        p->pe = NULL;
1212        p->pe_data = NULL;
1213}
1214
/* Connection and connection-template lookups by parameter block */
struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);

/* Lookups taking the skb and a parsed IP header */
struct ip_vs_conn *ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
					   const struct ip_vs_iphdr *iph,
					   int inverse);
struct ip_vs_conn *ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
					    const struct ip_vs_iphdr *iph,
					    int inverse);
1227
1228/* Get reference to gain full access to conn.
1229 * By default, RCU read-side critical sections have access only to
1230 * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference.
1231 */
1232static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
1233{
1234        return atomic_inc_not_zero(&cp->refcnt);
1235}
1236
1237/* put back the conn without restarting its timer */
1238static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
1239{
1240        smp_mb__before_atomic_dec();
1241        atomic_dec(&cp->refcnt);
1242}
1243extern void ip_vs_conn_put(struct ip_vs_conn *cp);
1244extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
1245
1246struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
1247                                  const union nf_inet_addr *daddr,
1248                                  __be16 dport, unsigned int flags,
1249                                  struct ip_vs_dest *dest, __u32 fwmark);
1250extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
1251
1252extern const char * ip_vs_state_name(__u16 proto, int state);
1253
1254extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
1255extern int ip_vs_check_template(struct ip_vs_conn *ct);
1256extern void ip_vs_random_dropentry(struct net *net);
1257extern int ip_vs_conn_init(void);
1258extern void ip_vs_conn_cleanup(void);
1259
1260static inline void ip_vs_control_del(struct ip_vs_conn *cp)
1261{
1262        struct ip_vs_conn *ctl_cp = cp->control;
1263        if (!ctl_cp) {
1264                IP_VS_ERR_BUF("request control DEL for uncontrolled: "
1265                              "%s:%d to %s:%d\n",
1266                              IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1267                              ntohs(cp->cport),
1268                              IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1269                              ntohs(cp->vport));
1270
1271                return;
1272        }
1273
1274        IP_VS_DBG_BUF(7, "DELeting control for: "
1275                      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1276                      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1277                      ntohs(cp->cport),
1278                      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1279                      ntohs(ctl_cp->cport));
1280
1281        cp->control = NULL;
1282        if (atomic_read(&ctl_cp->n_control) == 0) {
1283                IP_VS_ERR_BUF("BUG control DEL with n=0 : "
1284                              "%s:%d to %s:%d\n",
1285                              IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1286                              ntohs(cp->cport),
1287                              IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1288                              ntohs(cp->vport));
1289
1290                return;
1291        }
1292        atomic_dec(&ctl_cp->n_control);
1293}
1294
1295static inline void
1296ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
1297{
1298        if (cp->control) {
1299                IP_VS_ERR_BUF("request control ADD for already controlled: "
1300                              "%s:%d to %s:%d\n",
1301                              IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1302                              ntohs(cp->cport),
1303                              IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1304                              ntohs(cp->vport));
1305
1306                ip_vs_control_del(cp);
1307        }
1308
1309        IP_VS_DBG_BUF(7, "ADDing control for: "
1310                      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1311                      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1312                      ntohs(cp->cport),
1313                      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1314                      ntohs(ctl_cp->cport));
1315
1316        cp->control = ctl_cp;
1317        atomic_inc(&ctl_cp->n_control);
1318}
1319
/*
 *	Per network namespace init and cleanup functions
 */
int ip_vs_estimator_net_init(struct net *net);
int ip_vs_control_net_init(struct net *net);
int ip_vs_protocol_net_init(struct net *net);
int ip_vs_app_net_init(struct net *net);
int ip_vs_conn_net_init(struct net *net);
int ip_vs_sync_net_init(struct net *net);
void ip_vs_conn_net_cleanup(struct net *net);
void ip_vs_app_net_cleanup(struct net *net);
void ip_vs_protocol_net_cleanup(struct net *net);
void ip_vs_control_net_cleanup(struct net *net);
void ip_vs_estimator_net_cleanup(struct net *net);
void ip_vs_sync_net_cleanup(struct net *net);
void ip_vs_service_net_cleanup(struct net *net);
1336
1337/*
1338 *      IPVS application functions
1339 *      (from ip_vs_app.c)
1340 */
1341#define IP_VS_APP_MAX_PORTS  8
1342extern struct ip_vs_app *register_ip_vs_app(struct net *net,
1343                                            struct ip_vs_app *app);
1344extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
1345extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
1346extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
1347extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
1348                                  __u16 proto, __u16 port);
1349extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
1350extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
1351
1352extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
1353extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
1354
1355int register_ip_vs_pe(struct ip_vs_pe *pe);
1356int unregister_ip_vs_pe(struct ip_vs_pe *pe);
1357struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
1358struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
1359
/*
 * Take / drop a reference on the module that owns a persistence engine.
 * Both are no-ops when pe is NULL or pe->module is NULL.
 *
 * These are #defines to avoid pulling in all of module.h just for these
 * trivial ops. Each one is wrapped in do { } while (0) so that it acts as
 * a single statement: it is safe in an unbraced if/else and cannot
 * capture a following "else". Note that pe is evaluated more than once,
 * so it must not have side effects.
 */
#define ip_vs_pe_get(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			__module_get((pe)->module);	\
	} while (0)

#define ip_vs_pe_put(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			module_put((pe)->module);	\
	} while (0)
1370
/*
 *	IPVS protocol functions
 *	(from ip_vs_proto.c)
 */
int ip_vs_protocol_init(void);
void ip_vs_protocol_cleanup(void);
void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
int *ip_vs_create_timeout_table(int *table, int size);
int ip_vs_set_state_timeout(int *table, int num, const char *const *names,
			    const char *name, int to);
void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
			       const struct sk_buff *skb,
			       int offset, const char *msg);

/* Protocol objects, defined elsewhere */
extern struct ip_vs_protocol ip_vs_protocol_tcp;
extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah;
extern struct ip_vs_protocol ip_vs_protocol_sctp;
1392
/*
 *	Scheduler registration, binding and scheduling functions
 *	(from ip_vs_sched.c)
 */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int ip_vs_bind_scheduler(struct ip_vs_service *svc,
			 struct ip_vs_scheduler *scheduler);
void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
			    struct ip_vs_scheduler *sched);
struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc,
				  struct sk_buff *skb,
				  struct ip_vs_proto_data *pd, int *ignored,
				  struct ip_vs_iphdr *iph);
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph);

void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
1413
1414
1415/*
1416 *      IPVS control data and functions (from ip_vs_ctl.c)
1417 */
1418extern struct ip_vs_stats ip_vs_stats;
1419extern int sysctl_ip_vs_sync_ver;
1420
1421extern struct ip_vs_service *
1422ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
1423                  const union nf_inet_addr *vaddr, __be16 vport);
1424
1425extern bool
1426ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
1427                       const union nf_inet_addr *daddr, __be16 dport);
1428
1429extern int ip_vs_use_count_inc(void);
1430extern void ip_vs_use_count_dec(void);
1431extern int ip_vs_register_nl_ioctl(void);
1432extern void ip_vs_unregister_nl_ioctl(void);
1433extern int ip_vs_control_init(void);
1434extern void ip_vs_control_cleanup(void);
1435extern struct ip_vs_dest *
1436ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
1437                __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
1438                __u16 protocol, __u32 fwmark, __u32 flags);
1439extern void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
1440
1441static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
1442{
1443        atomic_inc(&dest->refcnt);
1444}
1445
1446static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
1447{
1448        smp_mb__before_atomic_dec();
1449        atomic_dec(&dest->refcnt);
1450}
1451
1452/*
1453 *      IPVS sync daemon data and function prototypes
1454 *      (from ip_vs_sync.c)
1455 */
1456extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
1457                             __u8 syncid);
1458extern int stop_sync_thread(struct net *net, int state);
1459extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
1460
1461
/*
 *	IPVS rate estimator prototypes
 *	(from ip_vs_est.c)
 */
void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
			  struct ip_vs_stats *stats);
1470
/*
 *	IPVS packet transmitters
 *	(from ip_vs_xmit.c)
 */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, int offset,
		    unsigned int hooknum, struct ip_vs_iphdr *iph);
void ip_vs_dest_dst_rcu_free(struct rcu_head *head);
1490
#ifdef CONFIG_IP_VS_IPV6
/* IPv6 variants of the packet transmitters */
int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		       struct ip_vs_protocol *pp, int offset,
		       unsigned int hooknum, struct ip_vs_iphdr *iph);
#endif /* CONFIG_IP_VS_IPV6 */
1507
#ifdef CONFIG_SYSCTL
/*
 *	Simple rate-based packet dropping for use under load.
 *
 *	While drop_rate is zero nothing is dropped. Otherwise drop_counter
 *	is decremented on every call; when it reaches zero or below it is
 *	reloaded from drop_rate and 1 ("drop this packet") is returned,
 *	i.e. about 1/rate of the packets are dropped.
 */
static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
	if (ipvs->drop_rate == 0)
		return 0;

	ipvs->drop_counter--;
	if (ipvs->drop_counter > 0)
		return 0;

	/* counter expired: reload it and report a drop */
	ipvs->drop_counter = ipvs->drop_rate;
	return 1;
}
#else /* !CONFIG_SYSCTL */
/* Without sysctl support packets are never dropped by rate */
static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
	return 0;
}
#endif /* CONFIG_SYSCTL */
1527
1528/*
1529 *      ip_vs_fwd_tag returns the forwarding tag of the connection
1530 */
/* Forwarding-method bits of a connection's flags; cp is parenthesized so any pointer expression is safe */
#define IP_VS_FWD_METHOD(cp)  ((cp)->flags & IP_VS_CONN_F_FWD_MASK)
1532
1533static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
1534{
1535        char fwd;
1536
1537        switch (IP_VS_FWD_METHOD(cp)) {
1538        case IP_VS_CONN_F_MASQ:
1539                fwd = 'M'; break;
1540        case IP_VS_CONN_F_LOCALNODE:
1541                fwd = 'L'; break;
1542        case IP_VS_CONN_F_TUNNEL:
1543                fwd = 'T'; break;
1544        case IP_VS_CONN_F_DROUTE:
1545                fwd = 'R'; break;
1546        case IP_VS_CONN_F_BYPASS:
1547                fwd = 'B'; break;
1548        default:
1549                fwd = '?'; break;
1550        }
1551        return fwd;
1552}
1553
1554extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
1555                           struct ip_vs_conn *cp, int dir);
1556
1557#ifdef CONFIG_IP_VS_IPV6
1558extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
1559                              struct ip_vs_conn *cp, int dir);
1560#endif
1561
1562extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
1563
1564static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
1565{
1566        __be32 diff[2] = { ~old, new };
1567
1568        return csum_partial(diff, sizeof(diff), oldsum);
1569}
1570
1571#ifdef CONFIG_IP_VS_IPV6
1572static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
1573                                        __wsum oldsum)
1574{
1575        __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
1576                            new[3],  new[2],  new[1],  new[0] };
1577
1578        return csum_partial(diff, sizeof(diff), oldsum);
1579}
1580#endif
1581
1582static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
1583{
1584        __be16 diff[2] = { ~old, new };
1585
1586        return csum_partial(diff, sizeof(diff), oldsum);
1587}
1588
/*
 * Forget the current (unconfirmed) conntrack and attach the notrack entry.
 *
 * An skb that already carries the untracked entry is left alone.  When
 * conntrack is configured neither built-in nor as a module the function
 * body is empty.
 *
 * NOTE(review): the includes at the top of this file test the same
 * condition with IS_ENABLED(CONFIG_NF_CONNTRACK); consider using that
 * spelling here as well.
 */
static inline void ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

        /* Already untracked: nothing to replace. */
        if (ct && nf_ct_is_untracked(ct))
                return;

        /* Release the old entry, then install the untracked one as NEW. */
        nf_conntrack_put(skb->nfct);
        skb->nfct = &nf_ct_untracked_get()->ct_general;
        skb->nfctinfo = IP_CT_NEW;
        nf_conntrack_get(skb->nfct);
#endif
}
1606
1607#ifdef CONFIG_IP_VS_NFCT
1608/*
1609 *      Netfilter connection tracking
1610 *      (from ip_vs_nfct.c)
1611 */
/*
 * Report the conntrack setting of the given ipvs instance: the value of
 * its sysctl_conntrack field when sysctl support is built, otherwise 0.
 */
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
#ifndef CONFIG_SYSCTL
        return 0;
#else
        return ipvs->sysctl_conntrack;
#endif
}
1620
1621extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
1622                                   int outin);
1623extern int ip_vs_confirm_conntrack(struct sk_buff *skb);
1624extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
1625                                      struct ip_vs_conn *cp, u_int8_t proto,
1626                                      const __be16 port, int from_rs);
1627extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
1628
1629#else
1630
/* Stub used when CONFIG_IP_VS_NFCT is not set: always reports 0. */
static inline int
ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
        return 0;
}
1635
/* Stub used when CONFIG_IP_VS_NFCT is not set: does nothing. */
static inline void
ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
{
        /* intentionally empty */
}
1640
1641static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
1642{
1643        return NF_ACCEPT;
1644}
1645
/* Stub used when CONFIG_IP_VS_NFCT is not set: does nothing. */
static inline void
ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
{
        /* intentionally empty */
}
1649/* CONFIG_IP_VS_NFCT */
1650#endif
1651
1652static inline unsigned int
1653ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
1654{
1655        /*
1656         * We think the overhead of processing active connections is 256
1657         * times higher than that of inactive connections in average. (This
1658         * 256 times might not be accurate, we will change it later) We
1659         * use the following formula to estimate the overhead now:
1660         *                dest->activeconns*256 + dest->inactconns
1661         */
1662        return (atomic_read(&dest->activeconns) << 8) +
1663                atomic_read(&dest->inactconns);
1664}
1665
1666#endif  /* _NET_IP_VS_H */
1667