linux/include/net/ip_vs.h
<<
>>
Prefs
   1/*
   2 *      IP Virtual Server
   3 *      data structure and functionality definitions
   4 */
   5
   6#ifndef _NET_IP_VS_H
   7#define _NET_IP_VS_H
   8
   9#include <linux/ip_vs.h>                /* definitions shared with userland */
  10
  11/* old ipvsadm versions still include this file directly */
  12#ifdef __KERNEL__
  13
  14#include <asm/types.h>                  /* for __uXX types */
  15
  16#include <linux/sysctl.h>               /* for ctl_path */
  17#include <linux/list.h>                 /* for struct list_head */
  18#include <linux/spinlock.h>             /* for struct rwlock_t */
  19#include <asm/atomic.h>                 /* for struct atomic_t */
  20#include <linux/compiler.h>
  21#include <linux/timer.h>
  22
  23#include <net/checksum.h>
  24#include <linux/netfilter.h>            /* for union nf_inet_addr */
  25#include <linux/ip.h>
  26#include <linux/ipv6.h>                 /* for struct ipv6hdr */
  27#include <net/ipv6.h>                   /* for ipv6_addr_copy */
  28#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
  29#include <net/netfilter/nf_conntrack.h>
  30#endif
  31
  32/* Connections' size value needed by ip_vs_ctl.c */
  33extern int ip_vs_conn_tab_size;
  34
  35
  36struct ip_vs_iphdr {
  37        int len;
  38        __u8 protocol;
  39        union nf_inet_addr saddr;
  40        union nf_inet_addr daddr;
  41};
  42
  43static inline void
  44ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr)
  45{
  46#ifdef CONFIG_IP_VS_IPV6
  47        if (af == AF_INET6) {
  48                const struct ipv6hdr *iph = nh;
  49                iphdr->len = sizeof(struct ipv6hdr);
  50                iphdr->protocol = iph->nexthdr;
  51                ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr);
  52                ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr);
  53        } else
  54#endif
  55        {
  56                const struct iphdr *iph = nh;
  57                iphdr->len = iph->ihl * 4;
  58                iphdr->protocol = iph->protocol;
  59                iphdr->saddr.ip = iph->saddr;
  60                iphdr->daddr.ip = iph->daddr;
  61        }
  62}
  63
  64static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
  65                                   const union nf_inet_addr *src)
  66{
  67#ifdef CONFIG_IP_VS_IPV6
  68        if (af == AF_INET6)
  69                ipv6_addr_copy(&dst->in6, &src->in6);
  70        else
  71#endif
  72        dst->ip = src->ip;
  73}
  74
  75static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
  76                                   const union nf_inet_addr *b)
  77{
  78#ifdef CONFIG_IP_VS_IPV6
  79        if (af == AF_INET6)
  80                return ipv6_addr_equal(&a->in6, &b->in6);
  81#endif
  82        return a->ip == b->ip;
  83}
  84
  85#ifdef CONFIG_IP_VS_DEBUG
  86#include <linux/net.h>
  87
  88extern int ip_vs_get_debug_level(void);
  89
  90static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
  91                                         const union nf_inet_addr *addr,
  92                                         int *idx)
  93{
  94        int len;
  95#ifdef CONFIG_IP_VS_IPV6
  96        if (af == AF_INET6)
  97                len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6]",
  98                               &addr->in6) + 1;
  99        else
 100#endif
 101                len = snprintf(&buf[*idx], buf_len - *idx, "%pI4",
 102                               &addr->ip) + 1;
 103
 104        *idx += len;
 105        BUG_ON(*idx > buf_len + 1);
 106        return &buf[*idx - len];
 107}
 108
 109#define IP_VS_DBG_BUF(level, msg, ...)                                  \
 110        do {                                                            \
 111                char ip_vs_dbg_buf[160];                                \
 112                int ip_vs_dbg_idx = 0;                                  \
 113                if (level <= ip_vs_get_debug_level())                   \
 114                        printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);  \
 115        } while (0)
 116#define IP_VS_ERR_BUF(msg...)                                           \
 117        do {                                                            \
 118                char ip_vs_dbg_buf[160];                                \
 119                int ip_vs_dbg_idx = 0;                                  \
 120                pr_err(msg);                                            \
 121        } while (0)
 122
 123/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */
 124#define IP_VS_DBG_ADDR(af, addr)                                        \
 125        ip_vs_dbg_addr(af, ip_vs_dbg_buf,                               \
 126                       sizeof(ip_vs_dbg_buf), addr,                     \
 127                       &ip_vs_dbg_idx)
 128
 129#define IP_VS_DBG(level, msg, ...)                                      \
 130        do {                                                            \
 131                if (level <= ip_vs_get_debug_level())                   \
 132                        printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);  \
 133        } while (0)
 134#define IP_VS_DBG_RL(msg, ...)                                          \
 135        do {                                                            \
 136                if (net_ratelimit())                                    \
 137                        printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);  \
 138        } while (0)
 139#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)                     \
 140        do {                                                            \
 141                if (level <= ip_vs_get_debug_level())                   \
 142                        pp->debug_packet(af, pp, skb, ofs, msg);        \
 143        } while (0)
 144#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)                  \
 145        do {                                                            \
 146                if (level <= ip_vs_get_debug_level() &&                 \
 147                    net_ratelimit())                                    \
 148                        pp->debug_packet(af, pp, skb, ofs, msg);        \
 149        } while (0)
 150#else   /* NO DEBUGGING at ALL */
 151#define IP_VS_DBG_BUF(level, msg...)  do {} while (0)
 152#define IP_VS_ERR_BUF(msg...)  do {} while (0)
 153#define IP_VS_DBG(level, msg...)  do {} while (0)
 154#define IP_VS_DBG_RL(msg...)  do {} while (0)
 155#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)     do {} while (0)
 156#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)  do {} while (0)
 157#endif
 158
 159#define IP_VS_BUG() BUG()
 160#define IP_VS_ERR_RL(msg, ...)                                          \
 161        do {                                                            \
 162                if (net_ratelimit())                                    \
 163                        pr_err(msg, ##__VA_ARGS__);                     \
 164        } while (0)
 165
 166#ifdef CONFIG_IP_VS_DEBUG
 167#define EnterFunction(level)                                            \
 168        do {                                                            \
 169                if (level <= ip_vs_get_debug_level())                   \
 170                        printk(KERN_DEBUG                               \
 171                               pr_fmt("Enter: %s, %s line %i\n"),       \
 172                               __func__, __FILE__, __LINE__);           \
 173        } while (0)
 174#define LeaveFunction(level)                                            \
 175        do {                                                            \
 176                if (level <= ip_vs_get_debug_level())                   \
 177                        printk(KERN_DEBUG                               \
 178                               pr_fmt("Leave: %s, %s line %i\n"),       \
 179                               __func__, __FILE__, __LINE__);           \
 180        } while (0)
 181#else
 182#define EnterFunction(level)   do {} while (0)
 183#define LeaveFunction(level)   do {} while (0)
 184#endif
 185
 186#define IP_VS_WAIT_WHILE(expr)  while (expr) { cpu_relax(); }
 187
 188
 189/*
 190 *      The port number of FTP service (in network order).
 191 */
 192#define FTPPORT  cpu_to_be16(21)
 193#define FTPDATA  cpu_to_be16(20)
 194
 195/*
 196 *      TCP State Values
 197 */
 198enum {
 199        IP_VS_TCP_S_NONE = 0,
 200        IP_VS_TCP_S_ESTABLISHED,
 201        IP_VS_TCP_S_SYN_SENT,
 202        IP_VS_TCP_S_SYN_RECV,
 203        IP_VS_TCP_S_FIN_WAIT,
 204        IP_VS_TCP_S_TIME_WAIT,
 205        IP_VS_TCP_S_CLOSE,
 206        IP_VS_TCP_S_CLOSE_WAIT,
 207        IP_VS_TCP_S_LAST_ACK,
 208        IP_VS_TCP_S_LISTEN,
 209        IP_VS_TCP_S_SYNACK,
 210        IP_VS_TCP_S_LAST
 211};
 212
 213/*
 214 *      UDP State Values
 215 */
 216enum {
 217        IP_VS_UDP_S_NORMAL,
 218        IP_VS_UDP_S_LAST,
 219};
 220
 221/*
 222 *      ICMP State Values
 223 */
 224enum {
 225        IP_VS_ICMP_S_NORMAL,
 226        IP_VS_ICMP_S_LAST,
 227};
 228
 229/*
 230 *      SCTP State Values
 231 */
 232enum ip_vs_sctp_states {
 233        IP_VS_SCTP_S_NONE,
 234        IP_VS_SCTP_S_INIT_CLI,
 235        IP_VS_SCTP_S_INIT_SER,
 236        IP_VS_SCTP_S_INIT_ACK_CLI,
 237        IP_VS_SCTP_S_INIT_ACK_SER,
 238        IP_VS_SCTP_S_ECHO_CLI,
 239        IP_VS_SCTP_S_ECHO_SER,
 240        IP_VS_SCTP_S_ESTABLISHED,
 241        IP_VS_SCTP_S_SHUT_CLI,
 242        IP_VS_SCTP_S_SHUT_SER,
 243        IP_VS_SCTP_S_SHUT_ACK_CLI,
 244        IP_VS_SCTP_S_SHUT_ACK_SER,
 245        IP_VS_SCTP_S_CLOSED,
 246        IP_VS_SCTP_S_LAST
 247};
 248
 249/*
 250 *      Delta sequence info structure
 251 *      Each ip_vs_conn has 2 (output AND input seq. changes).
 252 *      Only used in the VS/NAT.
 253 */
 254struct ip_vs_seq {
 255        __u32                   init_seq;       /* Add delta from this seq */
 256        __u32                   delta;          /* Delta in sequence numbers */
 257        __u32                   previous_delta; /* Delta in sequence numbers
 258                                                   before last resized pkt */
 259};
 260
 261
 262/*
 263 *      IPVS statistics objects
 264 */
 265struct ip_vs_estimator {
 266        struct list_head        list;
 267
 268        u64                     last_inbytes;
 269        u64                     last_outbytes;
 270        u32                     last_conns;
 271        u32                     last_inpkts;
 272        u32                     last_outpkts;
 273
 274        u32                     cps;
 275        u32                     inpps;
 276        u32                     outpps;
 277        u32                     inbps;
 278        u32                     outbps;
 279};
 280
 281struct ip_vs_stats {
 282        struct ip_vs_stats_user ustats;         /* statistics */
 283        struct ip_vs_estimator  est;            /* estimator */
 284
 285        spinlock_t              lock;           /* spin lock */
 286};
 287
 288struct dst_entry;
 289struct iphdr;
 290struct ip_vs_conn;
 291struct ip_vs_app;
 292struct sk_buff;
 293
 294struct ip_vs_protocol {
 295        struct ip_vs_protocol   *next;
 296        char                    *name;
 297        u16                     protocol;
 298        u16                     num_states;
 299        int                     dont_defrag;
 300        atomic_t                appcnt;         /* counter of proto app incs */
 301        int                     *timeout_table; /* protocol timeout table */
 302
 303        void (*init)(struct ip_vs_protocol *pp);
 304
 305        void (*exit)(struct ip_vs_protocol *pp);
 306
 307        int (*conn_schedule)(int af, struct sk_buff *skb,
 308                             struct ip_vs_protocol *pp,
 309                             int *verdict, struct ip_vs_conn **cpp);
 310
 311        struct ip_vs_conn *
 312        (*conn_in_get)(int af,
 313                       const struct sk_buff *skb,
 314                       struct ip_vs_protocol *pp,
 315                       const struct ip_vs_iphdr *iph,
 316                       unsigned int proto_off,
 317                       int inverse);
 318
 319        struct ip_vs_conn *
 320        (*conn_out_get)(int af,
 321                        const struct sk_buff *skb,
 322                        struct ip_vs_protocol *pp,
 323                        const struct ip_vs_iphdr *iph,
 324                        unsigned int proto_off,
 325                        int inverse);
 326
 327        int (*snat_handler)(struct sk_buff *skb,
 328                            struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
 329
 330        int (*dnat_handler)(struct sk_buff *skb,
 331                            struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
 332
 333        int (*csum_check)(int af, struct sk_buff *skb,
 334                          struct ip_vs_protocol *pp);
 335
 336        const char *(*state_name)(int state);
 337
 338        int (*state_transition)(struct ip_vs_conn *cp, int direction,
 339                                const struct sk_buff *skb,
 340                                struct ip_vs_protocol *pp);
 341
 342        int (*register_app)(struct ip_vs_app *inc);
 343
 344        void (*unregister_app)(struct ip_vs_app *inc);
 345
 346        int (*app_conn_bind)(struct ip_vs_conn *cp);
 347
 348        void (*debug_packet)(int af, struct ip_vs_protocol *pp,
 349                             const struct sk_buff *skb,
 350                             int offset,
 351                             const char *msg);
 352
 353        void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
 354
 355        int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
 356};
 357
 358extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
 359
 360struct ip_vs_conn_param {
 361        const union nf_inet_addr        *caddr;
 362        const union nf_inet_addr        *vaddr;
 363        __be16                          cport;
 364        __be16                          vport;
 365        __u16                           protocol;
 366        u16                             af;
 367
 368        const struct ip_vs_pe           *pe;
 369        char                            *pe_data;
 370        __u8                            pe_data_len;
 371};
 372
 373/*
 374 *      IP_VS structure allocated for each dynamically scheduled connection
 375 */
 376struct ip_vs_conn {
 377        struct list_head        c_list;         /* hashed list heads */
 378
 379        /* Protocol, addresses and port numbers */
 380        u16                      af;            /* address family */
 381        union nf_inet_addr       caddr;          /* client address */
 382        union nf_inet_addr       vaddr;          /* virtual address */
 383        union nf_inet_addr       daddr;          /* destination address */
 384        volatile __u32           flags;          /* status flags */
 385        __be16                   cport;
 386        __be16                   vport;
 387        __be16                   dport;
 388        __u16                   protocol;       /* Which protocol (TCP/UDP) */
 389
 390        /* counter and timer */
 391        atomic_t                refcnt;         /* reference count */
 392        struct timer_list       timer;          /* Expiration timer */
 393        volatile unsigned long  timeout;        /* timeout */
 394
 395        /* Flags and state transition */
 396        spinlock_t              lock;           /* lock for state transition */
 397        volatile __u16          state;          /* state info */
 398        volatile __u16          old_state;      /* old state, to be used for
 399                                                 * state transition triggerd
 400                                                 * synchronization
 401                                                 */
 402
 403        /* Control members */
 404        struct ip_vs_conn       *control;       /* Master control connection */
 405        atomic_t                n_control;      /* Number of controlled ones */
 406        struct ip_vs_dest       *dest;          /* real server */
 407        atomic_t                in_pkts;        /* incoming packet counter */
 408
 409        /* packet transmitter for different forwarding methods.  If it
 410           mangles the packet, it must return NF_DROP or better NF_STOLEN,
 411           otherwise this must be changed to a sk_buff **.
 412           NF_ACCEPT can be returned when destination is local.
 413         */
 414        int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
 415                           struct ip_vs_protocol *pp);
 416
 417        /* Note: we can group the following members into a structure,
 418           in order to save more space, and the following members are
 419           only used in VS/NAT anyway */
 420        struct ip_vs_app        *app;           /* bound ip_vs_app object */
 421        void                    *app_data;      /* Application private data */
 422        struct ip_vs_seq        in_seq;         /* incoming seq. struct */
 423        struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 424
 425        char                    *pe_data;
 426        __u8                    pe_data_len;
 427};
 428
 429
 430/*
 431 *      Extended internal versions of struct ip_vs_service_user and
 432 *      ip_vs_dest_user for IPv6 support.
 433 *
 434 *      We need these to conveniently pass around service and destination
 435 *      options, but unfortunately, we also need to keep the old definitions to
 436 *      maintain userspace backwards compatibility for the setsockopt interface.
 437 */
 438struct ip_vs_service_user_kern {
 439        /* virtual service addresses */
 440        u16                     af;
 441        u16                     protocol;
 442        union nf_inet_addr      addr;           /* virtual ip address */
 443        u16                     port;
 444        u32                     fwmark;         /* firwall mark of service */
 445
 446        /* virtual service options */
 447        char                    *sched_name;
 448        char                    *pe_name;
 449        unsigned                flags;          /* virtual service flags */
 450        unsigned                timeout;        /* persistent timeout in sec */
 451        u32                     netmask;        /* persistent netmask */
 452};
 453
 454
 455struct ip_vs_dest_user_kern {
 456        /* destination server address */
 457        union nf_inet_addr      addr;
 458        u16                     port;
 459
 460        /* real server options */
 461        unsigned                conn_flags;     /* connection flags */
 462        int                     weight;         /* destination weight */
 463
 464        /* thresholds for active connections */
 465        u32                     u_threshold;    /* upper threshold */
 466        u32                     l_threshold;    /* lower threshold */
 467};
 468
 469
 470/*
 471 *      The information about the virtual service offered to the net
 472 *      and the forwarding entries
 473 */
 474struct ip_vs_service {
 475        struct list_head        s_list;   /* for normal service table */
 476        struct list_head        f_list;   /* for fwmark-based service table */
 477        atomic_t                refcnt;   /* reference counter */
 478        atomic_t                usecnt;   /* use counter */
 479
 480        u16                     af;       /* address family */
 481        __u16                   protocol; /* which protocol (TCP/UDP) */
 482        union nf_inet_addr      addr;     /* IP address for virtual service */
 483        __be16                  port;     /* port number for the service */
 484        __u32                   fwmark;   /* firewall mark of the service */
 485        unsigned                flags;    /* service status flags */
 486        unsigned                timeout;  /* persistent timeout in ticks */
 487        __be32                  netmask;  /* grouping granularity */
 488
 489        struct list_head        destinations;  /* real server d-linked list */
 490        __u32                   num_dests;     /* number of servers */
 491        struct ip_vs_stats      stats;         /* statistics for the service */
 492        struct ip_vs_app        *inc;     /* bind conns to this app inc */
 493
 494        /* for scheduling */
 495        struct ip_vs_scheduler  *scheduler;    /* bound scheduler object */
 496        rwlock_t                sched_lock;    /* lock sched_data */
 497        void                    *sched_data;   /* scheduler application data */
 498
 499        /* alternate persistence engine */
 500        struct ip_vs_pe         *pe;
 501};
 502
 503
 504/*
 505 *      The real server destination forwarding entry
 506 *      with ip address, port number, and so on.
 507 */
 508struct ip_vs_dest {
 509        struct list_head        n_list;   /* for the dests in the service */
 510        struct list_head        d_list;   /* for table with all the dests */
 511
 512        u16                     af;             /* address family */
 513        union nf_inet_addr      addr;           /* IP address of the server */
 514        __be16                  port;           /* port number of the server */
 515        volatile unsigned       flags;          /* dest status flags */
 516        atomic_t                conn_flags;     /* flags to copy to conn */
 517        atomic_t                weight;         /* server weight */
 518
 519        atomic_t                refcnt;         /* reference counter */
 520        struct ip_vs_stats      stats;          /* statistics */
 521
 522        /* connection counters and thresholds */
 523        atomic_t                activeconns;    /* active connections */
 524        atomic_t                inactconns;     /* inactive connections */
 525        atomic_t                persistconns;   /* persistent connections */
 526        __u32                   u_threshold;    /* upper threshold */
 527        __u32                   l_threshold;    /* lower threshold */
 528
 529        /* for destination cache */
 530        spinlock_t              dst_lock;       /* lock of dst_cache */
 531        struct dst_entry        *dst_cache;     /* destination cache entry */
 532        u32                     dst_rtos;       /* RT_TOS(tos) for dst */
 533        u32                     dst_cookie;
 534#ifdef CONFIG_IP_VS_IPV6
 535        struct in6_addr         dst_saddr;
 536#endif
 537
 538        /* for virtual service */
 539        struct ip_vs_service    *svc;           /* service it belongs to */
 540        __u16                   protocol;       /* which protocol (TCP/UDP) */
 541        union nf_inet_addr      vaddr;          /* virtual IP address */
 542        __be16                  vport;          /* virtual port number */
 543        __u32                   vfwmark;        /* firewall mark of service */
 544};
 545
 546
 547/*
 548 *      The scheduler object
 549 */
 550struct ip_vs_scheduler {
 551        struct list_head        n_list;         /* d-linked list head */
 552        char                    *name;          /* scheduler name */
 553        atomic_t                refcnt;         /* reference counter */
 554        struct module           *module;        /* THIS_MODULE/NULL */
 555
 556        /* scheduler initializing service */
 557        int (*init_service)(struct ip_vs_service *svc);
 558        /* scheduling service finish */
 559        int (*done_service)(struct ip_vs_service *svc);
 560        /* scheduler updating service */
 561        int (*update_service)(struct ip_vs_service *svc);
 562
 563        /* selecting a server from the given service */
 564        struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
 565                                       const struct sk_buff *skb);
 566};
 567
 568/* The persistence engine object */
 569struct ip_vs_pe {
 570        struct list_head        n_list;         /* d-linked list head */
 571        char                    *name;          /* scheduler name */
 572        atomic_t                refcnt;         /* reference counter */
 573        struct module           *module;        /* THIS_MODULE/NULL */
 574
 575        /* get the connection template, if any */
 576        int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb);
 577        bool (*ct_match)(const struct ip_vs_conn_param *p,
 578                         struct ip_vs_conn *ct);
 579        u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
 580                           bool inverse);
 581        int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
 582};
 583
 584/*
 585 *      The application module object (a.k.a. app incarnation)
 586 */
 587struct ip_vs_app {
 588        struct list_head        a_list;         /* member in app list */
 589        int                     type;           /* IP_VS_APP_TYPE_xxx */
 590        char                    *name;          /* application module name */
 591        __u16                   protocol;
 592        struct module           *module;        /* THIS_MODULE/NULL */
 593        struct list_head        incs_list;      /* list of incarnations */
 594
 595        /* members for application incarnations */
 596        struct list_head        p_list;         /* member in proto app list */
 597        struct ip_vs_app        *app;           /* its real application */
 598        __be16                  port;           /* port number in net order */
 599        atomic_t                usecnt;         /* usage counter */
 600
 601        /*
 602         * output hook: Process packet in inout direction, diff set for TCP.
 603         * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
 604         *         2=Mangled but checksum was not updated
 605         */
 606        int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
 607                       struct sk_buff *, int *diff);
 608
 609        /*
 610         * input hook: Process packet in outin direction, diff set for TCP.
 611         * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
 612         *         2=Mangled but checksum was not updated
 613         */
 614        int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
 615                      struct sk_buff *, int *diff);
 616
 617        /* ip_vs_app initializer */
 618        int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
 619
 620        /* ip_vs_app finish */
 621        int (*done_conn)(struct ip_vs_app *, struct ip_vs_conn *);
 622
 623
 624        /* not used now */
 625        int (*bind_conn)(struct ip_vs_app *, struct ip_vs_conn *,
 626                         struct ip_vs_protocol *);
 627
 628        void (*unbind_conn)(struct ip_vs_app *, struct ip_vs_conn *);
 629
 630        int *                   timeout_table;
 631        int *                   timeouts;
 632        int                     timeouts_size;
 633
 634        int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app,
 635                             int *verdict, struct ip_vs_conn **cpp);
 636
 637        struct ip_vs_conn *
 638        (*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app,
 639                       const struct iphdr *iph, unsigned int proto_off,
 640                       int inverse);
 641
 642        struct ip_vs_conn *
 643        (*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app,
 644                        const struct iphdr *iph, unsigned int proto_off,
 645                        int inverse);
 646
 647        int (*state_transition)(struct ip_vs_conn *cp, int direction,
 648                                const struct sk_buff *skb,
 649                                struct ip_vs_app *app);
 650
 651        void (*timeout_change)(struct ip_vs_app *app, int flags);
 652};
 653
 654
 655/*
 656 *      IPVS core functions
 657 *      (from ip_vs_core.c)
 658 */
 659extern const char *ip_vs_proto_name(unsigned proto);
 660extern void ip_vs_init_hash_table(struct list_head *table, int rows);
 661#define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t)))
 662
 663#define IP_VS_APP_TYPE_FTP      1
 664
 665/*
 666 *     ip_vs_conn handling functions
 667 *     (from ip_vs_conn.c)
 668 */
 669
 670enum {
 671        IP_VS_DIR_INPUT = 0,
 672        IP_VS_DIR_OUTPUT,
 673        IP_VS_DIR_INPUT_ONLY,
 674        IP_VS_DIR_LAST,
 675};
 676
 677static inline void ip_vs_conn_fill_param(int af, int protocol,
 678                                         const union nf_inet_addr *caddr,
 679                                         __be16 cport,
 680                                         const union nf_inet_addr *vaddr,
 681                                         __be16 vport,
 682                                         struct ip_vs_conn_param *p)
 683{
 684        p->af = af;
 685        p->protocol = protocol;
 686        p->caddr = caddr;
 687        p->cport = cport;
 688        p->vaddr = vaddr;
 689        p->vport = vport;
 690        p->pe = NULL;
 691        p->pe_data = NULL;
 692}
 693
 694struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
 695struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
 696
 697struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
 698                                            struct ip_vs_protocol *pp,
 699                                            const struct ip_vs_iphdr *iph,
 700                                            unsigned int proto_off,
 701                                            int inverse);
 702
 703struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
 704
 705struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
 706                                             struct ip_vs_protocol *pp,
 707                                             const struct ip_vs_iphdr *iph,
 708                                             unsigned int proto_off,
 709                                             int inverse);
 710
 711/* put back the conn without restarting its timer */
 712static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
 713{
 714        atomic_dec(&cp->refcnt);
 715}
 716extern void ip_vs_conn_put(struct ip_vs_conn *cp);
 717extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
 718
 719struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
 720                                  const union nf_inet_addr *daddr,
 721                                  __be16 dport, unsigned flags,
 722                                  struct ip_vs_dest *dest);
 723extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 724
 725extern const char * ip_vs_state_name(__u16 proto, int state);
 726
 727extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
 728extern int ip_vs_check_template(struct ip_vs_conn *ct);
 729extern void ip_vs_random_dropentry(void);
 730extern int ip_vs_conn_init(void);
 731extern void ip_vs_conn_cleanup(void);
 732
 733static inline void ip_vs_control_del(struct ip_vs_conn *cp)
 734{
 735        struct ip_vs_conn *ctl_cp = cp->control;
 736        if (!ctl_cp) {
 737                IP_VS_ERR_BUF("request control DEL for uncontrolled: "
 738                              "%s:%d to %s:%d\n",
 739                              IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 740                              ntohs(cp->cport),
 741                              IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
 742                              ntohs(cp->vport));
 743
 744                return;
 745        }
 746
 747        IP_VS_DBG_BUF(7, "DELeting control for: "
 748                      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
 749                      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 750                      ntohs(cp->cport),
 751                      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
 752                      ntohs(ctl_cp->cport));
 753
 754        cp->control = NULL;
 755        if (atomic_read(&ctl_cp->n_control) == 0) {
 756                IP_VS_ERR_BUF("BUG control DEL with n=0 : "
 757                              "%s:%d to %s:%d\n",
 758                              IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 759                              ntohs(cp->cport),
 760                              IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
 761                              ntohs(cp->vport));
 762
 763                return;
 764        }
 765        atomic_dec(&ctl_cp->n_control);
 766}
 767
 768static inline void
 769ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 770{
 771        if (cp->control) {
 772                IP_VS_ERR_BUF("request control ADD for already controlled: "
 773                              "%s:%d to %s:%d\n",
 774                              IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 775                              ntohs(cp->cport),
 776                              IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
 777                              ntohs(cp->vport));
 778
 779                ip_vs_control_del(cp);
 780        }
 781
 782        IP_VS_DBG_BUF(7, "ADDing control for: "
 783                      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
 784                      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 785                      ntohs(cp->cport),
 786                      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
 787                      ntohs(ctl_cp->cport));
 788
 789        cp->control = ctl_cp;
 790        atomic_inc(&ctl_cp->n_control);
 791}
 792
 793
 794/*
 795 *      IPVS application functions
 796 *      (from ip_vs_app.c)
 797 */
 798#define IP_VS_APP_MAX_PORTS  8
 799extern int register_ip_vs_app(struct ip_vs_app *app);
 800extern void unregister_ip_vs_app(struct ip_vs_app *app);
 801extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 802extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
 803extern int
 804register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
 805extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
 806extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
 807
 808extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
 809extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
 810extern int ip_vs_app_init(void);
 811extern void ip_vs_app_cleanup(void);
 812
 813void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
 814void ip_vs_unbind_pe(struct ip_vs_service *svc);
 815int register_ip_vs_pe(struct ip_vs_pe *pe);
 816int unregister_ip_vs_pe(struct ip_vs_pe *pe);
 817extern struct ip_vs_pe *ip_vs_pe_get(const char *name);
 818extern void ip_vs_pe_put(struct ip_vs_pe *pe);
 819
 820/*
 821 *      IPVS protocol functions (from ip_vs_proto.c)
 822 */
 823extern int ip_vs_protocol_init(void);
 824extern void ip_vs_protocol_cleanup(void);
 825extern void ip_vs_protocol_timeout_change(int flags);
 826extern int *ip_vs_create_timeout_table(int *table, int size);
 827extern int
 828ip_vs_set_state_timeout(int *table, int num, const char *const *names,
 829                        const char *name, int to);
 830extern void
 831ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 832                          const struct sk_buff *skb,
 833                          int offset, const char *msg);
 834
 835extern struct ip_vs_protocol ip_vs_protocol_tcp;
 836extern struct ip_vs_protocol ip_vs_protocol_udp;
 837extern struct ip_vs_protocol ip_vs_protocol_icmp;
 838extern struct ip_vs_protocol ip_vs_protocol_esp;
 839extern struct ip_vs_protocol ip_vs_protocol_ah;
 840extern struct ip_vs_protocol ip_vs_protocol_sctp;
 841
 842/*
 843 *      Registering/unregistering scheduler functions
 844 *      (from ip_vs_sched.c)
 845 */
 846extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
 847extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
 848extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
 849                                struct ip_vs_scheduler *scheduler);
 850extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc);
 851extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
 852extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
 853extern struct ip_vs_conn *
 854ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 855               struct ip_vs_protocol *pp, int *ignored);
 856extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 857                        struct ip_vs_protocol *pp);
 858
 859
 860/*
 861 *      IPVS control data and functions (from ip_vs_ctl.c)
 862 */
 863extern int sysctl_ip_vs_cache_bypass;
 864extern int sysctl_ip_vs_expire_nodest_conn;
 865extern int sysctl_ip_vs_expire_quiescent_template;
 866extern int sysctl_ip_vs_sync_threshold[2];
 867extern int sysctl_ip_vs_nat_icmp_send;
 868extern int sysctl_ip_vs_conntrack;
 869extern int sysctl_ip_vs_snat_reroute;
 870extern struct ip_vs_stats ip_vs_stats;
 871extern const struct ctl_path net_vs_ctl_path[];
 872
 873extern struct ip_vs_service *
 874ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
 875                  const union nf_inet_addr *vaddr, __be16 vport);
 876
 877static inline void ip_vs_service_put(struct ip_vs_service *svc)
 878{
 879        atomic_dec(&svc->usecnt);
 880}
 881
 882extern struct ip_vs_dest *
 883ip_vs_lookup_real_service(int af, __u16 protocol,
 884                          const union nf_inet_addr *daddr, __be16 dport);
 885
 886extern int ip_vs_use_count_inc(void);
 887extern void ip_vs_use_count_dec(void);
 888extern int ip_vs_control_init(void);
 889extern void ip_vs_control_cleanup(void);
 890extern struct ip_vs_dest *
 891ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
 892                const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
 893extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 894
 895
 896/*
 897 *      IPVS sync daemon data and function prototypes
 898 *      (from ip_vs_sync.c)
 899 */
 900extern volatile int ip_vs_sync_state;
 901extern volatile int ip_vs_master_syncid;
 902extern volatile int ip_vs_backup_syncid;
 903extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 904extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 905extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
 906extern int stop_sync_thread(int state);
 907extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
 908
 909
 910/*
 911 *      IPVS rate estimator prototypes (from ip_vs_est.c)
 912 */
 913extern int ip_vs_estimator_init(void);
 914extern void ip_vs_estimator_cleanup(void);
 915extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
 916extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
 917extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
 918
 919/*
 920 *      Various IPVS packet transmitters (from ip_vs_xmit.c)
 921 */
 922extern int ip_vs_null_xmit
 923(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 924extern int ip_vs_bypass_xmit
 925(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 926extern int ip_vs_nat_xmit
 927(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 928extern int ip_vs_tunnel_xmit
 929(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 930extern int ip_vs_dr_xmit
 931(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 932extern int ip_vs_icmp_xmit
 933(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
 934extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
 935
 936#ifdef CONFIG_IP_VS_IPV6
 937extern int ip_vs_bypass_xmit_v6
 938(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 939extern int ip_vs_nat_xmit_v6
 940(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 941extern int ip_vs_tunnel_xmit_v6
 942(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 943extern int ip_vs_dr_xmit_v6
 944(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 945extern int ip_vs_icmp_xmit_v6
 946(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp,
 947 int offset);
 948#endif
 949
 950/*
 951 *      This is a simple mechanism to ignore packets when
 952 *      we are loaded. Just set ip_vs_drop_rate to 'n' and
 953 *      we start to drop 1/rate of the packets
 954 */
 955extern int ip_vs_drop_rate;
 956extern int ip_vs_drop_counter;
 957
 958static __inline__ int ip_vs_todrop(void)
 959{
 960        if (!ip_vs_drop_rate) return 0;
 961        if (--ip_vs_drop_counter > 0) return 0;
 962        ip_vs_drop_counter = ip_vs_drop_rate;
 963        return 1;
 964}
 965
 966/*
 967 *      ip_vs_fwd_tag returns the forwarding tag of the connection
 968 */
 969#define IP_VS_FWD_METHOD(cp)  (cp->flags & IP_VS_CONN_F_FWD_MASK)
 970
 971static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
 972{
 973        char fwd;
 974
 975        switch (IP_VS_FWD_METHOD(cp)) {
 976        case IP_VS_CONN_F_MASQ:
 977                fwd = 'M'; break;
 978        case IP_VS_CONN_F_LOCALNODE:
 979                fwd = 'L'; break;
 980        case IP_VS_CONN_F_TUNNEL:
 981                fwd = 'T'; break;
 982        case IP_VS_CONN_F_DROUTE:
 983                fwd = 'R'; break;
 984        case IP_VS_CONN_F_BYPASS:
 985                fwd = 'B'; break;
 986        default:
 987                fwd = '?'; break;
 988        }
 989        return fwd;
 990}
 991
 992extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 993                           struct ip_vs_conn *cp, int dir);
 994
 995#ifdef CONFIG_IP_VS_IPV6
 996extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
 997                              struct ip_vs_conn *cp, int dir);
 998#endif
 999
1000extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
1001
1002static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
1003{
1004        __be32 diff[2] = { ~old, new };
1005
1006        return csum_partial(diff, sizeof(diff), oldsum);
1007}
1008
#ifdef CONFIG_IP_VS_IPV6
/*
 * 128-bit variant of the checksum-difference helper: old and new each
 * point at four 32-bit words.  The complemented old words followed by
 * the new words (both taken from index 3 down to 0) are summed on top
 * of oldsum with csum_partial().
 */
static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
                                        __wsum oldsum)
{
        __be32 words[8];
        int i;

        for (i = 0; i < 4; i++) {
                words[i] = ~old[3 - i];
                words[4 + i] = new[3 - i];
        }

        return csum_partial(words, sizeof(words), oldsum);
}
#endif
1019
1020static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
1021{
1022        __be16 diff[2] = { ~old, new };
1023
1024        return csum_partial(diff, sizeof(diff), oldsum);
1025}
1026
/*
 * Forget current conntrack (unconfirmed) and attach notrack entry
 *
 * Compiles to an empty function unless conntrack is built in or modular.
 * Nothing is changed when the skb already carries the untracked entry.
 */
static inline void ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        enum ip_conntrack_info ctinfo;
        /* plain initialisation; ct must not be assigned inside its own
         * initialiser
         */
        struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

        if (!ct || !nf_ct_is_untracked(ct)) {
                /* discard whatever netfilter state the skb carried */
                nf_reset(skb);
                /* point the skb at the shared untracked entry ... */
                skb->nfct = &nf_ct_untracked_get()->ct_general;
                skb->nfctinfo = IP_CT_NEW;
                /* ... and call nf_conntrack_get() on the entry now attached */
                nf_conntrack_get(skb->nfct);
        }
#endif
}
1044
1045#ifdef CONFIG_IP_VS_NFCT
1046/*
1047 *      Netfilter connection tracking
1048 *      (from ip_vs_nfct.c)
1049 */
1050static inline int ip_vs_conntrack_enabled(void)
1051{
1052        return sysctl_ip_vs_conntrack;
1053}
1054
1055extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
1056                                   int outin);
1057extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp);
1058extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
1059                                      struct ip_vs_conn *cp, u_int8_t proto,
1060                                      const __be16 port, int from_rs);
1061extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
1062
1063#else
1064
1065static inline int ip_vs_conntrack_enabled(void)
1066{
1067        return 0;
1068}
1069
1070static inline void ip_vs_update_conntrack(struct sk_buff *skb,
1071                                          struct ip_vs_conn *cp, int outin)
1072{
1073}
1074
1075static inline int ip_vs_confirm_conntrack(struct sk_buff *skb,
1076                                          struct ip_vs_conn *cp)
1077{
1078        return NF_ACCEPT;
1079}
1080
1081static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
1082{
1083}
1084/* CONFIG_IP_VS_NFCT */
1085#endif
1086
1087#endif /* __KERNEL__ */
1088
1089#endif  /* _NET_IP_VS_H */
1090