linux/net/netfilter/ipvs/ip_vs_proto_udp.c
<<
>>
Prefs
   1/*
   2 * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
   3 *
   4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   5 *              Julian Anastasov <ja@ssi.bg>
   6 *
   7 *              This program is free software; you can redistribute it and/or
   8 *              modify it under the terms of the GNU General Public License
   9 *              as published by the Free Software Foundation; either version
  10 *              2 of the License, or (at your option) any later version.
  11 *
  12 * Changes:
  13 *
  14 */
  15
  16#define KMSG_COMPONENT "IPVS"
  17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  18
  19#include <linux/in.h>
  20#include <linux/ip.h>
  21#include <linux/kernel.h>
  22#include <linux/netfilter.h>
  23#include <linux/netfilter_ipv4.h>
  24#include <linux/udp.h>
  25
  26#include <net/ip_vs.h>
  27#include <net/ip.h>
  28#include <net/ip6_checksum.h>
  29
  30static struct ip_vs_conn *
  31udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
  32                const struct ip_vs_iphdr *iph, unsigned int proto_off,
  33                int inverse)
  34{
  35        struct ip_vs_conn *cp;
  36        __be16 _ports[2], *pptr;
  37
  38        pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  39        if (pptr == NULL)
  40                return NULL;
  41
  42        if (likely(!inverse)) {
  43                cp = ip_vs_conn_in_get(af, iph->protocol,
  44                                       &iph->saddr, pptr[0],
  45                                       &iph->daddr, pptr[1]);
  46        } else {
  47                cp = ip_vs_conn_in_get(af, iph->protocol,
  48                                       &iph->daddr, pptr[1],
  49                                       &iph->saddr, pptr[0]);
  50        }
  51
  52        return cp;
  53}
  54
  55
  56static struct ip_vs_conn *
  57udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
  58                 const struct ip_vs_iphdr *iph, unsigned int proto_off,
  59                 int inverse)
  60{
  61        struct ip_vs_conn *cp;
  62        __be16 _ports[2], *pptr;
  63
  64        pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  65        if (pptr == NULL)
  66                return NULL;
  67
  68        if (likely(!inverse)) {
  69                cp = ip_vs_conn_out_get(af, iph->protocol,
  70                                        &iph->saddr, pptr[0],
  71                                        &iph->daddr, pptr[1]);
  72        } else {
  73                cp = ip_vs_conn_out_get(af, iph->protocol,
  74                                        &iph->daddr, pptr[1],
  75                                        &iph->saddr, pptr[0]);
  76        }
  77
  78        return cp;
  79}
  80
  81
  82static int
  83udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
  84                  int *verdict, struct ip_vs_conn **cpp)
  85{
  86        struct ip_vs_service *svc;
  87        struct udphdr _udph, *uh;
  88        struct ip_vs_iphdr iph;
  89
  90        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
  91
  92        uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
  93        if (uh == NULL) {
  94                *verdict = NF_DROP;
  95                return 0;
  96        }
  97
  98        svc = ip_vs_service_get(af, skb->mark, iph.protocol,
  99                                &iph.daddr, uh->dest);
 100        if (svc) {
 101                if (ip_vs_todrop()) {
 102                        /*
 103                         * It seems that we are very loaded.
 104                         * We have to drop this packet :(
 105                         */
 106                        ip_vs_service_put(svc);
 107                        *verdict = NF_DROP;
 108                        return 0;
 109                }
 110
 111                /*
 112                 * Let the virtual server select a real server for the
 113                 * incoming connection, and create a connection entry.
 114                 */
 115                *cpp = ip_vs_schedule(svc, skb);
 116                if (!*cpp) {
 117                        *verdict = ip_vs_leave(svc, skb, pp);
 118                        return 0;
 119                }
 120                ip_vs_service_put(svc);
 121        }
 122        return 1;
 123}
 124
 125
 126static inline void
 127udp_fast_csum_update(int af, struct udphdr *uhdr,
 128                     const union nf_inet_addr *oldip,
 129                     const union nf_inet_addr *newip,
 130                     __be16 oldport, __be16 newport)
 131{
 132#ifdef CONFIG_IP_VS_IPV6
 133        if (af == AF_INET6)
 134                uhdr->check =
 135                        csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 136                                         ip_vs_check_diff2(oldport, newport,
 137                                                ~csum_unfold(uhdr->check))));
 138        else
 139#endif
 140                uhdr->check =
 141                        csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 142                                         ip_vs_check_diff2(oldport, newport,
 143                                                ~csum_unfold(uhdr->check))));
 144        if (!uhdr->check)
 145                uhdr->check = CSUM_MANGLED_0;
 146}
 147
 148static inline void
 149udp_partial_csum_update(int af, struct udphdr *uhdr,
 150                     const union nf_inet_addr *oldip,
 151                     const union nf_inet_addr *newip,
 152                     __be16 oldlen, __be16 newlen)
 153{
 154#ifdef CONFIG_IP_VS_IPV6
 155        if (af == AF_INET6)
 156                uhdr->check =
 157                        csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 158                                         ip_vs_check_diff2(oldlen, newlen,
 159                                                ~csum_unfold(uhdr->check))));
 160        else
 161#endif
 162        uhdr->check =
 163                csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 164                                ip_vs_check_diff2(oldlen, newlen,
 165                                                ~csum_unfold(uhdr->check))));
 166}
 167
 168
 169static int
 170udp_snat_handler(struct sk_buff *skb,
 171                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 172{
 173        struct udphdr *udph;
 174        unsigned int udphoff;
 175        int oldlen;
 176
 177#ifdef CONFIG_IP_VS_IPV6
 178        if (cp->af == AF_INET6)
 179                udphoff = sizeof(struct ipv6hdr);
 180        else
 181#endif
 182                udphoff = ip_hdrlen(skb);
 183        oldlen = skb->len - udphoff;
 184
 185        /* csum_check requires unshared skb */
 186        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 187                return 0;
 188
 189        if (unlikely(cp->app != NULL)) {
 190                /* Some checks before mangling */
 191                if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 192                        return 0;
 193
 194                /*
 195                 *      Call application helper if needed
 196                 */
 197                if (!ip_vs_app_pkt_out(cp, skb))
 198                        return 0;
 199        }
 200
 201        udph = (void *)skb_network_header(skb) + udphoff;
 202        udph->source = cp->vport;
 203
 204        /*
 205         *      Adjust UDP checksums
 206         */
 207        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 208                udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 209                                        htons(oldlen),
 210                                        htons(skb->len - udphoff));
 211        } else if (!cp->app && (udph->check != 0)) {
 212                /* Only port and addr are changed, do fast csum update */
 213                udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 214                                     cp->dport, cp->vport);
 215                if (skb->ip_summed == CHECKSUM_COMPLETE)
 216                        skb->ip_summed = CHECKSUM_NONE;
 217        } else {
 218                /* full checksum calculation */
 219                udph->check = 0;
 220                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 221#ifdef CONFIG_IP_VS_IPV6
 222                if (cp->af == AF_INET6)
 223                        udph->check = csum_ipv6_magic(&cp->vaddr.in6,
 224                                                      &cp->caddr.in6,
 225                                                      skb->len - udphoff,
 226                                                      cp->protocol, skb->csum);
 227                else
 228#endif
 229                        udph->check = csum_tcpudp_magic(cp->vaddr.ip,
 230                                                        cp->caddr.ip,
 231                                                        skb->len - udphoff,
 232                                                        cp->protocol,
 233                                                        skb->csum);
 234                if (udph->check == 0)
 235                        udph->check = CSUM_MANGLED_0;
 236                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 237                          pp->name, udph->check,
 238                          (char*)&(udph->check) - (char*)udph);
 239        }
 240        return 1;
 241}
 242
 243
 244static int
 245udp_dnat_handler(struct sk_buff *skb,
 246                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 247{
 248        struct udphdr *udph;
 249        unsigned int udphoff;
 250        int oldlen;
 251
 252#ifdef CONFIG_IP_VS_IPV6
 253        if (cp->af == AF_INET6)
 254                udphoff = sizeof(struct ipv6hdr);
 255        else
 256#endif
 257                udphoff = ip_hdrlen(skb);
 258        oldlen = skb->len - udphoff;
 259
 260        /* csum_check requires unshared skb */
 261        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 262                return 0;
 263
 264        if (unlikely(cp->app != NULL)) {
 265                /* Some checks before mangling */
 266                if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 267                        return 0;
 268
 269                /*
 270                 *      Attempt ip_vs_app call.
 271                 *      It will fix ip_vs_conn
 272                 */
 273                if (!ip_vs_app_pkt_in(cp, skb))
 274                        return 0;
 275        }
 276
 277        udph = (void *)skb_network_header(skb) + udphoff;
 278        udph->dest = cp->dport;
 279
 280        /*
 281         *      Adjust UDP checksums
 282         */
 283        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 284                udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 285                                        htons(oldlen),
 286                                        htons(skb->len - udphoff));
 287        } else if (!cp->app && (udph->check != 0)) {
 288                /* Only port and addr are changed, do fast csum update */
 289                udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 290                                     cp->vport, cp->dport);
 291                if (skb->ip_summed == CHECKSUM_COMPLETE)
 292                        skb->ip_summed = CHECKSUM_NONE;
 293        } else {
 294                /* full checksum calculation */
 295                udph->check = 0;
 296                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 297#ifdef CONFIG_IP_VS_IPV6
 298                if (cp->af == AF_INET6)
 299                        udph->check = csum_ipv6_magic(&cp->caddr.in6,
 300                                                      &cp->daddr.in6,
 301                                                      skb->len - udphoff,
 302                                                      cp->protocol, skb->csum);
 303                else
 304#endif
 305                        udph->check = csum_tcpudp_magic(cp->caddr.ip,
 306                                                        cp->daddr.ip,
 307                                                        skb->len - udphoff,
 308                                                        cp->protocol,
 309                                                        skb->csum);
 310                if (udph->check == 0)
 311                        udph->check = CSUM_MANGLED_0;
 312                skb->ip_summed = CHECKSUM_UNNECESSARY;
 313        }
 314        return 1;
 315}
 316
 317
 318static int
 319udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 320{
 321        struct udphdr _udph, *uh;
 322        unsigned int udphoff;
 323
 324#ifdef CONFIG_IP_VS_IPV6
 325        if (af == AF_INET6)
 326                udphoff = sizeof(struct ipv6hdr);
 327        else
 328#endif
 329                udphoff = ip_hdrlen(skb);
 330
 331        uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
 332        if (uh == NULL)
 333                return 0;
 334
 335        if (uh->check != 0) {
 336                switch (skb->ip_summed) {
 337                case CHECKSUM_NONE:
 338                        skb->csum = skb_checksum(skb, udphoff,
 339                                                 skb->len - udphoff, 0);
 340                case CHECKSUM_COMPLETE:
 341#ifdef CONFIG_IP_VS_IPV6
 342                        if (af == AF_INET6) {
 343                                if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 344                                                    &ipv6_hdr(skb)->daddr,
 345                                                    skb->len - udphoff,
 346                                                    ipv6_hdr(skb)->nexthdr,
 347                                                    skb->csum)) {
 348                                        IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 349                                                         "Failed checksum for");
 350                                        return 0;
 351                                }
 352                        } else
 353#endif
 354                                if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
 355                                                      ip_hdr(skb)->daddr,
 356                                                      skb->len - udphoff,
 357                                                      ip_hdr(skb)->protocol,
 358                                                      skb->csum)) {
 359                                        IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 360                                                         "Failed checksum for");
 361                                        return 0;
 362                                }
 363                        break;
 364                default:
 365                        /* No need to checksum. */
 366                        break;
 367                }
 368        }
 369        return 1;
 370}
 371
 372
 373/*
 374 *      Note: the caller guarantees that only one of register_app,
 375 *      unregister_app or app_conn_bind is called each time.
 376 */
 377
 378#define UDP_APP_TAB_BITS        4
 379#define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
 380#define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
 381
 382static struct list_head udp_apps[UDP_APP_TAB_SIZE];
 383static DEFINE_SPINLOCK(udp_app_lock);
 384
 385static inline __u16 udp_app_hashkey(__be16 port)
 386{
 387        return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
 388                & UDP_APP_TAB_MASK;
 389}
 390
 391
 392static int udp_register_app(struct ip_vs_app *inc)
 393{
 394        struct ip_vs_app *i;
 395        __u16 hash;
 396        __be16 port = inc->port;
 397        int ret = 0;
 398
 399        hash = udp_app_hashkey(port);
 400
 401
 402        spin_lock_bh(&udp_app_lock);
 403        list_for_each_entry(i, &udp_apps[hash], p_list) {
 404                if (i->port == port) {
 405                        ret = -EEXIST;
 406                        goto out;
 407                }
 408        }
 409        list_add(&inc->p_list, &udp_apps[hash]);
 410        atomic_inc(&ip_vs_protocol_udp.appcnt);
 411
 412  out:
 413        spin_unlock_bh(&udp_app_lock);
 414        return ret;
 415}
 416
 417
 418static void
 419udp_unregister_app(struct ip_vs_app *inc)
 420{
 421        spin_lock_bh(&udp_app_lock);
 422        atomic_dec(&ip_vs_protocol_udp.appcnt);
 423        list_del(&inc->p_list);
 424        spin_unlock_bh(&udp_app_lock);
 425}
 426
 427
 428static int udp_app_conn_bind(struct ip_vs_conn *cp)
 429{
 430        int hash;
 431        struct ip_vs_app *inc;
 432        int result = 0;
 433
 434        /* Default binding: bind app only for NAT */
 435        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
 436                return 0;
 437
 438        /* Lookup application incarnations and bind the right one */
 439        hash = udp_app_hashkey(cp->vport);
 440
 441        spin_lock(&udp_app_lock);
 442        list_for_each_entry(inc, &udp_apps[hash], p_list) {
 443                if (inc->port == cp->vport) {
 444                        if (unlikely(!ip_vs_app_inc_get(inc)))
 445                                break;
 446                        spin_unlock(&udp_app_lock);
 447
 448                        IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 449                                      "%s:%u to app %s on port %u\n",
 450                                      __func__,
 451                                      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 452                                      ntohs(cp->cport),
 453                                      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
 454                                      ntohs(cp->vport),
 455                                      inc->name, ntohs(inc->port));
 456
 457                        cp->app = inc;
 458                        if (inc->init_conn)
 459                                result = inc->init_conn(inc, cp);
 460                        goto out;
 461                }
 462        }
 463        spin_unlock(&udp_app_lock);
 464
 465  out:
 466        return result;
 467}
 468
 469
 470static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
 471        [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
 472        [IP_VS_UDP_S_LAST]              =       2*HZ,
 473};
 474
 475static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
 476        [IP_VS_UDP_S_NORMAL]            =       "UDP",
 477        [IP_VS_UDP_S_LAST]              =       "BUG!",
 478};
 479
 480
 481static int
 482udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
 483{
 484        return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
 485                                       udp_state_name_table, sname, to);
 486}
 487
 488static const char * udp_state_name(int state)
 489{
 490        if (state >= IP_VS_UDP_S_LAST)
 491                return "ERR!";
 492        return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
 493}
 494
 495static int
 496udp_state_transition(struct ip_vs_conn *cp, int direction,
 497                     const struct sk_buff *skb,
 498                     struct ip_vs_protocol *pp)
 499{
 500        cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
 501        return 1;
 502}
 503
 504static void udp_init(struct ip_vs_protocol *pp)
 505{
 506        IP_VS_INIT_HASH_TABLE(udp_apps);
 507        pp->timeout_table = udp_timeouts;
 508}
 509
 510static void udp_exit(struct ip_vs_protocol *pp)
 511{
 512}
 513
 514
 515struct ip_vs_protocol ip_vs_protocol_udp = {
 516        .name =                 "UDP",
 517        .protocol =             IPPROTO_UDP,
 518        .num_states =           IP_VS_UDP_S_LAST,
 519        .dont_defrag =          0,
 520        .init =                 udp_init,
 521        .exit =                 udp_exit,
 522        .conn_schedule =        udp_conn_schedule,
 523        .conn_in_get =          udp_conn_in_get,
 524        .conn_out_get =         udp_conn_out_get,
 525        .snat_handler =         udp_snat_handler,
 526        .dnat_handler =         udp_dnat_handler,
 527        .csum_check =           udp_csum_check,
 528        .state_transition =     udp_state_transition,
 529        .state_name =           udp_state_name,
 530        .register_app =         udp_register_app,
 531        .unregister_app =       udp_unregister_app,
 532        .app_conn_bind =        udp_app_conn_bind,
 533        .debug_packet =         ip_vs_tcpudp_debug_packet,
 534        .timeout_change =       NULL,
 535        .set_state_timeout =    udp_set_state_timeout,
 536};
 537