linux/net/netfilter/ipvs/ip_vs_proto_udp.c
<<
>>
Prefs
   1/*
   2 * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
   3 *
   4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   5 *              Julian Anastasov <ja@ssi.bg>
   6 *
   7 *              This program is free software; you can redistribute it and/or
   8 *              modify it under the terms of the GNU General Public License
   9 *              as published by the Free Software Foundation; either version
  10 *              2 of the License, or (at your option) any later version.
  11 *
  12 * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  13 *              Network name space (netns) aware.
  14 *
  15 */
  16
  17#define KMSG_COMPONENT "IPVS"
  18#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  19
  20#include <linux/in.h>
  21#include <linux/ip.h>
  22#include <linux/kernel.h>
  23#include <linux/netfilter.h>
  24#include <linux/netfilter_ipv4.h>
  25#include <linux/udp.h>
  26
  27#include <net/ip_vs.h>
  28#include <net/ip.h>
  29#include <net/ip6_checksum.h>
  30
  31static int
  32udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
  33                  struct ip_vs_proto_data *pd,
  34                  int *verdict, struct ip_vs_conn **cpp,
  35                  struct ip_vs_iphdr *iph)
  36{
  37        struct ip_vs_service *svc;
  38        struct udphdr _udph, *uh;
  39        __be16 _ports[2], *ports = NULL;
  40
  41        if (likely(!ip_vs_iph_icmp(iph))) {
  42                /* IPv6 fragments, only first fragment will hit this */
  43                uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
  44                if (uh)
  45                        ports = &uh->source;
  46        } else {
  47                ports = skb_header_pointer(
  48                        skb, iph->len, sizeof(_ports), &_ports);
  49        }
  50
  51        if (!ports) {
  52                *verdict = NF_DROP;
  53                return 0;
  54        }
  55
  56        if (likely(!ip_vs_iph_inverse(iph)))
  57                svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
  58                                         &iph->daddr, ports[1]);
  59        else
  60                svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
  61                                         &iph->saddr, ports[0]);
  62
  63        if (svc) {
  64                int ignored;
  65
  66                if (ip_vs_todrop(ipvs)) {
  67                        /*
  68                         * It seems that we are very loaded.
  69                         * We have to drop this packet :(
  70                         */
  71                        *verdict = NF_DROP;
  72                        return 0;
  73                }
  74
  75                /*
  76                 * Let the virtual server select a real server for the
  77                 * incoming connection, and create a connection entry.
  78                 */
  79                *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
  80                if (!*cpp && ignored <= 0) {
  81                        if (!ignored)
  82                                *verdict = ip_vs_leave(svc, skb, pd, iph);
  83                        else
  84                                *verdict = NF_DROP;
  85                        return 0;
  86                }
  87        }
  88        /* NF_ACCEPT */
  89        return 1;
  90}
  91
  92
  93static inline void
  94udp_fast_csum_update(int af, struct udphdr *uhdr,
  95                     const union nf_inet_addr *oldip,
  96                     const union nf_inet_addr *newip,
  97                     __be16 oldport, __be16 newport)
  98{
  99#ifdef CONFIG_IP_VS_IPV6
 100        if (af == AF_INET6)
 101                uhdr->check =
 102                        csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 103                                         ip_vs_check_diff2(oldport, newport,
 104                                                ~csum_unfold(uhdr->check))));
 105        else
 106#endif
 107                uhdr->check =
 108                        csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 109                                         ip_vs_check_diff2(oldport, newport,
 110                                                ~csum_unfold(uhdr->check))));
 111        if (!uhdr->check)
 112                uhdr->check = CSUM_MANGLED_0;
 113}
 114
 115static inline void
 116udp_partial_csum_update(int af, struct udphdr *uhdr,
 117                     const union nf_inet_addr *oldip,
 118                     const union nf_inet_addr *newip,
 119                     __be16 oldlen, __be16 newlen)
 120{
 121#ifdef CONFIG_IP_VS_IPV6
 122        if (af == AF_INET6)
 123                uhdr->check =
 124                        ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 125                                         ip_vs_check_diff2(oldlen, newlen,
 126                                                csum_unfold(uhdr->check))));
 127        else
 128#endif
 129        uhdr->check =
 130                ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 131                                ip_vs_check_diff2(oldlen, newlen,
 132                                                csum_unfold(uhdr->check))));
 133}
 134
 135
 136static int
 137udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 138                 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 139{
 140        struct udphdr *udph;
 141        unsigned int udphoff = iph->len;
 142        int oldlen;
 143        int payload_csum = 0;
 144
 145#ifdef CONFIG_IP_VS_IPV6
 146        if (cp->af == AF_INET6 && iph->fragoffs)
 147                return 1;
 148#endif
 149        oldlen = skb->len - udphoff;
 150
 151        /* csum_check requires unshared skb */
 152        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 153                return 0;
 154
 155        if (unlikely(cp->app != NULL)) {
 156                int ret;
 157
 158                /* Some checks before mangling */
 159                if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 160                        return 0;
 161
 162                /*
 163                 *      Call application helper if needed
 164                 */
 165                if (!(ret = ip_vs_app_pkt_out(cp, skb)))
 166                        return 0;
 167                /* ret=2: csum update is needed after payload mangling */
 168                if (ret == 1)
 169                        oldlen = skb->len - udphoff;
 170                else
 171                        payload_csum = 1;
 172        }
 173
 174        udph = (void *)skb_network_header(skb) + udphoff;
 175        udph->source = cp->vport;
 176
 177        /*
 178         *      Adjust UDP checksums
 179         */
 180        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 181                udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 182                                        htons(oldlen),
 183                                        htons(skb->len - udphoff));
 184        } else if (!payload_csum && (udph->check != 0)) {
 185                /* Only port and addr are changed, do fast csum update */
 186                udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 187                                     cp->dport, cp->vport);
 188                if (skb->ip_summed == CHECKSUM_COMPLETE)
 189                        skb->ip_summed = (cp->app && pp->csum_check) ?
 190                                         CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 191        } else {
 192                /* full checksum calculation */
 193                udph->check = 0;
 194                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 195#ifdef CONFIG_IP_VS_IPV6
 196                if (cp->af == AF_INET6)
 197                        udph->check = csum_ipv6_magic(&cp->vaddr.in6,
 198                                                      &cp->caddr.in6,
 199                                                      skb->len - udphoff,
 200                                                      cp->protocol, skb->csum);
 201                else
 202#endif
 203                        udph->check = csum_tcpudp_magic(cp->vaddr.ip,
 204                                                        cp->caddr.ip,
 205                                                        skb->len - udphoff,
 206                                                        cp->protocol,
 207                                                        skb->csum);
 208                if (udph->check == 0)
 209                        udph->check = CSUM_MANGLED_0;
 210                skb->ip_summed = CHECKSUM_UNNECESSARY;
 211                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 212                          pp->name, udph->check,
 213                          (char*)&(udph->check) - (char*)udph);
 214        }
 215        return 1;
 216}
 217
 218
 219static int
 220udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 221                 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 222{
 223        struct udphdr *udph;
 224        unsigned int udphoff = iph->len;
 225        int oldlen;
 226        int payload_csum = 0;
 227
 228#ifdef CONFIG_IP_VS_IPV6
 229        if (cp->af == AF_INET6 && iph->fragoffs)
 230                return 1;
 231#endif
 232        oldlen = skb->len - udphoff;
 233
 234        /* csum_check requires unshared skb */
 235        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 236                return 0;
 237
 238        if (unlikely(cp->app != NULL)) {
 239                int ret;
 240
 241                /* Some checks before mangling */
 242                if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 243                        return 0;
 244
 245                /*
 246                 *      Attempt ip_vs_app call.
 247                 *      It will fix ip_vs_conn
 248                 */
 249                if (!(ret = ip_vs_app_pkt_in(cp, skb)))
 250                        return 0;
 251                /* ret=2: csum update is needed after payload mangling */
 252                if (ret == 1)
 253                        oldlen = skb->len - udphoff;
 254                else
 255                        payload_csum = 1;
 256        }
 257
 258        udph = (void *)skb_network_header(skb) + udphoff;
 259        udph->dest = cp->dport;
 260
 261        /*
 262         *      Adjust UDP checksums
 263         */
 264        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 265                udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 266                                        htons(oldlen),
 267                                        htons(skb->len - udphoff));
 268        } else if (!payload_csum && (udph->check != 0)) {
 269                /* Only port and addr are changed, do fast csum update */
 270                udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 271                                     cp->vport, cp->dport);
 272                if (skb->ip_summed == CHECKSUM_COMPLETE)
 273                        skb->ip_summed = (cp->app && pp->csum_check) ?
 274                                         CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 275        } else {
 276                /* full checksum calculation */
 277                udph->check = 0;
 278                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 279#ifdef CONFIG_IP_VS_IPV6
 280                if (cp->af == AF_INET6)
 281                        udph->check = csum_ipv6_magic(&cp->caddr.in6,
 282                                                      &cp->daddr.in6,
 283                                                      skb->len - udphoff,
 284                                                      cp->protocol, skb->csum);
 285                else
 286#endif
 287                        udph->check = csum_tcpudp_magic(cp->caddr.ip,
 288                                                        cp->daddr.ip,
 289                                                        skb->len - udphoff,
 290                                                        cp->protocol,
 291                                                        skb->csum);
 292                if (udph->check == 0)
 293                        udph->check = CSUM_MANGLED_0;
 294                skb->ip_summed = CHECKSUM_UNNECESSARY;
 295        }
 296        return 1;
 297}
 298
 299
 300static int
 301udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 302{
 303        struct udphdr _udph, *uh;
 304        unsigned int udphoff;
 305
 306#ifdef CONFIG_IP_VS_IPV6
 307        if (af == AF_INET6)
 308                udphoff = sizeof(struct ipv6hdr);
 309        else
 310#endif
 311                udphoff = ip_hdrlen(skb);
 312
 313        uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
 314        if (uh == NULL)
 315                return 0;
 316
 317        if (uh->check != 0) {
 318                switch (skb->ip_summed) {
 319                case CHECKSUM_NONE:
 320                        skb->csum = skb_checksum(skb, udphoff,
 321                                                 skb->len - udphoff, 0);
 322                        /* fall through */
 323                case CHECKSUM_COMPLETE:
 324#ifdef CONFIG_IP_VS_IPV6
 325                        if (af == AF_INET6) {
 326                                if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 327                                                    &ipv6_hdr(skb)->daddr,
 328                                                    skb->len - udphoff,
 329                                                    ipv6_hdr(skb)->nexthdr,
 330                                                    skb->csum)) {
 331                                        IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 332                                                         "Failed checksum for");
 333                                        return 0;
 334                                }
 335                        } else
 336#endif
 337                                if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
 338                                                      ip_hdr(skb)->daddr,
 339                                                      skb->len - udphoff,
 340                                                      ip_hdr(skb)->protocol,
 341                                                      skb->csum)) {
 342                                        IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 343                                                         "Failed checksum for");
 344                                        return 0;
 345                                }
 346                        break;
 347                default:
 348                        /* No need to checksum. */
 349                        break;
 350                }
 351        }
 352        return 1;
 353}
 354
 355static inline __u16 udp_app_hashkey(__be16 port)
 356{
 357        return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
 358                & UDP_APP_TAB_MASK;
 359}
 360
 361
 362static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 363{
 364        struct ip_vs_app *i;
 365        __u16 hash;
 366        __be16 port = inc->port;
 367        int ret = 0;
 368        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
 369
 370        hash = udp_app_hashkey(port);
 371
 372        list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
 373                if (i->port == port) {
 374                        ret = -EEXIST;
 375                        goto out;
 376                }
 377        }
 378        list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
 379        atomic_inc(&pd->appcnt);
 380
 381  out:
 382        return ret;
 383}
 384
 385
 386static void
 387udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 388{
 389        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
 390
 391        atomic_dec(&pd->appcnt);
 392        list_del_rcu(&inc->p_list);
 393}
 394
 395
 396static int udp_app_conn_bind(struct ip_vs_conn *cp)
 397{
 398        struct netns_ipvs *ipvs = cp->ipvs;
 399        int hash;
 400        struct ip_vs_app *inc;
 401        int result = 0;
 402
 403        /* Default binding: bind app only for NAT */
 404        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
 405                return 0;
 406
 407        /* Lookup application incarnations and bind the right one */
 408        hash = udp_app_hashkey(cp->vport);
 409
 410        list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
 411                if (inc->port == cp->vport) {
 412                        if (unlikely(!ip_vs_app_inc_get(inc)))
 413                                break;
 414
 415                        IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 416                                      "%s:%u to app %s on port %u\n",
 417                                      __func__,
 418                                      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 419                                      ntohs(cp->cport),
 420                                      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
 421                                      ntohs(cp->vport),
 422                                      inc->name, ntohs(inc->port));
 423
 424                        cp->app = inc;
 425                        if (inc->init_conn)
 426                                result = inc->init_conn(inc, cp);
 427                        break;
 428                }
 429        }
 430
 431        return result;
 432}
 433
 434
 435static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
 436        [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
 437        [IP_VS_UDP_S_LAST]              =       2*HZ,
 438};
 439
 440static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
 441        [IP_VS_UDP_S_NORMAL]            =       "UDP",
 442        [IP_VS_UDP_S_LAST]              =       "BUG!",
 443};
 444
 445static const char * udp_state_name(int state)
 446{
 447        if (state >= IP_VS_UDP_S_LAST)
 448                return "ERR!";
 449        return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
 450}
 451
 452static void
 453udp_state_transition(struct ip_vs_conn *cp, int direction,
 454                     const struct sk_buff *skb,
 455                     struct ip_vs_proto_data *pd)
 456{
 457        if (unlikely(!pd)) {
 458                pr_err("UDP no ns data\n");
 459                return;
 460        }
 461
 462        cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
 463}
 464
 465static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 466{
 467        ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
 468        pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
 469                                                        sizeof(udp_timeouts));
 470        if (!pd->timeout_table)
 471                return -ENOMEM;
 472        return 0;
 473}
 474
 475static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 476{
 477        kfree(pd->timeout_table);
 478}
 479
 480
 481struct ip_vs_protocol ip_vs_protocol_udp = {
 482        .name =                 "UDP",
 483        .protocol =             IPPROTO_UDP,
 484        .num_states =           IP_VS_UDP_S_LAST,
 485        .dont_defrag =          0,
 486        .init =                 NULL,
 487        .exit =                 NULL,
 488        .init_netns =           __udp_init,
 489        .exit_netns =           __udp_exit,
 490        .conn_schedule =        udp_conn_schedule,
 491        .conn_in_get =          ip_vs_conn_in_get_proto,
 492        .conn_out_get =         ip_vs_conn_out_get_proto,
 493        .snat_handler =         udp_snat_handler,
 494        .dnat_handler =         udp_dnat_handler,
 495        .csum_check =           udp_csum_check,
 496        .state_transition =     udp_state_transition,
 497        .state_name =           udp_state_name,
 498        .register_app =         udp_register_app,
 499        .unregister_app =       udp_unregister_app,
 500        .app_conn_bind =        udp_app_conn_bind,
 501        .debug_packet =         ip_vs_tcpudp_debug_packet,
 502        .timeout_change =       NULL,
 503};
 504