linux/net/netfilter/ipvs/ip_vs_proto_udp.c
<<
>>
Prefs
   1/*
   2 * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
   3 *
   4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   5 *              Julian Anastasov <ja@ssi.bg>
   6 *
   7 *              This program is free software; you can redistribute it and/or
   8 *              modify it under the terms of the GNU General Public License
   9 *              as published by the Free Software Foundation; either version
  10 *              2 of the License, or (at your option) any later version.
  11 *
  12 * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  13 *              Network name space (netns) aware.
  14 *
  15 */
  16
  17#define KMSG_COMPONENT "IPVS"
  18#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  19
  20#include <linux/in.h>
  21#include <linux/ip.h>
  22#include <linux/kernel.h>
  23#include <linux/netfilter.h>
  24#include <linux/netfilter_ipv4.h>
  25#include <linux/udp.h>
  26
  27#include <net/ip_vs.h>
  28#include <net/ip.h>
  29#include <net/ip6_checksum.h>
  30
  31static int
  32udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
  33                  struct ip_vs_proto_data *pd,
  34                  int *verdict, struct ip_vs_conn **cpp,
  35                  struct ip_vs_iphdr *iph)
  36{
  37        struct ip_vs_service *svc;
  38        struct udphdr _udph, *uh;
  39        __be16 _ports[2], *ports = NULL;
  40
  41        if (likely(!ip_vs_iph_icmp(iph))) {
  42                /* IPv6 fragments, only first fragment will hit this */
  43                uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
  44                if (uh)
  45                        ports = &uh->source;
  46        } else {
  47                ports = skb_header_pointer(
  48                        skb, iph->len, sizeof(_ports), &_ports);
  49        }
  50
  51        if (!ports) {
  52                *verdict = NF_DROP;
  53                return 0;
  54        }
  55
  56        rcu_read_lock();
  57        if (likely(!ip_vs_iph_inverse(iph)))
  58                svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
  59                                         &iph->daddr, ports[1]);
  60        else
  61                svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
  62                                         &iph->saddr, ports[0]);
  63
  64        if (svc) {
  65                int ignored;
  66
  67                if (ip_vs_todrop(ipvs)) {
  68                        /*
  69                         * It seems that we are very loaded.
  70                         * We have to drop this packet :(
  71                         */
  72                        rcu_read_unlock();
  73                        *verdict = NF_DROP;
  74                        return 0;
  75                }
  76
  77                /*
  78                 * Let the virtual server select a real server for the
  79                 * incoming connection, and create a connection entry.
  80                 */
  81                *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
  82                if (!*cpp && ignored <= 0) {
  83                        if (!ignored)
  84                                *verdict = ip_vs_leave(svc, skb, pd, iph);
  85                        else
  86                                *verdict = NF_DROP;
  87                        rcu_read_unlock();
  88                        return 0;
  89                }
  90        }
  91        rcu_read_unlock();
  92        /* NF_ACCEPT */
  93        return 1;
  94}
  95
  96
  97static inline void
  98udp_fast_csum_update(int af, struct udphdr *uhdr,
  99                     const union nf_inet_addr *oldip,
 100                     const union nf_inet_addr *newip,
 101                     __be16 oldport, __be16 newport)
 102{
 103#ifdef CONFIG_IP_VS_IPV6
 104        if (af == AF_INET6)
 105                uhdr->check =
 106                        csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 107                                         ip_vs_check_diff2(oldport, newport,
 108                                                ~csum_unfold(uhdr->check))));
 109        else
 110#endif
 111                uhdr->check =
 112                        csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 113                                         ip_vs_check_diff2(oldport, newport,
 114                                                ~csum_unfold(uhdr->check))));
 115        if (!uhdr->check)
 116                uhdr->check = CSUM_MANGLED_0;
 117}
 118
 119static inline void
 120udp_partial_csum_update(int af, struct udphdr *uhdr,
 121                     const union nf_inet_addr *oldip,
 122                     const union nf_inet_addr *newip,
 123                     __be16 oldlen, __be16 newlen)
 124{
 125#ifdef CONFIG_IP_VS_IPV6
 126        if (af == AF_INET6)
 127                uhdr->check =
 128                        ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 129                                         ip_vs_check_diff2(oldlen, newlen,
 130                                                csum_unfold(uhdr->check))));
 131        else
 132#endif
 133        uhdr->check =
 134                ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 135                                ip_vs_check_diff2(oldlen, newlen,
 136                                                csum_unfold(uhdr->check))));
 137}
 138
 139
 140static int
 141udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 142                 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 143{
 144        struct udphdr *udph;
 145        unsigned int udphoff = iph->len;
 146        int oldlen;
 147        int payload_csum = 0;
 148
 149#ifdef CONFIG_IP_VS_IPV6
 150        if (cp->af == AF_INET6 && iph->fragoffs)
 151                return 1;
 152#endif
 153        oldlen = skb->len - udphoff;
 154
 155        /* csum_check requires unshared skb */
 156        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 157                return 0;
 158
 159        if (unlikely(cp->app != NULL)) {
 160                int ret;
 161
 162                /* Some checks before mangling */
 163                if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 164                        return 0;
 165
 166                /*
 167                 *      Call application helper if needed
 168                 */
 169                if (!(ret = ip_vs_app_pkt_out(cp, skb)))
 170                        return 0;
 171                /* ret=2: csum update is needed after payload mangling */
 172                if (ret == 1)
 173                        oldlen = skb->len - udphoff;
 174                else
 175                        payload_csum = 1;
 176        }
 177
 178        udph = (void *)skb_network_header(skb) + udphoff;
 179        udph->source = cp->vport;
 180
 181        /*
 182         *      Adjust UDP checksums
 183         */
 184        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 185                udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 186                                        htons(oldlen),
 187                                        htons(skb->len - udphoff));
 188        } else if (!payload_csum && (udph->check != 0)) {
 189                /* Only port and addr are changed, do fast csum update */
 190                udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 191                                     cp->dport, cp->vport);
 192                if (skb->ip_summed == CHECKSUM_COMPLETE)
 193                        skb->ip_summed = (cp->app && pp->csum_check) ?
 194                                         CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 195        } else {
 196                /* full checksum calculation */
 197                udph->check = 0;
 198                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 199#ifdef CONFIG_IP_VS_IPV6
 200                if (cp->af == AF_INET6)
 201                        udph->check = csum_ipv6_magic(&cp->vaddr.in6,
 202                                                      &cp->caddr.in6,
 203                                                      skb->len - udphoff,
 204                                                      cp->protocol, skb->csum);
 205                else
 206#endif
 207                        udph->check = csum_tcpudp_magic(cp->vaddr.ip,
 208                                                        cp->caddr.ip,
 209                                                        skb->len - udphoff,
 210                                                        cp->protocol,
 211                                                        skb->csum);
 212                if (udph->check == 0)
 213                        udph->check = CSUM_MANGLED_0;
 214                skb->ip_summed = CHECKSUM_UNNECESSARY;
 215                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 216                          pp->name, udph->check,
 217                          (char*)&(udph->check) - (char*)udph);
 218        }
 219        return 1;
 220}
 221
 222
 223static int
 224udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 225                 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 226{
 227        struct udphdr *udph;
 228        unsigned int udphoff = iph->len;
 229        int oldlen;
 230        int payload_csum = 0;
 231
 232#ifdef CONFIG_IP_VS_IPV6
 233        if (cp->af == AF_INET6 && iph->fragoffs)
 234                return 1;
 235#endif
 236        oldlen = skb->len - udphoff;
 237
 238        /* csum_check requires unshared skb */
 239        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 240                return 0;
 241
 242        if (unlikely(cp->app != NULL)) {
 243                int ret;
 244
 245                /* Some checks before mangling */
 246                if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 247                        return 0;
 248
 249                /*
 250                 *      Attempt ip_vs_app call.
 251                 *      It will fix ip_vs_conn
 252                 */
 253                if (!(ret = ip_vs_app_pkt_in(cp, skb)))
 254                        return 0;
 255                /* ret=2: csum update is needed after payload mangling */
 256                if (ret == 1)
 257                        oldlen = skb->len - udphoff;
 258                else
 259                        payload_csum = 1;
 260        }
 261
 262        udph = (void *)skb_network_header(skb) + udphoff;
 263        udph->dest = cp->dport;
 264
 265        /*
 266         *      Adjust UDP checksums
 267         */
 268        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 269                udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 270                                        htons(oldlen),
 271                                        htons(skb->len - udphoff));
 272        } else if (!payload_csum && (udph->check != 0)) {
 273                /* Only port and addr are changed, do fast csum update */
 274                udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 275                                     cp->vport, cp->dport);
 276                if (skb->ip_summed == CHECKSUM_COMPLETE)
 277                        skb->ip_summed = (cp->app && pp->csum_check) ?
 278                                         CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 279        } else {
 280                /* full checksum calculation */
 281                udph->check = 0;
 282                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 283#ifdef CONFIG_IP_VS_IPV6
 284                if (cp->af == AF_INET6)
 285                        udph->check = csum_ipv6_magic(&cp->caddr.in6,
 286                                                      &cp->daddr.in6,
 287                                                      skb->len - udphoff,
 288                                                      cp->protocol, skb->csum);
 289                else
 290#endif
 291                        udph->check = csum_tcpudp_magic(cp->caddr.ip,
 292                                                        cp->daddr.ip,
 293                                                        skb->len - udphoff,
 294                                                        cp->protocol,
 295                                                        skb->csum);
 296                if (udph->check == 0)
 297                        udph->check = CSUM_MANGLED_0;
 298                skb->ip_summed = CHECKSUM_UNNECESSARY;
 299        }
 300        return 1;
 301}
 302
 303
 304static int
 305udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 306{
 307        struct udphdr _udph, *uh;
 308        unsigned int udphoff;
 309
 310#ifdef CONFIG_IP_VS_IPV6
 311        if (af == AF_INET6)
 312                udphoff = sizeof(struct ipv6hdr);
 313        else
 314#endif
 315                udphoff = ip_hdrlen(skb);
 316
 317        uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
 318        if (uh == NULL)
 319                return 0;
 320
 321        if (uh->check != 0) {
 322                switch (skb->ip_summed) {
 323                case CHECKSUM_NONE:
 324                        skb->csum = skb_checksum(skb, udphoff,
 325                                                 skb->len - udphoff, 0);
 326                case CHECKSUM_COMPLETE:
 327#ifdef CONFIG_IP_VS_IPV6
 328                        if (af == AF_INET6) {
 329                                if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 330                                                    &ipv6_hdr(skb)->daddr,
 331                                                    skb->len - udphoff,
 332                                                    ipv6_hdr(skb)->nexthdr,
 333                                                    skb->csum)) {
 334                                        IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 335                                                         "Failed checksum for");
 336                                        return 0;
 337                                }
 338                        } else
 339#endif
 340                                if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
 341                                                      ip_hdr(skb)->daddr,
 342                                                      skb->len - udphoff,
 343                                                      ip_hdr(skb)->protocol,
 344                                                      skb->csum)) {
 345                                        IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 346                                                         "Failed checksum for");
 347                                        return 0;
 348                                }
 349                        break;
 350                default:
 351                        /* No need to checksum. */
 352                        break;
 353                }
 354        }
 355        return 1;
 356}
 357
 358static inline __u16 udp_app_hashkey(__be16 port)
 359{
 360        return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
 361                & UDP_APP_TAB_MASK;
 362}
 363
 364
 365static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 366{
 367        struct ip_vs_app *i;
 368        __u16 hash;
 369        __be16 port = inc->port;
 370        int ret = 0;
 371        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
 372
 373        hash = udp_app_hashkey(port);
 374
 375        list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
 376                if (i->port == port) {
 377                        ret = -EEXIST;
 378                        goto out;
 379                }
 380        }
 381        list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
 382        atomic_inc(&pd->appcnt);
 383
 384  out:
 385        return ret;
 386}
 387
 388
 389static void
 390udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 391{
 392        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
 393
 394        atomic_dec(&pd->appcnt);
 395        list_del_rcu(&inc->p_list);
 396}
 397
 398
 399static int udp_app_conn_bind(struct ip_vs_conn *cp)
 400{
 401        struct netns_ipvs *ipvs = cp->ipvs;
 402        int hash;
 403        struct ip_vs_app *inc;
 404        int result = 0;
 405
 406        /* Default binding: bind app only for NAT */
 407        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
 408                return 0;
 409
 410        /* Lookup application incarnations and bind the right one */
 411        hash = udp_app_hashkey(cp->vport);
 412
 413        rcu_read_lock();
 414        list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
 415                if (inc->port == cp->vport) {
 416                        if (unlikely(!ip_vs_app_inc_get(inc)))
 417                                break;
 418                        rcu_read_unlock();
 419
 420                        IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 421                                      "%s:%u to app %s on port %u\n",
 422                                      __func__,
 423                                      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 424                                      ntohs(cp->cport),
 425                                      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
 426                                      ntohs(cp->vport),
 427                                      inc->name, ntohs(inc->port));
 428
 429                        cp->app = inc;
 430                        if (inc->init_conn)
 431                                result = inc->init_conn(inc, cp);
 432                        goto out;
 433                }
 434        }
 435        rcu_read_unlock();
 436
 437  out:
 438        return result;
 439}
 440
 441
 442static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
 443        [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
 444        [IP_VS_UDP_S_LAST]              =       2*HZ,
 445};
 446
 447static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
 448        [IP_VS_UDP_S_NORMAL]            =       "UDP",
 449        [IP_VS_UDP_S_LAST]              =       "BUG!",
 450};
 451
 452static const char * udp_state_name(int state)
 453{
 454        if (state >= IP_VS_UDP_S_LAST)
 455                return "ERR!";
 456        return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
 457}
 458
 459static void
 460udp_state_transition(struct ip_vs_conn *cp, int direction,
 461                     const struct sk_buff *skb,
 462                     struct ip_vs_proto_data *pd)
 463{
 464        if (unlikely(!pd)) {
 465                pr_err("UDP no ns data\n");
 466                return;
 467        }
 468
 469        cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
 470}
 471
 472static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 473{
 474        ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
 475        pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
 476                                                        sizeof(udp_timeouts));
 477        if (!pd->timeout_table)
 478                return -ENOMEM;
 479        return 0;
 480}
 481
 482static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 483{
 484        kfree(pd->timeout_table);
 485}
 486
 487
 488struct ip_vs_protocol ip_vs_protocol_udp = {
 489        .name =                 "UDP",
 490        .protocol =             IPPROTO_UDP,
 491        .num_states =           IP_VS_UDP_S_LAST,
 492        .dont_defrag =          0,
 493        .init =                 NULL,
 494        .exit =                 NULL,
 495        .init_netns =           __udp_init,
 496        .exit_netns =           __udp_exit,
 497        .conn_schedule =        udp_conn_schedule,
 498        .conn_in_get =          ip_vs_conn_in_get_proto,
 499        .conn_out_get =         ip_vs_conn_out_get_proto,
 500        .snat_handler =         udp_snat_handler,
 501        .dnat_handler =         udp_dnat_handler,
 502        .csum_check =           udp_csum_check,
 503        .state_transition =     udp_state_transition,
 504        .state_name =           udp_state_name,
 505        .register_app =         udp_register_app,
 506        .unregister_app =       udp_unregister_app,
 507        .app_conn_bind =        udp_app_conn_bind,
 508        .debug_packet =         ip_vs_tcpudp_debug_packet,
 509        .timeout_change =       NULL,
 510};
 511