linux/net/netfilter/ipvs/ip_vs_proto_udp.c
<<
>>
Prefs
   1/*
   2 * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
   3 *
   4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   5 *              Julian Anastasov <ja@ssi.bg>
   6 *
   7 *              This program is free software; you can redistribute it and/or
   8 *              modify it under the terms of the GNU General Public License
   9 *              as published by the Free Software Foundation; either version
  10 *              2 of the License, or (at your option) any later version.
  11 *
  12 * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  13 *              Network name space (netns) aware.
  14 *
  15 */
  16
  17#define KMSG_COMPONENT "IPVS"
  18#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  19
  20#include <linux/in.h>
  21#include <linux/ip.h>
  22#include <linux/kernel.h>
  23#include <linux/netfilter.h>
  24#include <linux/netfilter_ipv4.h>
  25#include <linux/udp.h>
  26
  27#include <net/ip_vs.h>
  28#include <net/ip.h>
  29#include <net/ip6_checksum.h>
  30
  31static int
  32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
  33                  int *verdict, struct ip_vs_conn **cpp,
  34                  struct ip_vs_iphdr *iph)
  35{
  36        struct net *net;
  37        struct ip_vs_service *svc;
  38        struct udphdr _udph, *uh;
  39
  40        /* IPv6 fragments, only first fragment will hit this */
  41        uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
  42        if (uh == NULL) {
  43                *verdict = NF_DROP;
  44                return 0;
  45        }
  46        net = skb_net(skb);
  47        rcu_read_lock();
  48        svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
  49                                 &iph->daddr, uh->dest);
  50        if (svc) {
  51                int ignored;
  52
  53                if (ip_vs_todrop(net_ipvs(net))) {
  54                        /*
  55                         * It seems that we are very loaded.
  56                         * We have to drop this packet :(
  57                         */
  58                        rcu_read_unlock();
  59                        *verdict = NF_DROP;
  60                        return 0;
  61                }
  62
  63                /*
  64                 * Let the virtual server select a real server for the
  65                 * incoming connection, and create a connection entry.
  66                 */
  67                *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
  68                if (!*cpp && ignored <= 0) {
  69                        if (!ignored)
  70                                *verdict = ip_vs_leave(svc, skb, pd, iph);
  71                        else
  72                                *verdict = NF_DROP;
  73                        rcu_read_unlock();
  74                        return 0;
  75                }
  76        }
  77        rcu_read_unlock();
  78        /* NF_ACCEPT */
  79        return 1;
  80}
  81
  82
  83static inline void
  84udp_fast_csum_update(int af, struct udphdr *uhdr,
  85                     const union nf_inet_addr *oldip,
  86                     const union nf_inet_addr *newip,
  87                     __be16 oldport, __be16 newport)
  88{
  89#ifdef CONFIG_IP_VS_IPV6
  90        if (af == AF_INET6)
  91                uhdr->check =
  92                        csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
  93                                         ip_vs_check_diff2(oldport, newport,
  94                                                ~csum_unfold(uhdr->check))));
  95        else
  96#endif
  97                uhdr->check =
  98                        csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
  99                                         ip_vs_check_diff2(oldport, newport,
 100                                                ~csum_unfold(uhdr->check))));
 101        if (!uhdr->check)
 102                uhdr->check = CSUM_MANGLED_0;
 103}
 104
 105static inline void
 106udp_partial_csum_update(int af, struct udphdr *uhdr,
 107                     const union nf_inet_addr *oldip,
 108                     const union nf_inet_addr *newip,
 109                     __be16 oldlen, __be16 newlen)
 110{
 111#ifdef CONFIG_IP_VS_IPV6
 112        if (af == AF_INET6)
 113                uhdr->check =
 114                        ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 115                                         ip_vs_check_diff2(oldlen, newlen,
 116                                                csum_unfold(uhdr->check))));
 117        else
 118#endif
 119        uhdr->check =
 120                ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 121                                ip_vs_check_diff2(oldlen, newlen,
 122                                                csum_unfold(uhdr->check))));
 123}
 124
 125
 126static int
 127udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 128                 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 129{
 130        struct udphdr *udph;
 131        unsigned int udphoff = iph->len;
 132        int oldlen;
 133        int payload_csum = 0;
 134
 135#ifdef CONFIG_IP_VS_IPV6
 136        if (cp->af == AF_INET6 && iph->fragoffs)
 137                return 1;
 138#endif
 139        oldlen = skb->len - udphoff;
 140
 141        /* csum_check requires unshared skb */
 142        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 143                return 0;
 144
 145        if (unlikely(cp->app != NULL)) {
 146                int ret;
 147
 148                /* Some checks before mangling */
 149                if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 150                        return 0;
 151
 152                /*
 153                 *      Call application helper if needed
 154                 */
 155                if (!(ret = ip_vs_app_pkt_out(cp, skb)))
 156                        return 0;
 157                /* ret=2: csum update is needed after payload mangling */
 158                if (ret == 1)
 159                        oldlen = skb->len - udphoff;
 160                else
 161                        payload_csum = 1;
 162        }
 163
 164        udph = (void *)skb_network_header(skb) + udphoff;
 165        udph->source = cp->vport;
 166
 167        /*
 168         *      Adjust UDP checksums
 169         */
 170        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 171                udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 172                                        htons(oldlen),
 173                                        htons(skb->len - udphoff));
 174        } else if (!payload_csum && (udph->check != 0)) {
 175                /* Only port and addr are changed, do fast csum update */
 176                udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 177                                     cp->dport, cp->vport);
 178                if (skb->ip_summed == CHECKSUM_COMPLETE)
 179                        skb->ip_summed = (cp->app && pp->csum_check) ?
 180                                         CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 181        } else {
 182                /* full checksum calculation */
 183                udph->check = 0;
 184                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 185#ifdef CONFIG_IP_VS_IPV6
 186                if (cp->af == AF_INET6)
 187                        udph->check = csum_ipv6_magic(&cp->vaddr.in6,
 188                                                      &cp->caddr.in6,
 189                                                      skb->len - udphoff,
 190                                                      cp->protocol, skb->csum);
 191                else
 192#endif
 193                        udph->check = csum_tcpudp_magic(cp->vaddr.ip,
 194                                                        cp->caddr.ip,
 195                                                        skb->len - udphoff,
 196                                                        cp->protocol,
 197                                                        skb->csum);
 198                if (udph->check == 0)
 199                        udph->check = CSUM_MANGLED_0;
 200                skb->ip_summed = CHECKSUM_UNNECESSARY;
 201                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 202                          pp->name, udph->check,
 203                          (char*)&(udph->check) - (char*)udph);
 204        }
 205        return 1;
 206}
 207
 208
 209static int
 210udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 211                 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 212{
 213        struct udphdr *udph;
 214        unsigned int udphoff = iph->len;
 215        int oldlen;
 216        int payload_csum = 0;
 217
 218#ifdef CONFIG_IP_VS_IPV6
 219        if (cp->af == AF_INET6 && iph->fragoffs)
 220                return 1;
 221#endif
 222        oldlen = skb->len - udphoff;
 223
 224        /* csum_check requires unshared skb */
 225        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 226                return 0;
 227
 228        if (unlikely(cp->app != NULL)) {
 229                int ret;
 230
 231                /* Some checks before mangling */
 232                if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 233                        return 0;
 234
 235                /*
 236                 *      Attempt ip_vs_app call.
 237                 *      It will fix ip_vs_conn
 238                 */
 239                if (!(ret = ip_vs_app_pkt_in(cp, skb)))
 240                        return 0;
 241                /* ret=2: csum update is needed after payload mangling */
 242                if (ret == 1)
 243                        oldlen = skb->len - udphoff;
 244                else
 245                        payload_csum = 1;
 246        }
 247
 248        udph = (void *)skb_network_header(skb) + udphoff;
 249        udph->dest = cp->dport;
 250
 251        /*
 252         *      Adjust UDP checksums
 253         */
 254        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 255                udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 256                                        htons(oldlen),
 257                                        htons(skb->len - udphoff));
 258        } else if (!payload_csum && (udph->check != 0)) {
 259                /* Only port and addr are changed, do fast csum update */
 260                udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 261                                     cp->vport, cp->dport);
 262                if (skb->ip_summed == CHECKSUM_COMPLETE)
 263                        skb->ip_summed = (cp->app && pp->csum_check) ?
 264                                         CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 265        } else {
 266                /* full checksum calculation */
 267                udph->check = 0;
 268                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 269#ifdef CONFIG_IP_VS_IPV6
 270                if (cp->af == AF_INET6)
 271                        udph->check = csum_ipv6_magic(&cp->caddr.in6,
 272                                                      &cp->daddr.in6,
 273                                                      skb->len - udphoff,
 274                                                      cp->protocol, skb->csum);
 275                else
 276#endif
 277                        udph->check = csum_tcpudp_magic(cp->caddr.ip,
 278                                                        cp->daddr.ip,
 279                                                        skb->len - udphoff,
 280                                                        cp->protocol,
 281                                                        skb->csum);
 282                if (udph->check == 0)
 283                        udph->check = CSUM_MANGLED_0;
 284                skb->ip_summed = CHECKSUM_UNNECESSARY;
 285        }
 286        return 1;
 287}
 288
 289
 290static int
 291udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 292{
 293        struct udphdr _udph, *uh;
 294        unsigned int udphoff;
 295
 296#ifdef CONFIG_IP_VS_IPV6
 297        if (af == AF_INET6)
 298                udphoff = sizeof(struct ipv6hdr);
 299        else
 300#endif
 301                udphoff = ip_hdrlen(skb);
 302
 303        uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
 304        if (uh == NULL)
 305                return 0;
 306
 307        if (uh->check != 0) {
 308                switch (skb->ip_summed) {
 309                case CHECKSUM_NONE:
 310                        skb->csum = skb_checksum(skb, udphoff,
 311                                                 skb->len - udphoff, 0);
 312                case CHECKSUM_COMPLETE:
 313#ifdef CONFIG_IP_VS_IPV6
 314                        if (af == AF_INET6) {
 315                                if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 316                                                    &ipv6_hdr(skb)->daddr,
 317                                                    skb->len - udphoff,
 318                                                    ipv6_hdr(skb)->nexthdr,
 319                                                    skb->csum)) {
 320                                        IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 321                                                         "Failed checksum for");
 322                                        return 0;
 323                                }
 324                        } else
 325#endif
 326                                if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
 327                                                      ip_hdr(skb)->daddr,
 328                                                      skb->len - udphoff,
 329                                                      ip_hdr(skb)->protocol,
 330                                                      skb->csum)) {
 331                                        IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 332                                                         "Failed checksum for");
 333                                        return 0;
 334                                }
 335                        break;
 336                default:
 337                        /* No need to checksum. */
 338                        break;
 339                }
 340        }
 341        return 1;
 342}
 343
 344static inline __u16 udp_app_hashkey(__be16 port)
 345{
 346        return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
 347                & UDP_APP_TAB_MASK;
 348}
 349
 350
 351static int udp_register_app(struct net *net, struct ip_vs_app *inc)
 352{
 353        struct ip_vs_app *i;
 354        __u16 hash;
 355        __be16 port = inc->port;
 356        int ret = 0;
 357        struct netns_ipvs *ipvs = net_ipvs(net);
 358        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
 359
 360        hash = udp_app_hashkey(port);
 361
 362        list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
 363                if (i->port == port) {
 364                        ret = -EEXIST;
 365                        goto out;
 366                }
 367        }
 368        list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
 369        atomic_inc(&pd->appcnt);
 370
 371  out:
 372        return ret;
 373}
 374
 375
 376static void
 377udp_unregister_app(struct net *net, struct ip_vs_app *inc)
 378{
 379        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
 380
 381        atomic_dec(&pd->appcnt);
 382        list_del_rcu(&inc->p_list);
 383}
 384
 385
 386static int udp_app_conn_bind(struct ip_vs_conn *cp)
 387{
 388        struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 389        int hash;
 390        struct ip_vs_app *inc;
 391        int result = 0;
 392
 393        /* Default binding: bind app only for NAT */
 394        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
 395                return 0;
 396
 397        /* Lookup application incarnations and bind the right one */
 398        hash = udp_app_hashkey(cp->vport);
 399
 400        rcu_read_lock();
 401        list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
 402                if (inc->port == cp->vport) {
 403                        if (unlikely(!ip_vs_app_inc_get(inc)))
 404                                break;
 405                        rcu_read_unlock();
 406
 407                        IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 408                                      "%s:%u to app %s on port %u\n",
 409                                      __func__,
 410                                      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
 411                                      ntohs(cp->cport),
 412                                      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
 413                                      ntohs(cp->vport),
 414                                      inc->name, ntohs(inc->port));
 415
 416                        cp->app = inc;
 417                        if (inc->init_conn)
 418                                result = inc->init_conn(inc, cp);
 419                        goto out;
 420                }
 421        }
 422        rcu_read_unlock();
 423
 424  out:
 425        return result;
 426}
 427
 428
 429static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
 430        [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
 431        [IP_VS_UDP_S_LAST]              =       2*HZ,
 432};
 433
 434static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
 435        [IP_VS_UDP_S_NORMAL]            =       "UDP",
 436        [IP_VS_UDP_S_LAST]              =       "BUG!",
 437};
 438
 439static const char * udp_state_name(int state)
 440{
 441        if (state >= IP_VS_UDP_S_LAST)
 442                return "ERR!";
 443        return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
 444}
 445
 446static void
 447udp_state_transition(struct ip_vs_conn *cp, int direction,
 448                     const struct sk_buff *skb,
 449                     struct ip_vs_proto_data *pd)
 450{
 451        if (unlikely(!pd)) {
 452                pr_err("UDP no ns data\n");
 453                return;
 454        }
 455
 456        cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
 457}
 458
 459static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
 460{
 461        struct netns_ipvs *ipvs = net_ipvs(net);
 462
 463        ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
 464        pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
 465                                                        sizeof(udp_timeouts));
 466        if (!pd->timeout_table)
 467                return -ENOMEM;
 468        return 0;
 469}
 470
 471static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
 472{
 473        kfree(pd->timeout_table);
 474}
 475
 476
 477struct ip_vs_protocol ip_vs_protocol_udp = {
 478        .name =                 "UDP",
 479        .protocol =             IPPROTO_UDP,
 480        .num_states =           IP_VS_UDP_S_LAST,
 481        .dont_defrag =          0,
 482        .init =                 NULL,
 483        .exit =                 NULL,
 484        .init_netns =           __udp_init,
 485        .exit_netns =           __udp_exit,
 486        .conn_schedule =        udp_conn_schedule,
 487        .conn_in_get =          ip_vs_conn_in_get_proto,
 488        .conn_out_get =         ip_vs_conn_out_get_proto,
 489        .snat_handler =         udp_snat_handler,
 490        .dnat_handler =         udp_dnat_handler,
 491        .csum_check =           udp_csum_check,
 492        .state_transition =     udp_state_transition,
 493        .state_name =           udp_state_name,
 494        .register_app =         udp_register_app,
 495        .unregister_app =       udp_unregister_app,
 496        .app_conn_bind =        udp_app_conn_bind,
 497        .debug_packet =         ip_vs_tcpudp_debug_packet,
 498        .timeout_change =       NULL,
 499};
 500