linux/net/netfilter/ipvs/ip_vs_xmit.c
<<
>>
Prefs
   1/*
   2 * ip_vs_xmit.c: various packet transmitters for IPVS
   3 *
   4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   5 *              Julian Anastasov <ja@ssi.bg>
   6 *
   7 *              This program is free software; you can redistribute it and/or
   8 *              modify it under the terms of the GNU General Public License
   9 *              as published by the Free Software Foundation; either version
  10 *              2 of the License, or (at your option) any later version.
  11 *
  12 * Changes:
  13 *
  14 * Description of forwarding methods:
  15 * - all transmitters are called from LOCAL_IN (remote clients) and
  16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
  17 * - not all connections have destination server, for example,
  18 * connections in backup server when fwmark is used
  19 * - bypass connections use daddr from packet
  20 * LOCAL_OUT rules:
  21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
  22 * - skb->pkt_type is not set yet
  23 * - the only place where we can see skb->sk != NULL
  24 */
  25
  26#define KMSG_COMPONENT "IPVS"
  27#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  28
  29#include <linux/kernel.h>
  30#include <linux/slab.h>
  31#include <linux/tcp.h>                  /* for tcphdr */
  32#include <net/ip.h>
  33#include <net/tcp.h>                    /* for csum_tcpudp_magic */
  34#include <net/udp.h>
  35#include <net/icmp.h>                   /* for icmp_send */
  36#include <net/route.h>                  /* for ip_route_output */
  37#include <net/ipv6.h>
  38#include <net/ip6_route.h>
  39#include <net/addrconf.h>
  40#include <linux/icmpv6.h>
  41#include <linux/netfilter.h>
  42#include <linux/netfilter_ipv4.h>
  43
  44#include <net/ip_vs.h>
  45
  46enum {
  47        IP_VS_RT_MODE_LOCAL     = 1, /* Allow local dest */
  48        IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */
  49        IP_VS_RT_MODE_RDR       = 4, /* Allow redirect from remote daddr to
  50                                      * local
  51                                      */
  52        IP_VS_RT_MODE_CONNECT   = 8, /* Always bind route to saddr */
  53        IP_VS_RT_MODE_KNOWN_NH  = 16,/* Route via remote addr */
  54};
  55
  56/*
  57 *      Destination cache to speed up outgoing route lookup
  58 */
  59static inline void
  60__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
  61                u32 dst_cookie)
  62{
  63        struct dst_entry *old_dst;
  64
  65        old_dst = dest->dst_cache;
  66        dest->dst_cache = dst;
  67        dest->dst_rtos = rtos;
  68        dest->dst_cookie = dst_cookie;
  69        dst_release(old_dst);
  70}
  71
  72static inline struct dst_entry *
  73__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
  74{
  75        struct dst_entry *dst = dest->dst_cache;
  76
  77        if (!dst)
  78                return NULL;
  79        if ((dst->obsolete || rtos != dest->dst_rtos) &&
  80            dst->ops->check(dst, dest->dst_cookie) == NULL) {
  81                dest->dst_cache = NULL;
  82                dst_release(dst);
  83                return NULL;
  84        }
  85        dst_hold(dst);
  86        return dst;
  87}
  88
  89static inline bool
  90__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
  91{
  92        if (IP6CB(skb)->frag_max_size) {
  93                /* frag_max_size tell us that, this packet have been
  94                 * defragmented by netfilter IPv6 conntrack module.
  95                 */
  96                if (IP6CB(skb)->frag_max_size > mtu)
  97                        return true; /* largest fragment violate MTU */
  98        }
  99        else if (skb->len > mtu && !skb_is_gso(skb)) {
 100                return true; /* Packet size violate MTU size */
 101        }
 102        return false;
 103}
 104
 105/* Get route to daddr, update *saddr, optionally bind route to saddr */
 106static struct rtable *do_output_route4(struct net *net, __be32 daddr,
 107                                       u32 rtos, int rt_mode, __be32 *saddr)
 108{
 109        struct flowi4 fl4;
 110        struct rtable *rt;
 111        int loop = 0;
 112
 113        memset(&fl4, 0, sizeof(fl4));
 114        fl4.daddr = daddr;
 115        fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
 116        fl4.flowi4_tos = rtos;
 117        fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
 118                           FLOWI_FLAG_KNOWN_NH : 0;
 119
 120retry:
 121        rt = ip_route_output_key(net, &fl4);
 122        if (IS_ERR(rt)) {
 123                /* Invalid saddr ? */
 124                if (PTR_ERR(rt) == -EINVAL && *saddr &&
 125                    rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
 126                        *saddr = 0;
 127                        flowi4_update_output(&fl4, 0, rtos, daddr, 0);
 128                        goto retry;
 129                }
 130                IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
 131                return NULL;
 132        } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
 133                ip_rt_put(rt);
 134                *saddr = fl4.saddr;
 135                flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
 136                loop++;
 137                goto retry;
 138        }
 139        *saddr = fl4.saddr;
 140        return rt;
 141}
 142
 143/* Get route to destination or remote server */
 144static struct rtable *
 145__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
 146                   __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
 147{
 148        struct net *net = dev_net(skb_dst(skb)->dev);
 149        struct rtable *rt;                      /* Route to the other host */
 150        struct rtable *ort;                     /* Original route */
 151        int local;
 152
 153        if (dest) {
 154                spin_lock(&dest->dst_lock);
 155                if (!(rt = (struct rtable *)
 156                      __ip_vs_dst_check(dest, rtos))) {
 157                        rt = do_output_route4(net, dest->addr.ip, rtos,
 158                                              rt_mode, &dest->dst_saddr.ip);
 159                        if (!rt) {
 160                                spin_unlock(&dest->dst_lock);
 161                                return NULL;
 162                        }
 163                        __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
 164                        IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
 165                                  "rtos=%X\n",
 166                                  &dest->addr.ip, &dest->dst_saddr.ip,
 167                                  atomic_read(&rt->dst.__refcnt), rtos);
 168                }
 169                daddr = dest->addr.ip;
 170                if (ret_saddr)
 171                        *ret_saddr = dest->dst_saddr.ip;
 172                spin_unlock(&dest->dst_lock);
 173        } else {
 174                __be32 saddr = htonl(INADDR_ANY);
 175
 176                /* For such unconfigured boxes avoid many route lookups
 177                 * for performance reasons because we do not remember saddr
 178                 */
 179                rt_mode &= ~IP_VS_RT_MODE_CONNECT;
 180                rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
 181                if (!rt)
 182                        return NULL;
 183                if (ret_saddr)
 184                        *ret_saddr = saddr;
 185        }
 186
 187        local = rt->rt_flags & RTCF_LOCAL;
 188        if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
 189              rt_mode)) {
 190                IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
 191                             (rt->rt_flags & RTCF_LOCAL) ?
 192                             "local":"non-local", &daddr);
 193                ip_rt_put(rt);
 194                return NULL;
 195        }
 196        if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
 197            !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
 198                IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
 199                             "requires NAT method, dest: %pI4\n",
 200                             &ip_hdr(skb)->daddr, &daddr);
 201                ip_rt_put(rt);
 202                return NULL;
 203        }
 204        if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
 205                IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
 206                             "to non-local address, dest: %pI4\n",
 207                             &ip_hdr(skb)->saddr, &daddr);
 208                ip_rt_put(rt);
 209                return NULL;
 210        }
 211
 212        return rt;
 213}
 214
 215/* Reroute packet to local IPv4 stack after DNAT */
 216static int
 217__ip_vs_reroute_locally(struct sk_buff *skb)
 218{
 219        struct rtable *rt = skb_rtable(skb);
 220        struct net_device *dev = rt->dst.dev;
 221        struct net *net = dev_net(dev);
 222        struct iphdr *iph = ip_hdr(skb);
 223
 224        if (rt_is_input_route(rt)) {
 225                unsigned long orefdst = skb->_skb_refdst;
 226
 227                if (ip_route_input(skb, iph->daddr, iph->saddr,
 228                                   iph->tos, skb->dev))
 229                        return 0;
 230                refdst_drop(orefdst);
 231        } else {
 232                struct flowi4 fl4 = {
 233                        .daddr = iph->daddr,
 234                        .saddr = iph->saddr,
 235                        .flowi4_tos = RT_TOS(iph->tos),
 236                        .flowi4_mark = skb->mark,
 237                };
 238
 239                rt = ip_route_output_key(net, &fl4);
 240                if (IS_ERR(rt))
 241                        return 0;
 242                if (!(rt->rt_flags & RTCF_LOCAL)) {
 243                        ip_rt_put(rt);
 244                        return 0;
 245                }
 246                /* Drop old route. */
 247                skb_dst_drop(skb);
 248                skb_dst_set(skb, &rt->dst);
 249        }
 250        return 1;
 251}
 252
 253#ifdef CONFIG_IP_VS_IPV6
 254
 255static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
 256{
 257        return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
 258}
 259
 260static struct dst_entry *
 261__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
 262                        struct in6_addr *ret_saddr, int do_xfrm)
 263{
 264        struct dst_entry *dst;
 265        struct flowi6 fl6 = {
 266                .daddr = *daddr,
 267        };
 268
 269        dst = ip6_route_output(net, NULL, &fl6);
 270        if (dst->error)
 271                goto out_err;
 272        if (!ret_saddr)
 273                return dst;
 274        if (ipv6_addr_any(&fl6.saddr) &&
 275            ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
 276                               &fl6.daddr, 0, &fl6.saddr) < 0)
 277                goto out_err;
 278        if (do_xfrm) {
 279                dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
 280                if (IS_ERR(dst)) {
 281                        dst = NULL;
 282                        goto out_err;
 283                }
 284        }
 285        *ret_saddr = fl6.saddr;
 286        return dst;
 287
 288out_err:
 289        dst_release(dst);
 290        IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
 291        return NULL;
 292}
 293
 294/*
 295 * Get route to destination or remote server
 296 */
 297static struct rt6_info *
 298__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
 299                      struct in6_addr *daddr, struct in6_addr *ret_saddr,
 300                      int do_xfrm, int rt_mode)
 301{
 302        struct net *net = dev_net(skb_dst(skb)->dev);
 303        struct rt6_info *rt;                    /* Route to the other host */
 304        struct rt6_info *ort;                   /* Original route */
 305        struct dst_entry *dst;
 306        int local;
 307
 308        if (dest) {
 309                spin_lock(&dest->dst_lock);
 310                rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
 311                if (!rt) {
 312                        u32 cookie;
 313
 314                        dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
 315                                                      &dest->dst_saddr.in6,
 316                                                      do_xfrm);
 317                        if (!dst) {
 318                                spin_unlock(&dest->dst_lock);
 319                                return NULL;
 320                        }
 321                        rt = (struct rt6_info *) dst;
 322                        cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
 323                        __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
 324                        IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
 325                                  &dest->addr.in6, &dest->dst_saddr.in6,
 326                                  atomic_read(&rt->dst.__refcnt));
 327                }
 328                if (ret_saddr)
 329                        *ret_saddr = dest->dst_saddr.in6;
 330                spin_unlock(&dest->dst_lock);
 331        } else {
 332                dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
 333                if (!dst)
 334                        return NULL;
 335                rt = (struct rt6_info *) dst;
 336        }
 337
 338        local = __ip_vs_is_local_route6(rt);
 339        if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
 340              rt_mode)) {
 341                IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
 342                             local ? "local":"non-local", daddr);
 343                dst_release(&rt->dst);
 344                return NULL;
 345        }
 346        if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
 347            !((ort = (struct rt6_info *) skb_dst(skb)) &&
 348              __ip_vs_is_local_route6(ort))) {
 349                IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
 350                             "requires NAT method, dest: %pI6\n",
 351                             &ipv6_hdr(skb)->daddr, daddr);
 352                dst_release(&rt->dst);
 353                return NULL;
 354        }
 355        if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
 356                     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
 357                                    IPV6_ADDR_LOOPBACK)) {
 358                IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
 359                             "to non-local address, dest: %pI6\n",
 360                             &ipv6_hdr(skb)->saddr, daddr);
 361                dst_release(&rt->dst);
 362                return NULL;
 363        }
 364
 365        return rt;
 366}
 367#endif
 368
 369
 370/*
 371 *      Release dest->dst_cache before a dest is removed
 372 */
 373void
 374ip_vs_dst_reset(struct ip_vs_dest *dest)
 375{
 376        struct dst_entry *old_dst;
 377
 378        old_dst = dest->dst_cache;
 379        dest->dst_cache = NULL;
 380        dst_release(old_dst);
 381        dest->dst_saddr.ip = 0;
 382}
 383
 384#define IP_VS_XMIT_TUNNEL(skb, cp)                              \
 385({                                                              \
 386        int __ret = NF_ACCEPT;                                  \
 387                                                                \
 388        (skb)->ipvs_property = 1;                               \
 389        if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))          \
 390                __ret = ip_vs_confirm_conntrack(skb);           \
 391        if (__ret == NF_ACCEPT) {                               \
 392                nf_reset(skb);                                  \
 393                skb_forward_csum(skb);                          \
 394        }                                                       \
 395        __ret;                                                  \
 396})
 397
 398#define IP_VS_XMIT_NAT(pf, skb, cp, local)              \
 399do {                                                    \
 400        (skb)->ipvs_property = 1;                       \
 401        if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
 402                ip_vs_notrack(skb);                     \
 403        else                                            \
 404                ip_vs_update_conntrack(skb, cp, 1);     \
 405        if (local)                                      \
 406                return NF_ACCEPT;                       \
 407        skb_forward_csum(skb);                          \
 408        NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,     \
 409                skb_dst(skb)->dev, dst_output);         \
 410} while (0)
 411
 412#define IP_VS_XMIT(pf, skb, cp, local)                  \
 413do {                                                    \
 414        (skb)->ipvs_property = 1;                       \
 415        if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
 416                ip_vs_notrack(skb);                     \
 417        if (local)                                      \
 418                return NF_ACCEPT;                       \
 419        skb_forward_csum(skb);                          \
 420        NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,     \
 421                skb_dst(skb)->dev, dst_output);         \
 422} while (0)
 423
 424
 425/*
 426 *      NULL transmitter (do nothing except return NF_ACCEPT)
 427 */
 428int
 429ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 430                struct ip_vs_protocol *pp)
 431{
 432        /* we do not touch skb and do not need pskb ptr */
 433        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
 434}
 435
 436
 437/*
 438 *      Bypass transmitter
 439 *      Let packets bypass the destination when the destination is not
 440 *      available, it may be only used in transparent cache cluster.
 441 */
 442int
 443ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 444                  struct ip_vs_protocol *pp)
 445{
 446        struct rtable *rt;                      /* Route to the other host */
 447        struct iphdr  *iph = ip_hdr(skb);
 448        int    mtu;
 449
 450        EnterFunction(10);
 451
 452        if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
 453                                      IP_VS_RT_MODE_NON_LOCAL, NULL)))
 454                goto tx_error_icmp;
 455
 456        /* MTU checking */
 457        mtu = dst_mtu(&rt->dst);
 458        if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
 459            !skb_is_gso(skb)) {
 460                ip_rt_put(rt);
 461                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 462                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 463                goto tx_error;
 464        }
 465
 466        /*
 467         * Call ip_send_check because we are not sure it is called
 468         * after ip_defrag. Is copy-on-write needed?
 469         */
 470        if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
 471                ip_rt_put(rt);
 472                return NF_STOLEN;
 473        }
 474        ip_send_check(ip_hdr(skb));
 475
 476        /* drop old route */
 477        skb_dst_drop(skb);
 478        skb_dst_set(skb, &rt->dst);
 479
 480        /* Another hack: avoid icmp_send in ip_fragment */
 481        skb->local_df = 1;
 482
 483        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
 484
 485        LeaveFunction(10);
 486        return NF_STOLEN;
 487
 488 tx_error_icmp:
 489        dst_link_failure(skb);
 490 tx_error:
 491        kfree_skb(skb);
 492        LeaveFunction(10);
 493        return NF_STOLEN;
 494}
 495
 496#ifdef CONFIG_IP_VS_IPV6
 497int
 498ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 499                     struct ip_vs_protocol *pp)
 500{
 501        struct rt6_info *rt;                    /* Route to the other host */
 502        struct ipv6hdr  *iph = ipv6_hdr(skb);
 503        int    mtu;
 504
 505        EnterFunction(10);
 506
 507        if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0,
 508                                         IP_VS_RT_MODE_NON_LOCAL)))
 509                goto tx_error_icmp;
 510
 511        /* MTU checking */
 512        mtu = dst_mtu(&rt->dst);
 513        if (__mtu_check_toobig_v6(skb, mtu)) {
 514                if (!skb->dev) {
 515                        struct net *net = dev_net(skb_dst(skb)->dev);
 516
 517                        skb->dev = net->loopback_dev;
 518                }
 519                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 520                dst_release(&rt->dst);
 521                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 522                goto tx_error;
 523        }
 524
 525        /*
 526         * Call ip_send_check because we are not sure it is called
 527         * after ip_defrag. Is copy-on-write needed?
 528         */
 529        skb = skb_share_check(skb, GFP_ATOMIC);
 530        if (unlikely(skb == NULL)) {
 531                dst_release(&rt->dst);
 532                return NF_STOLEN;
 533        }
 534
 535        /* drop old route */
 536        skb_dst_drop(skb);
 537        skb_dst_set(skb, &rt->dst);
 538
 539        /* Another hack: avoid icmp_send in ip_fragment */
 540        skb->local_df = 1;
 541
 542        IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
 543
 544        LeaveFunction(10);
 545        return NF_STOLEN;
 546
 547 tx_error_icmp:
 548        dst_link_failure(skb);
 549 tx_error:
 550        kfree_skb(skb);
 551        LeaveFunction(10);
 552        return NF_STOLEN;
 553}
 554#endif
 555
 556/*
 557 *      NAT transmitter (only for outside-to-inside nat forwarding)
 558 *      Not used for related ICMP
 559 */
 560int
 561ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 562               struct ip_vs_protocol *pp)
 563{
 564        struct rtable *rt;              /* Route to the other host */
 565        int mtu;
 566        struct iphdr *iph = ip_hdr(skb);
 567        int local;
 568
 569        EnterFunction(10);
 570
 571        /* check if it is a connection of no-client-port */
 572        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
 573                __be16 _pt, *p;
 574                p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
 575                if (p == NULL)
 576                        goto tx_error;
 577                ip_vs_conn_fill_cport(cp, *p);
 578                IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
 579        }
 580
 581        if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
 582                                      RT_TOS(iph->tos),
 583                                      IP_VS_RT_MODE_LOCAL |
 584                                        IP_VS_RT_MODE_NON_LOCAL |
 585                                        IP_VS_RT_MODE_RDR, NULL)))
 586                goto tx_error_icmp;
 587        local = rt->rt_flags & RTCF_LOCAL;
 588        /*
 589         * Avoid duplicate tuple in reply direction for NAT traffic
 590         * to local address when connection is sync-ed
 591         */
 592#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 593        if (cp->flags & IP_VS_CONN_F_SYNC && local) {
 594                enum ip_conntrack_info ctinfo;
 595                struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
 596
 597                if (ct && !nf_ct_is_untracked(ct)) {
 598                        IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
 599                                         "ip_vs_nat_xmit(): "
 600                                         "stopping DNAT to local address");
 601                        goto tx_error_put;
 602                }
 603        }
 604#endif
 605
 606        /* From world but DNAT to loopback address? */
 607        if (local && ipv4_is_loopback(cp->daddr.ip) &&
 608            rt_is_input_route(skb_rtable(skb))) {
 609                IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
 610                                 "stopping DNAT to loopback address");
 611                goto tx_error_put;
 612        }
 613
 614        /* MTU checking */
 615        mtu = dst_mtu(&rt->dst);
 616        if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
 617            !skb_is_gso(skb)) {
 618                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 619                IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
 620                                 "ip_vs_nat_xmit(): frag needed for");
 621                goto tx_error_put;
 622        }
 623
 624        /* copy-on-write the packet before mangling it */
 625        if (!skb_make_writable(skb, sizeof(struct iphdr)))
 626                goto tx_error_put;
 627
 628        if (skb_cow(skb, rt->dst.dev->hard_header_len))
 629                goto tx_error_put;
 630
 631        /* mangle the packet */
 632        if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
 633                goto tx_error_put;
 634        ip_hdr(skb)->daddr = cp->daddr.ip;
 635        ip_send_check(ip_hdr(skb));
 636
 637        if (!local) {
 638                /* drop old route */
 639                skb_dst_drop(skb);
 640                skb_dst_set(skb, &rt->dst);
 641        } else {
 642                ip_rt_put(rt);
 643                /*
 644                 * Some IPv4 replies get local address from routes,
 645                 * not from iph, so while we DNAT after routing
 646                 * we need this second input/output route.
 647                 */
 648                if (!__ip_vs_reroute_locally(skb))
 649                        goto tx_error;
 650        }
 651
 652        IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
 653
 654        /* FIXME: when application helper enlarges the packet and the length
 655           is larger than the MTU of outgoing device, there will be still
 656           MTU problem. */
 657
 658        /* Another hack: avoid icmp_send in ip_fragment */
 659        skb->local_df = 1;
 660
 661        IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
 662
 663        LeaveFunction(10);
 664        return NF_STOLEN;
 665
 666  tx_error_icmp:
 667        dst_link_failure(skb);
 668  tx_error:
 669        kfree_skb(skb);
 670        LeaveFunction(10);
 671        return NF_STOLEN;
 672  tx_error_put:
 673        ip_rt_put(rt);
 674        goto tx_error;
 675}
 676
 677#ifdef CONFIG_IP_VS_IPV6
 678int
 679ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 680                  struct ip_vs_protocol *pp)
 681{
 682        struct rt6_info *rt;            /* Route to the other host */
 683        int mtu;
 684        int local;
 685
 686        EnterFunction(10);
 687
 688        /* check if it is a connection of no-client-port */
 689        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
 690                __be16 _pt, *p;
 691                p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
 692                                       sizeof(_pt), &_pt);
 693                if (p == NULL)
 694                        goto tx_error;
 695                ip_vs_conn_fill_cport(cp, *p);
 696                IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
 697        }
 698
 699        if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
 700                                         0, (IP_VS_RT_MODE_LOCAL |
 701                                             IP_VS_RT_MODE_NON_LOCAL |
 702                                             IP_VS_RT_MODE_RDR))))
 703                goto tx_error_icmp;
 704        local = __ip_vs_is_local_route6(rt);
 705        /*
 706         * Avoid duplicate tuple in reply direction for NAT traffic
 707         * to local address when connection is sync-ed
 708         */
 709#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 710        if (cp->flags & IP_VS_CONN_F_SYNC && local) {
 711                enum ip_conntrack_info ctinfo;
 712                struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
 713
 714                if (ct && !nf_ct_is_untracked(ct)) {
 715                        IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
 716                                         "ip_vs_nat_xmit_v6(): "
 717                                         "stopping DNAT to local address");
 718                        goto tx_error_put;
 719                }
 720        }
 721#endif
 722
 723        /* From world but DNAT to loopback address? */
 724        if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
 725            ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
 726                IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
 727                                 "ip_vs_nat_xmit_v6(): "
 728                                 "stopping DNAT to loopback address");
 729                goto tx_error_put;
 730        }
 731
 732        /* MTU checking */
 733        mtu = dst_mtu(&rt->dst);
 734        if (__mtu_check_toobig_v6(skb, mtu)) {
 735                if (!skb->dev) {
 736                        struct net *net = dev_net(skb_dst(skb)->dev);
 737
 738                        skb->dev = net->loopback_dev;
 739                }
 740                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 741                IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
 742                                 "ip_vs_nat_xmit_v6(): frag needed for");
 743                goto tx_error_put;
 744        }
 745
 746        /* copy-on-write the packet before mangling it */
 747        if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
 748                goto tx_error_put;
 749
 750        if (skb_cow(skb, rt->dst.dev->hard_header_len))
 751                goto tx_error_put;
 752
 753        /* mangle the packet */
 754        if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
 755                goto tx_error;
 756        ipv6_hdr(skb)->daddr = cp->daddr.in6;
 757
 758        if (!local || !skb->dev) {
 759                /* drop the old route when skb is not shared */
 760                skb_dst_drop(skb);
 761                skb_dst_set(skb, &rt->dst);
 762        } else {
 763                /* destined to loopback, do we need to change route? */
 764                dst_release(&rt->dst);
 765        }
 766
 767        IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
 768
 769        /* FIXME: when application helper enlarges the packet and the length
 770           is larger than the MTU of outgoing device, there will be still
 771           MTU problem. */
 772
 773        /* Another hack: avoid icmp_send in ip_fragment */
 774        skb->local_df = 1;
 775
 776        IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
 777
 778        LeaveFunction(10);
 779        return NF_STOLEN;
 780
 781tx_error_icmp:
 782        dst_link_failure(skb);
 783tx_error:
 784        LeaveFunction(10);
 785        kfree_skb(skb);
 786        return NF_STOLEN;
 787tx_error_put:
 788        dst_release(&rt->dst);
 789        goto tx_error;
 790}
 791#endif
 792
 793
 794/*
 795 *   IP Tunneling transmitter
 796 *
 797 *   This function encapsulates the packet in a new IP packet, its
 798 *   destination will be set to cp->daddr. Most code of this function
 799 *   is taken from ipip.c.
 800 *
 801 *   It is used in VS/TUN cluster. The load balancer selects a real
 802 *   server from a cluster based on a scheduling algorithm,
 803 *   encapsulates the request packet and forwards it to the selected
 804 *   server. For example, all real servers are configured with
 805 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 806 *   the encapsulated packet, it will decapsulate the packet, processe
 807 *   the request and return the response packets directly to the client
 808 *   without passing the load balancer. This can greatly increase the
 809 *   scalability of virtual server.
 810 *
 811 *   Used for ANY protocol
 812 */
 813int
 814ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 815                  struct ip_vs_protocol *pp)
 816{
 817        struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
 818        struct rtable *rt;                      /* Route to the other host */
 819        __be32 saddr;                           /* Source for tunnel */
 820        struct net_device *tdev;                /* Device to other host */
 821        struct iphdr  *old_iph = ip_hdr(skb);
 822        u8     tos = old_iph->tos;
 823        __be16 df;
 824        struct iphdr  *iph;                     /* Our new IP header */
 825        unsigned int max_headroom;              /* The extra header space needed */
 826        int    mtu;
 827        int ret;
 828
 829        EnterFunction(10);
 830
 831        if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
 832                                      RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
 833                                                   IP_VS_RT_MODE_NON_LOCAL |
 834                                                   IP_VS_RT_MODE_CONNECT,
 835                                                   &saddr)))
 836                goto tx_error_icmp;
 837        if (rt->rt_flags & RTCF_LOCAL) {
 838                ip_rt_put(rt);
 839                IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
 840        }
 841
 842        tdev = rt->dst.dev;
 843
 844        mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 845        if (mtu < 68) {
 846                IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
 847                goto tx_error_put;
 848        }
 849        if (rt_is_output_route(skb_rtable(skb)))
 850                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 851
 852        /* Copy DF, reset fragment offset and MF */
 853        df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
 854
 855        if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
 856                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 857                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 858                goto tx_error_put;
 859        }
 860
 861        /*
 862         * Okay, now see if we can stuff it in the buffer as-is.
 863         */
 864        max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
 865
 866        if (skb_headroom(skb) < max_headroom
 867            || skb_cloned(skb) || skb_shared(skb)) {
 868                struct sk_buff *new_skb =
 869                        skb_realloc_headroom(skb, max_headroom);
 870                if (!new_skb) {
 871                        ip_rt_put(rt);
 872                        kfree_skb(skb);
 873                        IP_VS_ERR_RL("%s(): no memory\n", __func__);
 874                        return NF_STOLEN;
 875                }
 876                consume_skb(skb);
 877                skb = new_skb;
 878                old_iph = ip_hdr(skb);
 879        }
 880
 881        skb->transport_header = skb->network_header;
 882
 883        /* fix old IP header checksum */
 884        ip_send_check(old_iph);
 885
 886        skb_push(skb, sizeof(struct iphdr));
 887        skb_reset_network_header(skb);
 888        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 889
 890        /* drop old route */
 891        skb_dst_drop(skb);
 892        skb_dst_set(skb, &rt->dst);
 893
 894        /*
 895         *      Push down and install the IPIP header.
 896         */
 897        iph                     =       ip_hdr(skb);
 898        iph->version            =       4;
 899        iph->ihl                =       sizeof(struct iphdr)>>2;
 900        iph->frag_off           =       df;
 901        iph->protocol           =       IPPROTO_IPIP;
 902        iph->tos                =       tos;
 903        iph->daddr              =       cp->daddr.ip;
 904        iph->saddr              =       saddr;
 905        iph->ttl                =       old_iph->ttl;
 906        ip_select_ident(iph, &rt->dst, NULL);
 907
 908        /* Another hack: avoid icmp_send in ip_fragment */
 909        skb->local_df = 1;
 910
 911        ret = IP_VS_XMIT_TUNNEL(skb, cp);
 912        if (ret == NF_ACCEPT)
 913                ip_local_out(skb);
 914        else if (ret == NF_DROP)
 915                kfree_skb(skb);
 916
 917        LeaveFunction(10);
 918
 919        return NF_STOLEN;
 920
 921  tx_error_icmp:
 922        dst_link_failure(skb);
 923  tx_error:
 924        kfree_skb(skb);
 925        LeaveFunction(10);
 926        return NF_STOLEN;
 927tx_error_put:
 928        ip_rt_put(rt);
 929        goto tx_error;
 930}
 931
 932#ifdef CONFIG_IP_VS_IPV6
 933int
 934ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 935                     struct ip_vs_protocol *pp)
 936{
 937        struct rt6_info *rt;            /* Route to the other host */
 938        struct in6_addr saddr;          /* Source for tunnel */
 939        struct net_device *tdev;        /* Device to other host */
 940        struct ipv6hdr  *old_iph = ipv6_hdr(skb);
 941        struct ipv6hdr  *iph;           /* Our new IP header */
 942        unsigned int max_headroom;      /* The extra header space needed */
 943        int    mtu;
 944        int ret;
 945
 946        EnterFunction(10);
 947
 948        if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
 949                                         &saddr, 1, (IP_VS_RT_MODE_LOCAL |
 950                                                     IP_VS_RT_MODE_NON_LOCAL))))
 951                goto tx_error_icmp;
 952        if (__ip_vs_is_local_route6(rt)) {
 953                dst_release(&rt->dst);
 954                IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
 955        }
 956
 957        tdev = rt->dst.dev;
 958
 959        mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
 960        if (mtu < IPV6_MIN_MTU) {
 961                IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
 962                             IPV6_MIN_MTU);
 963                goto tx_error_put;
 964        }
 965        if (skb_dst(skb))
 966                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 967
 968        /* MTU checking: Notice that 'mtu' have been adjusted before hand */
 969        if (__mtu_check_toobig_v6(skb, mtu)) {
 970                if (!skb->dev) {
 971                        struct net *net = dev_net(skb_dst(skb)->dev);
 972
 973                        skb->dev = net->loopback_dev;
 974                }
 975                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 976                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 977                goto tx_error_put;
 978        }
 979
 980        /*
 981         * Okay, now see if we can stuff it in the buffer as-is.
 982         */
 983        max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
 984
 985        if (skb_headroom(skb) < max_headroom
 986            || skb_cloned(skb) || skb_shared(skb)) {
 987                struct sk_buff *new_skb =
 988                        skb_realloc_headroom(skb, max_headroom);
 989                if (!new_skb) {
 990                        dst_release(&rt->dst);
 991                        kfree_skb(skb);
 992                        IP_VS_ERR_RL("%s(): no memory\n", __func__);
 993                        return NF_STOLEN;
 994                }
 995                consume_skb(skb);
 996                skb = new_skb;
 997                old_iph = ipv6_hdr(skb);
 998        }
 999
1000        skb->transport_header = skb->network_header;
1001
1002        skb_push(skb, sizeof(struct ipv6hdr));
1003        skb_reset_network_header(skb);
1004        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1005
1006        /* drop old route */
1007        skb_dst_drop(skb);
1008        skb_dst_set(skb, &rt->dst);
1009
1010        /*
1011         *      Push down and install the IPIP header.
1012         */
1013        iph                     =       ipv6_hdr(skb);
1014        iph->version            =       6;
1015        iph->nexthdr            =       IPPROTO_IPV6;
1016        iph->payload_len        =       old_iph->payload_len;
1017        be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
1018        iph->priority           =       old_iph->priority;
1019        memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
1020        iph->daddr = cp->daddr.in6;
1021        iph->saddr = saddr;
1022        iph->hop_limit          =       old_iph->hop_limit;
1023
1024        /* Another hack: avoid icmp_send in ip_fragment */
1025        skb->local_df = 1;
1026
1027        ret = IP_VS_XMIT_TUNNEL(skb, cp);
1028        if (ret == NF_ACCEPT)
1029                ip6_local_out(skb);
1030        else if (ret == NF_DROP)
1031                kfree_skb(skb);
1032
1033        LeaveFunction(10);
1034
1035        return NF_STOLEN;
1036
1037tx_error_icmp:
1038        dst_link_failure(skb);
1039tx_error:
1040        kfree_skb(skb);
1041        LeaveFunction(10);
1042        return NF_STOLEN;
1043tx_error_put:
1044        dst_release(&rt->dst);
1045        goto tx_error;
1046}
1047#endif
1048
1049
1050/*
1051 *      Direct Routing transmitter
1052 *      Used for ANY protocol
1053 */
1054int
1055ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1056              struct ip_vs_protocol *pp)
1057{
1058        struct rtable *rt;                      /* Route to the other host */
1059        struct iphdr  *iph = ip_hdr(skb);
1060        int    mtu;
1061
1062        EnterFunction(10);
1063
1064        if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1065                                      RT_TOS(iph->tos),
1066                                      IP_VS_RT_MODE_LOCAL |
1067                                      IP_VS_RT_MODE_NON_LOCAL |
1068                                      IP_VS_RT_MODE_KNOWN_NH, NULL)))
1069                goto tx_error_icmp;
1070        if (rt->rt_flags & RTCF_LOCAL) {
1071                ip_rt_put(rt);
1072                IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1073        }
1074
1075        /* MTU checking */
1076        mtu = dst_mtu(&rt->dst);
1077        if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1078            !skb_is_gso(skb)) {
1079                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1080                ip_rt_put(rt);
1081                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1082                goto tx_error;
1083        }
1084
1085        /*
1086         * Call ip_send_check because we are not sure it is called
1087         * after ip_defrag. Is copy-on-write needed?
1088         */
1089        if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1090                ip_rt_put(rt);
1091                return NF_STOLEN;
1092        }
1093        ip_send_check(ip_hdr(skb));
1094
1095        /* drop old route */
1096        skb_dst_drop(skb);
1097        skb_dst_set(skb, &rt->dst);
1098
1099        /* Another hack: avoid icmp_send in ip_fragment */
1100        skb->local_df = 1;
1101
1102        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
1103
1104        LeaveFunction(10);
1105        return NF_STOLEN;
1106
1107  tx_error_icmp:
1108        dst_link_failure(skb);
1109  tx_error:
1110        kfree_skb(skb);
1111        LeaveFunction(10);
1112        return NF_STOLEN;
1113}
1114
1115#ifdef CONFIG_IP_VS_IPV6
1116int
1117ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1118                 struct ip_vs_protocol *pp)
1119{
1120        struct rt6_info *rt;                    /* Route to the other host */
1121        int    mtu;
1122
1123        EnterFunction(10);
1124
1125        if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1126                                         0, (IP_VS_RT_MODE_LOCAL |
1127                                             IP_VS_RT_MODE_NON_LOCAL))))
1128                goto tx_error_icmp;
1129        if (__ip_vs_is_local_route6(rt)) {
1130                dst_release(&rt->dst);
1131                IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1132        }
1133
1134        /* MTU checking */
1135        mtu = dst_mtu(&rt->dst);
1136        if (__mtu_check_toobig_v6(skb, mtu)) {
1137                if (!skb->dev) {
1138                        struct net *net = dev_net(skb_dst(skb)->dev);
1139
1140                        skb->dev = net->loopback_dev;
1141                }
1142                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1143                dst_release(&rt->dst);
1144                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1145                goto tx_error;
1146        }
1147
1148        /*
1149         * Call ip_send_check because we are not sure it is called
1150         * after ip_defrag. Is copy-on-write needed?
1151         */
1152        skb = skb_share_check(skb, GFP_ATOMIC);
1153        if (unlikely(skb == NULL)) {
1154                dst_release(&rt->dst);
1155                return NF_STOLEN;
1156        }
1157
1158        /* drop old route */
1159        skb_dst_drop(skb);
1160        skb_dst_set(skb, &rt->dst);
1161
1162        /* Another hack: avoid icmp_send in ip_fragment */
1163        skb->local_df = 1;
1164
1165        IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
1166
1167        LeaveFunction(10);
1168        return NF_STOLEN;
1169
1170tx_error_icmp:
1171        dst_link_failure(skb);
1172tx_error:
1173        kfree_skb(skb);
1174        LeaveFunction(10);
1175        return NF_STOLEN;
1176}
1177#endif
1178
1179
1180/*
1181 *      ICMP packet transmitter
1182 *      called by the ip_vs_in_icmp
1183 */
1184int
1185ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1186                struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
1187{
1188        struct rtable   *rt;    /* Route to the other host */
1189        int mtu;
1190        int rc;
1191        int local;
1192        int rt_mode;
1193
1194        EnterFunction(10);
1195
1196        /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1197           forwarded directly here, because there is no need to
1198           translate address/port back */
1199        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1200                if (cp->packet_xmit)
1201                        rc = cp->packet_xmit(skb, cp, pp);
1202                else
1203                        rc = NF_ACCEPT;
1204                /* do not touch skb anymore */
1205                atomic_inc(&cp->in_pkts);
1206                goto out;
1207        }
1208
1209        /*
1210         * mangle and send the packet here (only for VS/NAT)
1211         */
1212
1213        /* LOCALNODE from FORWARD hook is not supported */
1214        rt_mode = (hooknum != NF_INET_FORWARD) ?
1215                  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1216                  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1217        if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1218                                      RT_TOS(ip_hdr(skb)->tos),
1219                                      rt_mode, NULL)))
1220                goto tx_error_icmp;
1221        local = rt->rt_flags & RTCF_LOCAL;
1222
1223        /*
1224         * Avoid duplicate tuple in reply direction for NAT traffic
1225         * to local address when connection is sync-ed
1226         */
1227#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1228        if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1229                enum ip_conntrack_info ctinfo;
1230                struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1231
1232                if (ct && !nf_ct_is_untracked(ct)) {
1233                        IP_VS_DBG(10, "%s(): "
1234                                  "stopping DNAT to local address %pI4\n",
1235                                  __func__, &cp->daddr.ip);
1236                        goto tx_error_put;
1237                }
1238        }
1239#endif
1240
1241        /* From world but DNAT to loopback address? */
1242        if (local && ipv4_is_loopback(cp->daddr.ip) &&
1243            rt_is_input_route(skb_rtable(skb))) {
1244                IP_VS_DBG(1, "%s(): "
1245                          "stopping DNAT to loopback %pI4\n",
1246                          __func__, &cp->daddr.ip);
1247                goto tx_error_put;
1248        }
1249
1250        /* MTU checking */
1251        mtu = dst_mtu(&rt->dst);
1252        if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1253            !skb_is_gso(skb)) {
1254                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1255                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1256                goto tx_error_put;
1257        }
1258
1259        /* copy-on-write the packet before mangling it */
1260        if (!skb_make_writable(skb, offset))
1261                goto tx_error_put;
1262
1263        if (skb_cow(skb, rt->dst.dev->hard_header_len))
1264                goto tx_error_put;
1265
1266        ip_vs_nat_icmp(skb, pp, cp, 0);
1267
1268        if (!local) {
1269                /* drop the old route when skb is not shared */
1270                skb_dst_drop(skb);
1271                skb_dst_set(skb, &rt->dst);
1272        } else {
1273                ip_rt_put(rt);
1274                /*
1275                 * Some IPv4 replies get local address from routes,
1276                 * not from iph, so while we DNAT after routing
1277                 * we need this second input/output route.
1278                 */
1279                if (!__ip_vs_reroute_locally(skb))
1280                        goto tx_error;
1281        }
1282
1283        /* Another hack: avoid icmp_send in ip_fragment */
1284        skb->local_df = 1;
1285
1286        IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
1287
1288        rc = NF_STOLEN;
1289        goto out;
1290
1291  tx_error_icmp:
1292        dst_link_failure(skb);
1293  tx_error:
1294        dev_kfree_skb(skb);
1295        rc = NF_STOLEN;
1296  out:
1297        LeaveFunction(10);
1298        return rc;
1299  tx_error_put:
1300        ip_rt_put(rt);
1301        goto tx_error;
1302}
1303
1304#ifdef CONFIG_IP_VS_IPV6
1305int
1306ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1307                struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
1308{
1309        struct rt6_info *rt;    /* Route to the other host */
1310        int mtu;
1311        int rc;
1312        int local;
1313        int rt_mode;
1314
1315        EnterFunction(10);
1316
1317        /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1318           forwarded directly here, because there is no need to
1319           translate address/port back */
1320        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1321                if (cp->packet_xmit)
1322                        rc = cp->packet_xmit(skb, cp, pp);
1323                else
1324                        rc = NF_ACCEPT;
1325                /* do not touch skb anymore */
1326                atomic_inc(&cp->in_pkts);
1327                goto out;
1328        }
1329
1330        /*
1331         * mangle and send the packet here (only for VS/NAT)
1332         */
1333
1334        /* LOCALNODE from FORWARD hook is not supported */
1335        rt_mode = (hooknum != NF_INET_FORWARD) ?
1336                  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1337                  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1338        if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1339                                         0, rt_mode)))
1340                goto tx_error_icmp;
1341
1342        local = __ip_vs_is_local_route6(rt);
1343        /*
1344         * Avoid duplicate tuple in reply direction for NAT traffic
1345         * to local address when connection is sync-ed
1346         */
1347#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1348        if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1349                enum ip_conntrack_info ctinfo;
1350                struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1351
1352                if (ct && !nf_ct_is_untracked(ct)) {
1353                        IP_VS_DBG(10, "%s(): "
1354                                  "stopping DNAT to local address %pI6\n",
1355                                  __func__, &cp->daddr.in6);
1356                        goto tx_error_put;
1357                }
1358        }
1359#endif
1360
1361        /* From world but DNAT to loopback address? */
1362        if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1363            ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1364                IP_VS_DBG(1, "%s(): "
1365                          "stopping DNAT to loopback %pI6\n",
1366                          __func__, &cp->daddr.in6);
1367                goto tx_error_put;
1368        }
1369
1370        /* MTU checking */
1371        mtu = dst_mtu(&rt->dst);
1372        if (__mtu_check_toobig_v6(skb, mtu)) {
1373                if (!skb->dev) {
1374                        struct net *net = dev_net(skb_dst(skb)->dev);
1375
1376                        skb->dev = net->loopback_dev;
1377                }
1378                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1379                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1380                goto tx_error_put;
1381        }
1382
1383        /* copy-on-write the packet before mangling it */
1384        if (!skb_make_writable(skb, offset))
1385                goto tx_error_put;
1386
1387        if (skb_cow(skb, rt->dst.dev->hard_header_len))
1388                goto tx_error_put;
1389
1390        ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1391
1392        if (!local || !skb->dev) {
1393                /* drop the old route when skb is not shared */
1394                skb_dst_drop(skb);
1395                skb_dst_set(skb, &rt->dst);
1396        } else {
1397                /* destined to loopback, do we need to change route? */
1398                dst_release(&rt->dst);
1399        }
1400
1401        /* Another hack: avoid icmp_send in ip_fragment */
1402        skb->local_df = 1;
1403
1404        IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
1405
1406        rc = NF_STOLEN;
1407        goto out;
1408
1409tx_error_icmp:
1410        dst_link_failure(skb);
1411tx_error:
1412        dev_kfree_skb(skb);
1413        rc = NF_STOLEN;
1414out:
1415        LeaveFunction(10);
1416        return rc;
1417tx_error_put:
1418        dst_release(&rt->dst);
1419        goto tx_error;
1420}
1421#endif
1422