linux/net/netfilter/nf_nat_proto.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* (C) 1999-2001 Paul `Rusty' Russell
   3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
   4 */
   5
   6#include <linux/types.h>
   7#include <linux/export.h>
   8#include <linux/init.h>
   9#include <linux/udp.h>
  10#include <linux/tcp.h>
  11#include <linux/icmp.h>
  12#include <linux/icmpv6.h>
  13
  14#include <linux/dccp.h>
  15#include <linux/sctp.h>
  16#include <net/sctp/checksum.h>
  17
  18#include <linux/netfilter.h>
  19#include <net/netfilter/nf_nat.h>
  20
  21#include <linux/ipv6.h>
  22#include <linux/netfilter_ipv6.h>
  23#include <net/checksum.h>
  24#include <net/ip6_checksum.h>
  25#include <net/ip6_route.h>
  26#include <net/xfrm.h>
  27#include <net/ipv6.h>
  28
  29#include <net/netfilter/nf_conntrack_core.h>
  30#include <net/netfilter/nf_conntrack.h>
  31#include <linux/netfilter/nfnetlink_conntrack.h>
  32
  33static void nf_csum_update(struct sk_buff *skb,
  34                           unsigned int iphdroff, __sum16 *check,
  35                           const struct nf_conntrack_tuple *t,
  36                           enum nf_nat_manip_type maniptype);
  37
  38static void
  39__udp_manip_pkt(struct sk_buff *skb,
  40                unsigned int iphdroff, struct udphdr *hdr,
  41                const struct nf_conntrack_tuple *tuple,
  42                enum nf_nat_manip_type maniptype, bool do_csum)
  43{
  44        __be16 *portptr, newport;
  45
  46        if (maniptype == NF_NAT_MANIP_SRC) {
  47                /* Get rid of src port */
  48                newport = tuple->src.u.udp.port;
  49                portptr = &hdr->source;
  50        } else {
  51                /* Get rid of dst port */
  52                newport = tuple->dst.u.udp.port;
  53                portptr = &hdr->dest;
  54        }
  55        if (do_csum) {
  56                nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
  57                inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
  58                                         false);
  59                if (!hdr->check)
  60                        hdr->check = CSUM_MANGLED_0;
  61        }
  62        *portptr = newport;
  63}
  64
  65static bool udp_manip_pkt(struct sk_buff *skb,
  66                          unsigned int iphdroff, unsigned int hdroff,
  67                          const struct nf_conntrack_tuple *tuple,
  68                          enum nf_nat_manip_type maniptype)
  69{
  70        struct udphdr *hdr;
  71
  72        if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
  73                return false;
  74
  75        hdr = (struct udphdr *)(skb->data + hdroff);
  76        __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check);
  77
  78        return true;
  79}
  80
  81static bool udplite_manip_pkt(struct sk_buff *skb,
  82                              unsigned int iphdroff, unsigned int hdroff,
  83                              const struct nf_conntrack_tuple *tuple,
  84                              enum nf_nat_manip_type maniptype)
  85{
  86#ifdef CONFIG_NF_CT_PROTO_UDPLITE
  87        struct udphdr *hdr;
  88
  89        if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
  90                return false;
  91
  92        hdr = (struct udphdr *)(skb->data + hdroff);
  93        __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
  94#endif
  95        return true;
  96}
  97
  98static bool
  99sctp_manip_pkt(struct sk_buff *skb,
 100               unsigned int iphdroff, unsigned int hdroff,
 101               const struct nf_conntrack_tuple *tuple,
 102               enum nf_nat_manip_type maniptype)
 103{
 104#ifdef CONFIG_NF_CT_PROTO_SCTP
 105        struct sctphdr *hdr;
 106        int hdrsize = 8;
 107
 108        /* This could be an inner header returned in imcp packet; in such
 109         * cases we cannot update the checksum field since it is outside
 110         * of the 8 bytes of transport layer headers we are guaranteed.
 111         */
 112        if (skb->len >= hdroff + sizeof(*hdr))
 113                hdrsize = sizeof(*hdr);
 114
 115        if (skb_ensure_writable(skb, hdroff + hdrsize))
 116                return false;
 117
 118        hdr = (struct sctphdr *)(skb->data + hdroff);
 119
 120        if (maniptype == NF_NAT_MANIP_SRC) {
 121                /* Get rid of src port */
 122                hdr->source = tuple->src.u.sctp.port;
 123        } else {
 124                /* Get rid of dst port */
 125                hdr->dest = tuple->dst.u.sctp.port;
 126        }
 127
 128        if (hdrsize < sizeof(*hdr))
 129                return true;
 130
 131        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 132                hdr->checksum = sctp_compute_cksum(skb, hdroff);
 133                skb->ip_summed = CHECKSUM_NONE;
 134        }
 135
 136#endif
 137        return true;
 138}
 139
 140static bool
 141tcp_manip_pkt(struct sk_buff *skb,
 142              unsigned int iphdroff, unsigned int hdroff,
 143              const struct nf_conntrack_tuple *tuple,
 144              enum nf_nat_manip_type maniptype)
 145{
 146        struct tcphdr *hdr;
 147        __be16 *portptr, newport, oldport;
 148        int hdrsize = 8; /* TCP connection tracking guarantees this much */
 149
 150        /* this could be a inner header returned in icmp packet; in such
 151           cases we cannot update the checksum field since it is outside of
 152           the 8 bytes of transport layer headers we are guaranteed */
 153        if (skb->len >= hdroff + sizeof(struct tcphdr))
 154                hdrsize = sizeof(struct tcphdr);
 155
 156        if (skb_ensure_writable(skb, hdroff + hdrsize))
 157                return false;
 158
 159        hdr = (struct tcphdr *)(skb->data + hdroff);
 160
 161        if (maniptype == NF_NAT_MANIP_SRC) {
 162                /* Get rid of src port */
 163                newport = tuple->src.u.tcp.port;
 164                portptr = &hdr->source;
 165        } else {
 166                /* Get rid of dst port */
 167                newport = tuple->dst.u.tcp.port;
 168                portptr = &hdr->dest;
 169        }
 170
 171        oldport = *portptr;
 172        *portptr = newport;
 173
 174        if (hdrsize < sizeof(*hdr))
 175                return true;
 176
 177        nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
 178        inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
 179        return true;
 180}
 181
 182static bool
 183dccp_manip_pkt(struct sk_buff *skb,
 184               unsigned int iphdroff, unsigned int hdroff,
 185               const struct nf_conntrack_tuple *tuple,
 186               enum nf_nat_manip_type maniptype)
 187{
 188#ifdef CONFIG_NF_CT_PROTO_DCCP
 189        struct dccp_hdr *hdr;
 190        __be16 *portptr, oldport, newport;
 191        int hdrsize = 8; /* DCCP connection tracking guarantees this much */
 192
 193        if (skb->len >= hdroff + sizeof(struct dccp_hdr))
 194                hdrsize = sizeof(struct dccp_hdr);
 195
 196        if (skb_ensure_writable(skb, hdroff + hdrsize))
 197                return false;
 198
 199        hdr = (struct dccp_hdr *)(skb->data + hdroff);
 200
 201        if (maniptype == NF_NAT_MANIP_SRC) {
 202                newport = tuple->src.u.dccp.port;
 203                portptr = &hdr->dccph_sport;
 204        } else {
 205                newport = tuple->dst.u.dccp.port;
 206                portptr = &hdr->dccph_dport;
 207        }
 208
 209        oldport = *portptr;
 210        *portptr = newport;
 211
 212        if (hdrsize < sizeof(*hdr))
 213                return true;
 214
 215        nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
 216        inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
 217                                 false);
 218#endif
 219        return true;
 220}
 221
 222static bool
 223icmp_manip_pkt(struct sk_buff *skb,
 224               unsigned int iphdroff, unsigned int hdroff,
 225               const struct nf_conntrack_tuple *tuple,
 226               enum nf_nat_manip_type maniptype)
 227{
 228        struct icmphdr *hdr;
 229
 230        if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
 231                return false;
 232
 233        hdr = (struct icmphdr *)(skb->data + hdroff);
 234        switch (hdr->type) {
 235        case ICMP_ECHO:
 236        case ICMP_ECHOREPLY:
 237        case ICMP_TIMESTAMP:
 238        case ICMP_TIMESTAMPREPLY:
 239        case ICMP_INFO_REQUEST:
 240        case ICMP_INFO_REPLY:
 241        case ICMP_ADDRESS:
 242        case ICMP_ADDRESSREPLY:
 243                break;
 244        default:
 245                return true;
 246        }
 247        inet_proto_csum_replace2(&hdr->checksum, skb,
 248                                 hdr->un.echo.id, tuple->src.u.icmp.id, false);
 249        hdr->un.echo.id = tuple->src.u.icmp.id;
 250        return true;
 251}
 252
 253static bool
 254icmpv6_manip_pkt(struct sk_buff *skb,
 255                 unsigned int iphdroff, unsigned int hdroff,
 256                 const struct nf_conntrack_tuple *tuple,
 257                 enum nf_nat_manip_type maniptype)
 258{
 259        struct icmp6hdr *hdr;
 260
 261        if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
 262                return false;
 263
 264        hdr = (struct icmp6hdr *)(skb->data + hdroff);
 265        nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
 266        if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
 267            hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
 268                inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
 269                                         hdr->icmp6_identifier,
 270                                         tuple->src.u.icmp.id, false);
 271                hdr->icmp6_identifier = tuple->src.u.icmp.id;
 272        }
 273        return true;
 274}
 275
 276/* manipulate a GRE packet according to maniptype */
 277static bool
 278gre_manip_pkt(struct sk_buff *skb,
 279              unsigned int iphdroff, unsigned int hdroff,
 280              const struct nf_conntrack_tuple *tuple,
 281              enum nf_nat_manip_type maniptype)
 282{
 283#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
 284        const struct gre_base_hdr *greh;
 285        struct pptp_gre_header *pgreh;
 286
 287        /* pgreh includes two optional 32bit fields which are not required
 288         * to be there.  That's where the magic '8' comes from */
 289        if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
 290                return false;
 291
 292        greh = (void *)skb->data + hdroff;
 293        pgreh = (struct pptp_gre_header *)greh;
 294
 295        /* we only have destination manip of a packet, since 'source key'
 296         * is not present in the packet itself */
 297        if (maniptype != NF_NAT_MANIP_DST)
 298                return true;
 299
 300        switch (greh->flags & GRE_VERSION) {
 301        case GRE_VERSION_0:
 302                /* We do not currently NAT any GREv0 packets.
 303                 * Try to behave like "nf_nat_proto_unknown" */
 304                break;
 305        case GRE_VERSION_1:
 306                pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
 307                pgreh->call_id = tuple->dst.u.gre.key;
 308                break;
 309        default:
 310                pr_debug("can't nat unknown GRE version\n");
 311                return false;
 312        }
 313#endif
 314        return true;
 315}
 316
 317static bool l4proto_manip_pkt(struct sk_buff *skb,
 318                              unsigned int iphdroff, unsigned int hdroff,
 319                              const struct nf_conntrack_tuple *tuple,
 320                              enum nf_nat_manip_type maniptype)
 321{
 322        switch (tuple->dst.protonum) {
 323        case IPPROTO_TCP:
 324                return tcp_manip_pkt(skb, iphdroff, hdroff,
 325                                     tuple, maniptype);
 326        case IPPROTO_UDP:
 327                return udp_manip_pkt(skb, iphdroff, hdroff,
 328                                     tuple, maniptype);
 329        case IPPROTO_UDPLITE:
 330                return udplite_manip_pkt(skb, iphdroff, hdroff,
 331                                         tuple, maniptype);
 332        case IPPROTO_SCTP:
 333                return sctp_manip_pkt(skb, iphdroff, hdroff,
 334                                      tuple, maniptype);
 335        case IPPROTO_ICMP:
 336                return icmp_manip_pkt(skb, iphdroff, hdroff,
 337                                      tuple, maniptype);
 338        case IPPROTO_ICMPV6:
 339                return icmpv6_manip_pkt(skb, iphdroff, hdroff,
 340                                        tuple, maniptype);
 341        case IPPROTO_DCCP:
 342                return dccp_manip_pkt(skb, iphdroff, hdroff,
 343                                      tuple, maniptype);
 344        case IPPROTO_GRE:
 345                return gre_manip_pkt(skb, iphdroff, hdroff,
 346                                     tuple, maniptype);
 347        }
 348
 349        /* If we don't know protocol -- no error, pass it unmodified. */
 350        return true;
 351}
 352
 353static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
 354                                  unsigned int iphdroff,
 355                                  const struct nf_conntrack_tuple *target,
 356                                  enum nf_nat_manip_type maniptype)
 357{
 358        struct iphdr *iph;
 359        unsigned int hdroff;
 360
 361        if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
 362                return false;
 363
 364        iph = (void *)skb->data + iphdroff;
 365        hdroff = iphdroff + iph->ihl * 4;
 366
 367        if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
 368                return false;
 369        iph = (void *)skb->data + iphdroff;
 370
 371        if (maniptype == NF_NAT_MANIP_SRC) {
 372                csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
 373                iph->saddr = target->src.u3.ip;
 374        } else {
 375                csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
 376                iph->daddr = target->dst.u3.ip;
 377        }
 378        return true;
 379}
 380
 381static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
 382                                  unsigned int iphdroff,
 383                                  const struct nf_conntrack_tuple *target,
 384                                  enum nf_nat_manip_type maniptype)
 385{
 386#if IS_ENABLED(CONFIG_IPV6)
 387        struct ipv6hdr *ipv6h;
 388        __be16 frag_off;
 389        int hdroff;
 390        u8 nexthdr;
 391
 392        if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
 393                return false;
 394
 395        ipv6h = (void *)skb->data + iphdroff;
 396        nexthdr = ipv6h->nexthdr;
 397        hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
 398                                  &nexthdr, &frag_off);
 399        if (hdroff < 0)
 400                goto manip_addr;
 401
 402        if ((frag_off & htons(~0x7)) == 0 &&
 403            !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
 404                return false;
 405
 406        /* must reload, offset might have changed */
 407        ipv6h = (void *)skb->data + iphdroff;
 408
 409manip_addr:
 410        if (maniptype == NF_NAT_MANIP_SRC)
 411                ipv6h->saddr = target->src.u3.in6;
 412        else
 413                ipv6h->daddr = target->dst.u3.in6;
 414
 415#endif
 416        return true;
 417}
 418
 419unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
 420                              enum nf_nat_manip_type mtype,
 421                              enum ip_conntrack_dir dir)
 422{
 423        struct nf_conntrack_tuple target;
 424
 425        /* We are aiming to look like inverse of other direction. */
 426        nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
 427
 428        switch (target.src.l3num) {
 429        case NFPROTO_IPV6:
 430                if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
 431                        return NF_ACCEPT;
 432                break;
 433        case NFPROTO_IPV4:
 434                if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
 435                        return NF_ACCEPT;
 436                break;
 437        default:
 438                WARN_ON_ONCE(1);
 439                break;
 440        }
 441
 442        return NF_DROP;
 443}
 444
 445static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
 446                                    unsigned int iphdroff, __sum16 *check,
 447                                    const struct nf_conntrack_tuple *t,
 448                                    enum nf_nat_manip_type maniptype)
 449{
 450        struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 451        __be32 oldip, newip;
 452
 453        if (maniptype == NF_NAT_MANIP_SRC) {
 454                oldip = iph->saddr;
 455                newip = t->src.u3.ip;
 456        } else {
 457                oldip = iph->daddr;
 458                newip = t->dst.u3.ip;
 459        }
 460        inet_proto_csum_replace4(check, skb, oldip, newip, true);
 461}
 462
 463static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
 464                                    unsigned int iphdroff, __sum16 *check,
 465                                    const struct nf_conntrack_tuple *t,
 466                                    enum nf_nat_manip_type maniptype)
 467{
 468#if IS_ENABLED(CONFIG_IPV6)
 469        const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
 470        const struct in6_addr *oldip, *newip;
 471
 472        if (maniptype == NF_NAT_MANIP_SRC) {
 473                oldip = &ipv6h->saddr;
 474                newip = &t->src.u3.in6;
 475        } else {
 476                oldip = &ipv6h->daddr;
 477                newip = &t->dst.u3.in6;
 478        }
 479        inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
 480                                  newip->s6_addr32, true);
 481#endif
 482}
 483
 484static void nf_csum_update(struct sk_buff *skb,
 485                           unsigned int iphdroff, __sum16 *check,
 486                           const struct nf_conntrack_tuple *t,
 487                           enum nf_nat_manip_type maniptype)
 488{
 489        switch (t->src.l3num) {
 490        case NFPROTO_IPV4:
 491                nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
 492                return;
 493        case NFPROTO_IPV6:
 494                nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
 495                return;
 496        }
 497}
 498
 499static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
 500                                    u8 proto, void *data, __sum16 *check,
 501                                    int datalen, int oldlen)
 502{
 503        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 504                const struct iphdr *iph = ip_hdr(skb);
 505
 506                skb->ip_summed = CHECKSUM_PARTIAL;
 507                skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
 508                        ip_hdrlen(skb);
 509                skb->csum_offset = (void *)check - data;
 510                *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
 511                                            proto, 0);
 512        } else {
 513                inet_proto_csum_replace2(check, skb,
 514                                         htons(oldlen), htons(datalen), true);
 515        }
 516}
 517
 518#if IS_ENABLED(CONFIG_IPV6)
 519static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
 520                                    u8 proto, void *data, __sum16 *check,
 521                                    int datalen, int oldlen)
 522{
 523        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 524                const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 525
 526                skb->ip_summed = CHECKSUM_PARTIAL;
 527                skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
 528                        (data - (void *)skb->data);
 529                skb->csum_offset = (void *)check - data;
 530                *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
 531                                          datalen, proto, 0);
 532        } else {
 533                inet_proto_csum_replace2(check, skb,
 534                                         htons(oldlen), htons(datalen), true);
 535        }
 536}
 537#endif
 538
 539void nf_nat_csum_recalc(struct sk_buff *skb,
 540                        u8 nfproto, u8 proto, void *data, __sum16 *check,
 541                        int datalen, int oldlen)
 542{
 543        switch (nfproto) {
 544        case NFPROTO_IPV4:
 545                nf_nat_ipv4_csum_recalc(skb, proto, data, check,
 546                                        datalen, oldlen);
 547                return;
 548#if IS_ENABLED(CONFIG_IPV6)
 549        case NFPROTO_IPV6:
 550                nf_nat_ipv6_csum_recalc(skb, proto, data, check,
 551                                        datalen, oldlen);
 552                return;
 553#endif
 554        }
 555
 556        WARN_ON_ONCE(1);
 557}
 558
 559int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 560                                  struct nf_conn *ct,
 561                                  enum ip_conntrack_info ctinfo,
 562                                  unsigned int hooknum)
 563{
 564        struct {
 565                struct icmphdr  icmp;
 566                struct iphdr    ip;
 567        } *inside;
 568        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 569        enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
 570        unsigned int hdrlen = ip_hdrlen(skb);
 571        struct nf_conntrack_tuple target;
 572        unsigned long statusbit;
 573
 574        WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
 575
 576        if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
 577                return 0;
 578        if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
 579                return 0;
 580
 581        inside = (void *)skb->data + hdrlen;
 582        if (inside->icmp.type == ICMP_REDIRECT) {
 583                if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
 584                        return 0;
 585                if (ct->status & IPS_NAT_MASK)
 586                        return 0;
 587        }
 588
 589        if (manip == NF_NAT_MANIP_SRC)
 590                statusbit = IPS_SRC_NAT;
 591        else
 592                statusbit = IPS_DST_NAT;
 593
 594        /* Invert if this is reply direction */
 595        if (dir == IP_CT_DIR_REPLY)
 596                statusbit ^= IPS_NAT_MASK;
 597
 598        if (!(ct->status & statusbit))
 599                return 1;
 600
 601        if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
 602                                   &ct->tuplehash[!dir].tuple, !manip))
 603                return 0;
 604
 605        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 606                /* Reloading "inside" here since manip_pkt may reallocate */
 607                inside = (void *)skb->data + hdrlen;
 608                inside->icmp.checksum = 0;
 609                inside->icmp.checksum =
 610                        csum_fold(skb_checksum(skb, hdrlen,
 611                                               skb->len - hdrlen, 0));
 612        }
 613
 614        /* Change outer to look like the reply to an incoming packet */
 615        nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
 616        target.dst.protonum = IPPROTO_ICMP;
 617        if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
 618                return 0;
 619
 620        return 1;
 621}
 622EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
 623
 624static unsigned int
 625nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 626               const struct nf_hook_state *state)
 627{
 628        struct nf_conn *ct;
 629        enum ip_conntrack_info ctinfo;
 630
 631        ct = nf_ct_get(skb, &ctinfo);
 632        if (!ct)
 633                return NF_ACCEPT;
 634
 635        if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
 636                if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 637                        if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
 638                                                           state->hook))
 639                                return NF_DROP;
 640                        else
 641                                return NF_ACCEPT;
 642                }
 643        }
 644
 645        return nf_nat_inet_fn(priv, skb, state);
 646}
 647
 648static unsigned int
 649nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
 650                        const struct nf_hook_state *state)
 651{
 652        unsigned int ret;
 653        __be32 daddr = ip_hdr(skb)->daddr;
 654
 655        ret = nf_nat_ipv4_fn(priv, skb, state);
 656        if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
 657                skb_dst_drop(skb);
 658
 659        return ret;
 660}
 661
 662#ifdef CONFIG_XFRM
 663static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 664{
 665        struct sock *sk = skb->sk;
 666        struct dst_entry *dst;
 667        unsigned int hh_len;
 668        struct flowi fl;
 669        int err;
 670
 671        err = xfrm_decode_session(skb, &fl, family);
 672        if (err < 0)
 673                return err;
 674
 675        dst = skb_dst(skb);
 676        if (dst->xfrm)
 677                dst = ((struct xfrm_dst *)dst)->route;
 678        if (!dst_hold_safe(dst))
 679                return -EHOSTUNREACH;
 680
 681        if (sk && !net_eq(net, sock_net(sk)))
 682                sk = NULL;
 683
 684        dst = xfrm_lookup(net, dst, &fl, sk, 0);
 685        if (IS_ERR(dst))
 686                return PTR_ERR(dst);
 687
 688        skb_dst_drop(skb);
 689        skb_dst_set(skb, dst);
 690
 691        /* Change in oif may mean change in hh_len. */
 692        hh_len = skb_dst(skb)->dev->hard_header_len;
 693        if (skb_headroom(skb) < hh_len &&
 694            pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
 695                return -ENOMEM;
 696        return 0;
 697}
 698#endif
 699
 700static unsigned int
 701nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
 702                     const struct nf_hook_state *state)
 703{
 704        __be32 saddr = ip_hdr(skb)->saddr;
 705        struct sock *sk = skb->sk;
 706        unsigned int ret;
 707
 708        ret = nf_nat_ipv4_fn(priv, skb, state);
 709
 710        if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
 711            !inet_sk_transparent(sk))
 712                skb_orphan(skb); /* TCP edemux obtained wrong socket */
 713
 714        return ret;
 715}
 716
 717static unsigned int
 718nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
 719                const struct nf_hook_state *state)
 720{
 721#ifdef CONFIG_XFRM
 722        const struct nf_conn *ct;
 723        enum ip_conntrack_info ctinfo;
 724        int err;
 725#endif
 726        unsigned int ret;
 727
 728        ret = nf_nat_ipv4_fn(priv, skb, state);
 729#ifdef CONFIG_XFRM
 730        if (ret != NF_ACCEPT)
 731                return ret;
 732
 733        if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
 734                return ret;
 735
 736        ct = nf_ct_get(skb, &ctinfo);
 737        if (ct) {
 738                enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 739
 740                if (ct->tuplehash[dir].tuple.src.u3.ip !=
 741                     ct->tuplehash[!dir].tuple.dst.u3.ip ||
 742                    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
 743                     ct->tuplehash[dir].tuple.src.u.all !=
 744                     ct->tuplehash[!dir].tuple.dst.u.all)) {
 745                        err = nf_xfrm_me_harder(state->net, skb, AF_INET);
 746                        if (err < 0)
 747                                ret = NF_DROP_ERR(err);
 748                }
 749        }
 750#endif
 751        return ret;
 752}
 753
 754static unsigned int
 755nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
 756                     const struct nf_hook_state *state)
 757{
 758        const struct nf_conn *ct;
 759        enum ip_conntrack_info ctinfo;
 760        unsigned int ret;
 761        int err;
 762
 763        ret = nf_nat_ipv4_fn(priv, skb, state);
 764        if (ret != NF_ACCEPT)
 765                return ret;
 766
 767        ct = nf_ct_get(skb, &ctinfo);
 768        if (ct) {
 769                enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 770
 771                if (ct->tuplehash[dir].tuple.dst.u3.ip !=
 772                    ct->tuplehash[!dir].tuple.src.u3.ip) {
 773                        err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
 774                        if (err < 0)
 775                                ret = NF_DROP_ERR(err);
 776                }
 777#ifdef CONFIG_XFRM
 778                else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
 779                         ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
 780                         ct->tuplehash[dir].tuple.dst.u.all !=
 781                         ct->tuplehash[!dir].tuple.src.u.all) {
 782                        err = nf_xfrm_me_harder(state->net, skb, AF_INET);
 783                        if (err < 0)
 784                                ret = NF_DROP_ERR(err);
 785                }
 786#endif
 787        }
 788        return ret;
 789}
 790
 791static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
 792        /* Before packet filtering, change destination */
 793        {
 794                .hook           = nf_nat_ipv4_pre_routing,
 795                .pf             = NFPROTO_IPV4,
 796                .hooknum        = NF_INET_PRE_ROUTING,
 797                .priority       = NF_IP_PRI_NAT_DST,
 798        },
 799        /* After packet filtering, change source */
 800        {
 801                .hook           = nf_nat_ipv4_out,
 802                .pf             = NFPROTO_IPV4,
 803                .hooknum        = NF_INET_POST_ROUTING,
 804                .priority       = NF_IP_PRI_NAT_SRC,
 805        },
 806        /* Before packet filtering, change destination */
 807        {
 808                .hook           = nf_nat_ipv4_local_fn,
 809                .pf             = NFPROTO_IPV4,
 810                .hooknum        = NF_INET_LOCAL_OUT,
 811                .priority       = NF_IP_PRI_NAT_DST,
 812        },
 813        /* After packet filtering, change source */
 814        {
 815                .hook           = nf_nat_ipv4_local_in,
 816                .pf             = NFPROTO_IPV4,
 817                .hooknum        = NF_INET_LOCAL_IN,
 818                .priority       = NF_IP_PRI_NAT_SRC,
 819        },
 820};
 821
 822int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
 823{
 824        return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
 825                                  ARRAY_SIZE(nf_nat_ipv4_ops));
 826}
 827EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
 828
 829void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
 830{
 831        nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
 832}
 833EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
 834
 835#if IS_ENABLED(CONFIG_IPV6)
 836int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
 837                                    struct nf_conn *ct,
 838                                    enum ip_conntrack_info ctinfo,
 839                                    unsigned int hooknum,
 840                                    unsigned int hdrlen)
 841{
 842        struct {
 843                struct icmp6hdr icmp6;
 844                struct ipv6hdr  ip6;
 845        } *inside;
 846        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 847        enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
 848        struct nf_conntrack_tuple target;
 849        unsigned long statusbit;
 850
 851        WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
 852
 853        if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
 854                return 0;
 855        if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
 856                return 0;
 857
 858        inside = (void *)skb->data + hdrlen;
 859        if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
 860                if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
 861                        return 0;
 862                if (ct->status & IPS_NAT_MASK)
 863                        return 0;
 864        }
 865
 866        if (manip == NF_NAT_MANIP_SRC)
 867                statusbit = IPS_SRC_NAT;
 868        else
 869                statusbit = IPS_DST_NAT;
 870
 871        /* Invert if this is reply direction */
 872        if (dir == IP_CT_DIR_REPLY)
 873                statusbit ^= IPS_NAT_MASK;
 874
 875        if (!(ct->status & statusbit))
 876                return 1;
 877
 878        if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
 879                                   &ct->tuplehash[!dir].tuple, !manip))
 880                return 0;
 881
 882        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 883                struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 884
 885                inside = (void *)skb->data + hdrlen;
 886                inside->icmp6.icmp6_cksum = 0;
 887                inside->icmp6.icmp6_cksum =
 888                        csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
 889                                        skb->len - hdrlen, IPPROTO_ICMPV6,
 890                                        skb_checksum(skb, hdrlen,
 891                                                     skb->len - hdrlen, 0));
 892        }
 893
 894        nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
 895        target.dst.protonum = IPPROTO_ICMPV6;
 896        if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
 897                return 0;
 898
 899        return 1;
 900}
 901EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
 902
 903static unsigned int
 904nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
 905               const struct nf_hook_state *state)
 906{
 907        struct nf_conn *ct;
 908        enum ip_conntrack_info ctinfo;
 909        __be16 frag_off;
 910        int hdrlen;
 911        u8 nexthdr;
 912
 913        ct = nf_ct_get(skb, &ctinfo);
 914        /* Can't track?  It's not due to stress, or conntrack would
 915         * have dropped it.  Hence it's the user's responsibilty to
 916         * packet filter it out, or implement conntrack/NAT for that
 917         * protocol. 8) --RR
 918         */
 919        if (!ct)
 920                return NF_ACCEPT;
 921
 922        if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
 923                nexthdr = ipv6_hdr(skb)->nexthdr;
 924                hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
 925                                          &nexthdr, &frag_off);
 926
 927                if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
 928                        if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
 929                                                             state->hook,
 930                                                             hdrlen))
 931                                return NF_DROP;
 932                        else
 933                                return NF_ACCEPT;
 934                }
 935        }
 936
 937        return nf_nat_inet_fn(priv, skb, state);
 938}
 939
 940static unsigned int
 941nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
 942               const struct nf_hook_state *state)
 943{
 944        unsigned int ret;
 945        struct in6_addr daddr = ipv6_hdr(skb)->daddr;
 946
 947        ret = nf_nat_ipv6_fn(priv, skb, state);
 948        if (ret != NF_DROP && ret != NF_STOLEN &&
 949            ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
 950                skb_dst_drop(skb);
 951
 952        return ret;
 953}
 954
 955static unsigned int
 956nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
 957                const struct nf_hook_state *state)
 958{
 959#ifdef CONFIG_XFRM
 960        const struct nf_conn *ct;
 961        enum ip_conntrack_info ctinfo;
 962        int err;
 963#endif
 964        unsigned int ret;
 965
 966        ret = nf_nat_ipv6_fn(priv, skb, state);
 967#ifdef CONFIG_XFRM
 968        if (ret != NF_ACCEPT)
 969                return ret;
 970
 971        if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
 972                return ret;
 973        ct = nf_ct_get(skb, &ctinfo);
 974        if (ct) {
 975                enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 976
 977                if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
 978                                      &ct->tuplehash[!dir].tuple.dst.u3) ||
 979                    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
 980                     ct->tuplehash[dir].tuple.src.u.all !=
 981                     ct->tuplehash[!dir].tuple.dst.u.all)) {
 982                        err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
 983                        if (err < 0)
 984                                ret = NF_DROP_ERR(err);
 985                }
 986        }
 987#endif
 988
 989        return ret;
 990}
 991
 992static unsigned int
 993nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
 994                     const struct nf_hook_state *state)
 995{
 996        const struct nf_conn *ct;
 997        enum ip_conntrack_info ctinfo;
 998        unsigned int ret;
 999        int err;
1000
1001        ret = nf_nat_ipv6_fn(priv, skb, state);
1002        if (ret != NF_ACCEPT)
1003                return ret;
1004
1005        ct = nf_ct_get(skb, &ctinfo);
1006        if (ct) {
1007                enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
1008
1009                if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
1010                                      &ct->tuplehash[!dir].tuple.src.u3)) {
1011                        err = nf_ip6_route_me_harder(state->net, state->sk, skb);
1012                        if (err < 0)
1013                                ret = NF_DROP_ERR(err);
1014                }
1015#ifdef CONFIG_XFRM
1016                else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
1017                         ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
1018                         ct->tuplehash[dir].tuple.dst.u.all !=
1019                         ct->tuplehash[!dir].tuple.src.u.all) {
1020                        err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
1021                        if (err < 0)
1022                                ret = NF_DROP_ERR(err);
1023                }
1024#endif
1025        }
1026
1027        return ret;
1028}
1029
1030static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
1031        /* Before packet filtering, change destination */
1032        {
1033                .hook           = nf_nat_ipv6_in,
1034                .pf             = NFPROTO_IPV6,
1035                .hooknum        = NF_INET_PRE_ROUTING,
1036                .priority       = NF_IP6_PRI_NAT_DST,
1037        },
1038        /* After packet filtering, change source */
1039        {
1040                .hook           = nf_nat_ipv6_out,
1041                .pf             = NFPROTO_IPV6,
1042                .hooknum        = NF_INET_POST_ROUTING,
1043                .priority       = NF_IP6_PRI_NAT_SRC,
1044        },
1045        /* Before packet filtering, change destination */
1046        {
1047                .hook           = nf_nat_ipv6_local_fn,
1048                .pf             = NFPROTO_IPV6,
1049                .hooknum        = NF_INET_LOCAL_OUT,
1050                .priority       = NF_IP6_PRI_NAT_DST,
1051        },
1052        /* After packet filtering, change source */
1053        {
1054                .hook           = nf_nat_ipv6_fn,
1055                .pf             = NFPROTO_IPV6,
1056                .hooknum        = NF_INET_LOCAL_IN,
1057                .priority       = NF_IP6_PRI_NAT_SRC,
1058        },
1059};
1060
1061int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
1062{
1063        return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
1064                                  ARRAY_SIZE(nf_nat_ipv6_ops));
1065}
1066EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
1067
1068void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1069{
1070        nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1071}
1072EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
1073#endif /* CONFIG_IPV6 */
1074
1075#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
1076int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
1077{
1078        int ret;
1079
1080        if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
1081                return -EINVAL;
1082
1083        ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
1084                                 ARRAY_SIZE(nf_nat_ipv6_ops));
1085        if (ret)
1086                return ret;
1087
1088        ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
1089                                 ARRAY_SIZE(nf_nat_ipv4_ops));
1090        if (ret)
1091                nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
1092                                        ARRAY_SIZE(nf_nat_ipv6_ops));
1093        return ret;
1094}
1095EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
1096
1097void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1098{
1099        nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
1100        nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1101}
1102EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
1103#endif /* NFT INET NAT */
1104