linux/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
<<
>>
Prefs
   1/*
   2 * (C) 1999-2001 Paul `Rusty' Russell
   3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
   4 * (C) 2011 Patrick McHardy <kaber@trash.net>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10
  11#include <linux/types.h>
  12#include <linux/module.h>
  13#include <linux/skbuff.h>
  14#include <linux/ip.h>
  15#include <linux/icmp.h>
  16#include <linux/netfilter.h>
  17#include <linux/netfilter_ipv4.h>
  18#include <net/secure_seq.h>
  19#include <net/checksum.h>
  20#include <net/route.h>
  21#include <net/ip.h>
  22
  23#include <net/netfilter/nf_conntrack_core.h>
  24#include <net/netfilter/nf_conntrack.h>
  25#include <net/netfilter/nf_nat_core.h>
  26#include <net/netfilter/nf_nat_l3proto.h>
  27#include <net/netfilter/nf_nat_l4proto.h>
  28
  29static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
  30
  31#ifdef CONFIG_XFRM
  32static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
  33                                       const struct nf_conn *ct,
  34                                       enum ip_conntrack_dir dir,
  35                                       unsigned long statusbit,
  36                                       struct flowi *fl)
  37{
  38        const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
  39        struct flowi4 *fl4 = &fl->u.ip4;
  40
  41        if (ct->status & statusbit) {
  42                fl4->daddr = t->dst.u3.ip;
  43                if (t->dst.protonum == IPPROTO_TCP ||
  44                    t->dst.protonum == IPPROTO_UDP ||
  45                    t->dst.protonum == IPPROTO_UDPLITE ||
  46                    t->dst.protonum == IPPROTO_DCCP ||
  47                    t->dst.protonum == IPPROTO_SCTP)
  48                        fl4->fl4_dport = t->dst.u.all;
  49        }
  50
  51        statusbit ^= IPS_NAT_MASK;
  52
  53        if (ct->status & statusbit) {
  54                fl4->saddr = t->src.u3.ip;
  55                if (t->dst.protonum == IPPROTO_TCP ||
  56                    t->dst.protonum == IPPROTO_UDP ||
  57                    t->dst.protonum == IPPROTO_UDPLITE ||
  58                    t->dst.protonum == IPPROTO_DCCP ||
  59                    t->dst.protonum == IPPROTO_SCTP)
  60                        fl4->fl4_sport = t->src.u.all;
  61        }
  62}
  63#endif /* CONFIG_XFRM */
  64
  65static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
  66                                 const struct nf_nat_range *range)
  67{
  68        return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
  69               ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
  70}
  71
  72static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
  73                                   __be16 dport)
  74{
  75        return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
  76}
  77
  78static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
  79                                  unsigned int iphdroff,
  80                                  const struct nf_nat_l4proto *l4proto,
  81                                  const struct nf_conntrack_tuple *target,
  82                                  enum nf_nat_manip_type maniptype)
  83{
  84        struct iphdr *iph;
  85        unsigned int hdroff;
  86
  87        if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
  88                return false;
  89
  90        iph = (void *)skb->data + iphdroff;
  91        hdroff = iphdroff + iph->ihl * 4;
  92
  93        if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
  94                                target, maniptype))
  95                return false;
  96        iph = (void *)skb->data + iphdroff;
  97
  98        if (maniptype == NF_NAT_MANIP_SRC) {
  99                csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
 100                iph->saddr = target->src.u3.ip;
 101        } else {
 102                csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
 103                iph->daddr = target->dst.u3.ip;
 104        }
 105        return true;
 106}
 107
 108static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
 109                                    unsigned int iphdroff, __sum16 *check,
 110                                    const struct nf_conntrack_tuple *t,
 111                                    enum nf_nat_manip_type maniptype)
 112{
 113        struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 114        __be32 oldip, newip;
 115
 116        if (maniptype == NF_NAT_MANIP_SRC) {
 117                oldip = iph->saddr;
 118                newip = t->src.u3.ip;
 119        } else {
 120                oldip = iph->daddr;
 121                newip = t->dst.u3.ip;
 122        }
 123        inet_proto_csum_replace4(check, skb, oldip, newip, 1);
 124}
 125
 126static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
 127                                    u8 proto, void *data, __sum16 *check,
 128                                    int datalen, int oldlen)
 129{
 130        const struct iphdr *iph = ip_hdr(skb);
 131        struct rtable *rt = skb_rtable(skb);
 132
 133        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 134                if (!(rt->rt_flags & RTCF_LOCAL) &&
 135                    (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
 136                        skb->ip_summed = CHECKSUM_PARTIAL;
 137                        skb->csum_start = skb_headroom(skb) +
 138                                          skb_network_offset(skb) +
 139                                          ip_hdrlen(skb);
 140                        skb->csum_offset = (void *)check - data;
 141                        *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 142                                                    datalen, proto, 0);
 143                } else {
 144                        *check = 0;
 145                        *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 146                                                   datalen, proto,
 147                                                   csum_partial(data, datalen,
 148                                                                0));
 149                        if (proto == IPPROTO_UDP && !*check)
 150                                *check = CSUM_MANGLED_0;
 151                }
 152        } else
 153                inet_proto_csum_replace2(check, skb,
 154                                         htons(oldlen), htons(datalen), 1);
 155}
 156
 157#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 158static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
 159                                       struct nf_nat_range *range)
 160{
 161        if (tb[CTA_NAT_V4_MINIP]) {
 162                range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
 163                range->flags |= NF_NAT_RANGE_MAP_IPS;
 164        }
 165
 166        if (tb[CTA_NAT_V4_MAXIP])
 167                range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
 168        else
 169                range->max_addr.ip = range->min_addr.ip;
 170
 171        return 0;
 172}
 173#endif
 174
 175static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
 176        .l3proto                = NFPROTO_IPV4,
 177        .in_range               = nf_nat_ipv4_in_range,
 178        .secure_port            = nf_nat_ipv4_secure_port,
 179        .manip_pkt              = nf_nat_ipv4_manip_pkt,
 180        .csum_update            = nf_nat_ipv4_csum_update,
 181        .csum_recalc            = nf_nat_ipv4_csum_recalc,
 182#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 183        .nlattr_to_range        = nf_nat_ipv4_nlattr_to_range,
 184#endif
 185#ifdef CONFIG_XFRM
 186        .decode_session         = nf_nat_ipv4_decode_session,
 187#endif
 188};
 189
 190int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 191                                  struct nf_conn *ct,
 192                                  enum ip_conntrack_info ctinfo,
 193                                  unsigned int hooknum)
 194{
 195        struct {
 196                struct icmphdr  icmp;
 197                struct iphdr    ip;
 198        } *inside;
 199        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 200        enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
 201        unsigned int hdrlen = ip_hdrlen(skb);
 202        const struct nf_nat_l4proto *l4proto;
 203        struct nf_conntrack_tuple target;
 204        unsigned long statusbit;
 205
 206        NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
 207
 208        if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 209                return 0;
 210        if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
 211                return 0;
 212
 213        inside = (void *)skb->data + hdrlen;
 214        if (inside->icmp.type == ICMP_REDIRECT) {
 215                if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
 216                        return 0;
 217                if (ct->status & IPS_NAT_MASK)
 218                        return 0;
 219        }
 220
 221        if (manip == NF_NAT_MANIP_SRC)
 222                statusbit = IPS_SRC_NAT;
 223        else
 224                statusbit = IPS_DST_NAT;
 225
 226        /* Invert if this is reply direction */
 227        if (dir == IP_CT_DIR_REPLY)
 228                statusbit ^= IPS_NAT_MASK;
 229
 230        if (!(ct->status & statusbit))
 231                return 1;
 232
 233        l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
 234        if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
 235                                   l4proto, &ct->tuplehash[!dir].tuple, !manip))
 236                return 0;
 237
 238        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 239                /* Reloading "inside" here since manip_pkt may reallocate */
 240                inside = (void *)skb->data + hdrlen;
 241                inside->icmp.checksum = 0;
 242                inside->icmp.checksum =
 243                        csum_fold(skb_checksum(skb, hdrlen,
 244                                               skb->len - hdrlen, 0));
 245        }
 246
 247        /* Change outer to look like the reply to an incoming packet */
 248        nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
 249        l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
 250        if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
 251                return 0;
 252
 253        return 1;
 254}
 255EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
 256
 257unsigned int
 258nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 259               const struct net_device *in, const struct net_device *out,
 260               unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 261                                        struct sk_buff *skb,
 262                                        const struct net_device *in,
 263                                        const struct net_device *out,
 264                                        struct nf_conn *ct))
 265{
 266        struct nf_conn *ct;
 267        enum ip_conntrack_info ctinfo;
 268        struct nf_conn_nat *nat;
 269        /* maniptype == SRC for postrouting. */
 270        enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
 271
 272        /* We never see fragments: conntrack defrags on pre-routing
 273         * and local-out, and nf_nat_out protects post-routing.
 274         */
 275        NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
 276
 277        ct = nf_ct_get(skb, &ctinfo);
 278        /* Can't track?  It's not due to stress, or conntrack would
 279         * have dropped it.  Hence it's the user's responsibilty to
 280         * packet filter it out, or implement conntrack/NAT for that
 281         * protocol. 8) --RR
 282         */
 283        if (!ct)
 284                return NF_ACCEPT;
 285
 286        /* Don't try to NAT if this packet is not conntracked */
 287        if (nf_ct_is_untracked(ct))
 288                return NF_ACCEPT;
 289
 290        nat = nf_ct_nat_ext_add(ct);
 291        if (nat == NULL)
 292                return NF_ACCEPT;
 293
 294        switch (ctinfo) {
 295        case IP_CT_RELATED:
 296        case IP_CT_RELATED_REPLY:
 297                if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 298                        if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
 299                                                           ops->hooknum))
 300                                return NF_DROP;
 301                        else
 302                                return NF_ACCEPT;
 303                }
 304                /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
 305        case IP_CT_NEW:
 306                /* Seen it before?  This can happen for loopback, retrans,
 307                 * or local packets.
 308                 */
 309                if (!nf_nat_initialized(ct, maniptype)) {
 310                        unsigned int ret;
 311
 312                        ret = do_chain(ops, skb, in, out, ct);
 313                        if (ret != NF_ACCEPT)
 314                                return ret;
 315
 316                        if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
 317                                break;
 318
 319                        ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
 320                        if (ret != NF_ACCEPT)
 321                                return ret;
 322                } else {
 323                        pr_debug("Already setup manip %s for ct %p\n",
 324                                 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 325                                 ct);
 326                        if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 327                                goto oif_changed;
 328                }
 329                break;
 330
 331        default:
 332                /* ESTABLISHED */
 333                NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 334                             ctinfo == IP_CT_ESTABLISHED_REPLY);
 335                if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 336                        goto oif_changed;
 337        }
 338
 339        return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
 340
 341oif_changed:
 342        nf_ct_kill_acct(ct, ctinfo, skb);
 343        return NF_DROP;
 344}
 345EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
 346
 347unsigned int
 348nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
 349               const struct net_device *in, const struct net_device *out,
 350               unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 351                                         struct sk_buff *skb,
 352                                         const struct net_device *in,
 353                                         const struct net_device *out,
 354                                         struct nf_conn *ct))
 355{
 356        unsigned int ret;
 357        __be32 daddr = ip_hdr(skb)->daddr;
 358
 359        ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
 360        if (ret != NF_DROP && ret != NF_STOLEN &&
 361            daddr != ip_hdr(skb)->daddr)
 362                skb_dst_drop(skb);
 363
 364        return ret;
 365}
 366EXPORT_SYMBOL_GPL(nf_nat_ipv4_in);
 367
 368unsigned int
 369nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
 370                const struct net_device *in, const struct net_device *out,
 371                unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 372                                          struct sk_buff *skb,
 373                                          const struct net_device *in,
 374                                          const struct net_device *out,
 375                                          struct nf_conn *ct))
 376{
 377#ifdef CONFIG_XFRM
 378        const struct nf_conn *ct;
 379        enum ip_conntrack_info ctinfo;
 380        int err;
 381#endif
 382        unsigned int ret;
 383
 384        /* root is playing with raw sockets. */
 385        if (skb->len < sizeof(struct iphdr) ||
 386            ip_hdrlen(skb) < sizeof(struct iphdr))
 387                return NF_ACCEPT;
 388
 389        ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
 390#ifdef CONFIG_XFRM
 391        if (ret != NF_DROP && ret != NF_STOLEN &&
 392            !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
 393            (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 394                enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 395
 396                if ((ct->tuplehash[dir].tuple.src.u3.ip !=
 397                     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
 398                    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
 399                     ct->tuplehash[dir].tuple.src.u.all !=
 400                     ct->tuplehash[!dir].tuple.dst.u.all)) {
 401                        err = nf_xfrm_me_harder(skb, AF_INET);
 402                        if (err < 0)
 403                                ret = NF_DROP_ERR(err);
 404                }
 405        }
 406#endif
 407        return ret;
 408}
 409EXPORT_SYMBOL_GPL(nf_nat_ipv4_out);
 410
 411unsigned int
 412nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 413                     const struct net_device *in, const struct net_device *out,
 414                     unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 415                                               struct sk_buff *skb,
 416                                               const struct net_device *in,
 417                                               const struct net_device *out,
 418                                               struct nf_conn *ct))
 419{
 420        const struct nf_conn *ct;
 421        enum ip_conntrack_info ctinfo;
 422        unsigned int ret;
 423        int err;
 424
 425        /* root is playing with raw sockets. */
 426        if (skb->len < sizeof(struct iphdr) ||
 427            ip_hdrlen(skb) < sizeof(struct iphdr))
 428                return NF_ACCEPT;
 429
 430        ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
 431        if (ret != NF_DROP && ret != NF_STOLEN &&
 432            (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 433                enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 434
 435                if (ct->tuplehash[dir].tuple.dst.u3.ip !=
 436                    ct->tuplehash[!dir].tuple.src.u3.ip) {
 437                        err = ip_route_me_harder(skb, RTN_UNSPEC);
 438                        if (err < 0)
 439                                ret = NF_DROP_ERR(err);
 440                }
 441#ifdef CONFIG_XFRM
 442                else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
 443                         ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
 444                         ct->tuplehash[dir].tuple.dst.u.all !=
 445                         ct->tuplehash[!dir].tuple.src.u.all) {
 446                        err = nf_xfrm_me_harder(skb, AF_INET);
 447                        if (err < 0)
 448                                ret = NF_DROP_ERR(err);
 449                }
 450#endif
 451        }
 452        return ret;
 453}
 454EXPORT_SYMBOL_GPL(nf_nat_ipv4_local_fn);
 455
 456static int __init nf_nat_l3proto_ipv4_init(void)
 457{
 458        int err;
 459
 460        err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
 461        if (err < 0)
 462                goto err1;
 463        err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
 464        if (err < 0)
 465                goto err2;
 466        return err;
 467
 468err2:
 469        nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
 470err1:
 471        return err;
 472}
 473
 474static void __exit nf_nat_l3proto_ipv4_exit(void)
 475{
 476        nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
 477        nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
 478}
 479
 480MODULE_LICENSE("GPL");
 481MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
 482
 483module_init(nf_nat_l3proto_ipv4_init);
 484module_exit(nf_nat_l3proto_ipv4_exit);
 485