linux/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
<<
>>
Prefs
   1/*
   2 * (C) 1999-2001 Paul `Rusty' Russell
   3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
   4 * (C) 2011 Patrick McHardy <kaber@trash.net>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10
  11#include <linux/types.h>
  12#include <linux/module.h>
  13#include <linux/skbuff.h>
  14#include <linux/ip.h>
  15#include <linux/icmp.h>
  16#include <linux/netfilter.h>
  17#include <linux/netfilter_ipv4.h>
  18#include <net/secure_seq.h>
  19#include <net/checksum.h>
  20#include <net/route.h>
  21#include <net/ip.h>
  22
  23#include <net/netfilter/nf_conntrack_core.h>
  24#include <net/netfilter/nf_conntrack.h>
  25#include <net/netfilter/nf_nat_core.h>
  26#include <net/netfilter/nf_nat_l3proto.h>
  27#include <net/netfilter/nf_nat_l4proto.h>
  28
  29static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
  30
  31#ifdef CONFIG_XFRM
  32static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
  33                                       const struct nf_conn *ct,
  34                                       enum ip_conntrack_dir dir,
  35                                       unsigned long statusbit,
  36                                       struct flowi *fl)
  37{
  38        const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
  39        struct flowi4 *fl4 = &fl->u.ip4;
  40
  41        if (ct->status & statusbit) {
  42                fl4->daddr = t->dst.u3.ip;
  43                if (t->dst.protonum == IPPROTO_TCP ||
  44                    t->dst.protonum == IPPROTO_UDP ||
  45                    t->dst.protonum == IPPROTO_UDPLITE ||
  46                    t->dst.protonum == IPPROTO_DCCP ||
  47                    t->dst.protonum == IPPROTO_SCTP)
  48                        fl4->fl4_dport = t->dst.u.all;
  49        }
  50
  51        statusbit ^= IPS_NAT_MASK;
  52
  53        if (ct->status & statusbit) {
  54                fl4->saddr = t->src.u3.ip;
  55                if (t->dst.protonum == IPPROTO_TCP ||
  56                    t->dst.protonum == IPPROTO_UDP ||
  57                    t->dst.protonum == IPPROTO_UDPLITE ||
  58                    t->dst.protonum == IPPROTO_DCCP ||
  59                    t->dst.protonum == IPPROTO_SCTP)
  60                        fl4->fl4_sport = t->src.u.all;
  61        }
  62}
  63#endif /* CONFIG_XFRM */
  64
  65static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
  66                                 const struct nf_nat_range *range)
  67{
  68        return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
  69               ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
  70}
  71
  72static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
  73                                   __be16 dport)
  74{
  75        return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
  76}
  77
  78static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
  79                                  unsigned int iphdroff,
  80                                  const struct nf_nat_l4proto *l4proto,
  81                                  const struct nf_conntrack_tuple *target,
  82                                  enum nf_nat_manip_type maniptype)
  83{
  84        struct iphdr *iph;
  85        unsigned int hdroff;
  86
  87        if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
  88                return false;
  89
  90        iph = (void *)skb->data + iphdroff;
  91        hdroff = iphdroff + iph->ihl * 4;
  92
  93        if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
  94                                target, maniptype))
  95                return false;
  96        iph = (void *)skb->data + iphdroff;
  97
  98        if (maniptype == NF_NAT_MANIP_SRC) {
  99                csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
 100                iph->saddr = target->src.u3.ip;
 101        } else {
 102                csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
 103                iph->daddr = target->dst.u3.ip;
 104        }
 105        return true;
 106}
 107
 108static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
 109                                    unsigned int iphdroff, __sum16 *check,
 110                                    const struct nf_conntrack_tuple *t,
 111                                    enum nf_nat_manip_type maniptype)
 112{
 113        struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 114        __be32 oldip, newip;
 115
 116        if (maniptype == NF_NAT_MANIP_SRC) {
 117                oldip = iph->saddr;
 118                newip = t->src.u3.ip;
 119        } else {
 120                oldip = iph->daddr;
 121                newip = t->dst.u3.ip;
 122        }
 123        inet_proto_csum_replace4(check, skb, oldip, newip, true);
 124}
 125
 126static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
 127                                    u8 proto, void *data, __sum16 *check,
 128                                    int datalen, int oldlen)
 129{
 130        const struct iphdr *iph = ip_hdr(skb);
 131        struct rtable *rt = skb_rtable(skb);
 132
 133        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 134                if (!(rt->rt_flags & RTCF_LOCAL) &&
 135                    (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
 136                        skb->ip_summed = CHECKSUM_PARTIAL;
 137                        skb->csum_start = skb_headroom(skb) +
 138                                          skb_network_offset(skb) +
 139                                          ip_hdrlen(skb);
 140                        skb->csum_offset = (void *)check - data;
 141                        *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 142                                                    datalen, proto, 0);
 143                } else {
 144                        *check = 0;
 145                        *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 146                                                   datalen, proto,
 147                                                   csum_partial(data, datalen,
 148                                                                0));
 149                        if (proto == IPPROTO_UDP && !*check)
 150                                *check = CSUM_MANGLED_0;
 151                }
 152        } else
 153                inet_proto_csum_replace2(check, skb,
 154                                         htons(oldlen), htons(datalen), true);
 155}
 156
 157#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 158static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
 159                                       struct nf_nat_range *range)
 160{
 161        if (tb[CTA_NAT_V4_MINIP]) {
 162                range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
 163                range->flags |= NF_NAT_RANGE_MAP_IPS;
 164        }
 165
 166        if (tb[CTA_NAT_V4_MAXIP])
 167                range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
 168        else
 169                range->max_addr.ip = range->min_addr.ip;
 170
 171        return 0;
 172}
 173#endif
 174
 175static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
 176        .l3proto                = NFPROTO_IPV4,
 177        .in_range               = nf_nat_ipv4_in_range,
 178        .secure_port            = nf_nat_ipv4_secure_port,
 179        .manip_pkt              = nf_nat_ipv4_manip_pkt,
 180        .csum_update            = nf_nat_ipv4_csum_update,
 181        .csum_recalc            = nf_nat_ipv4_csum_recalc,
 182#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 183        .nlattr_to_range        = nf_nat_ipv4_nlattr_to_range,
 184#endif
 185#ifdef CONFIG_XFRM
 186        .decode_session         = nf_nat_ipv4_decode_session,
 187#endif
 188};
 189
 190int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 191                                  struct nf_conn *ct,
 192                                  enum ip_conntrack_info ctinfo,
 193                                  unsigned int hooknum)
 194{
 195        struct {
 196                struct icmphdr  icmp;
 197                struct iphdr    ip;
 198        } *inside;
 199        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 200        enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
 201        unsigned int hdrlen = ip_hdrlen(skb);
 202        const struct nf_nat_l4proto *l4proto;
 203        struct nf_conntrack_tuple target;
 204        unsigned long statusbit;
 205
 206        NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
 207
 208        if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 209                return 0;
 210        if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
 211                return 0;
 212
 213        inside = (void *)skb->data + hdrlen;
 214        if (inside->icmp.type == ICMP_REDIRECT) {
 215                if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
 216                        return 0;
 217                if (ct->status & IPS_NAT_MASK)
 218                        return 0;
 219        }
 220
 221        if (manip == NF_NAT_MANIP_SRC)
 222                statusbit = IPS_SRC_NAT;
 223        else
 224                statusbit = IPS_DST_NAT;
 225
 226        /* Invert if this is reply direction */
 227        if (dir == IP_CT_DIR_REPLY)
 228                statusbit ^= IPS_NAT_MASK;
 229
 230        if (!(ct->status & statusbit))
 231                return 1;
 232
 233        l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
 234        if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
 235                                   l4proto, &ct->tuplehash[!dir].tuple, !manip))
 236                return 0;
 237
 238        if (skb->ip_summed != CHECKSUM_PARTIAL) {
 239                /* Reloading "inside" here since manip_pkt may reallocate */
 240                inside = (void *)skb->data + hdrlen;
 241                inside->icmp.checksum = 0;
 242                inside->icmp.checksum =
 243                        csum_fold(skb_checksum(skb, hdrlen,
 244                                               skb->len - hdrlen, 0));
 245        }
 246
 247        /* Change outer to look like the reply to an incoming packet */
 248        nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
 249        l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
 250        if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
 251                return 0;
 252
 253        return 1;
 254}
 255EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
 256
 257unsigned int
 258nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 259               const struct nf_hook_state *state,
 260               unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 261                                        struct sk_buff *skb,
 262                                        const struct nf_hook_state *state,
 263                                        struct nf_conn *ct))
 264{
 265        struct nf_conn *ct;
 266        enum ip_conntrack_info ctinfo;
 267        struct nf_conn_nat *nat;
 268        /* maniptype == SRC for postrouting. */
 269        enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
 270
 271        /* We never see fragments: conntrack defrags on pre-routing
 272         * and local-out, and nf_nat_out protects post-routing.
 273         */
 274        NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
 275
 276        ct = nf_ct_get(skb, &ctinfo);
 277        /* Can't track?  It's not due to stress, or conntrack would
 278         * have dropped it.  Hence it's the user's responsibilty to
 279         * packet filter it out, or implement conntrack/NAT for that
 280         * protocol. 8) --RR
 281         */
 282        if (!ct)
 283                return NF_ACCEPT;
 284
 285        /* Don't try to NAT if this packet is not conntracked */
 286        if (nf_ct_is_untracked(ct))
 287                return NF_ACCEPT;
 288
 289        nat = nf_ct_nat_ext_add(ct);
 290        if (nat == NULL)
 291                return NF_ACCEPT;
 292
 293        switch (ctinfo) {
 294        case IP_CT_RELATED:
 295        case IP_CT_RELATED_REPLY:
 296                if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 297                        if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
 298                                                           ops->hooknum))
 299                                return NF_DROP;
 300                        else
 301                                return NF_ACCEPT;
 302                }
 303                /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
 304        case IP_CT_NEW:
 305                /* Seen it before?  This can happen for loopback, retrans,
 306                 * or local packets.
 307                 */
 308                if (!nf_nat_initialized(ct, maniptype)) {
 309                        unsigned int ret;
 310
 311                        ret = do_chain(ops, skb, state, ct);
 312                        if (ret != NF_ACCEPT)
 313                                return ret;
 314
 315                        if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
 316                                break;
 317
 318                        ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
 319                        if (ret != NF_ACCEPT)
 320                                return ret;
 321                } else {
 322                        pr_debug("Already setup manip %s for ct %p\n",
 323                                 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 324                                 ct);
 325                        if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat,
 326                                               state->out))
 327                                goto oif_changed;
 328                }
 329                break;
 330
 331        default:
 332                /* ESTABLISHED */
 333                NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 334                             ctinfo == IP_CT_ESTABLISHED_REPLY);
 335                if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
 336                        goto oif_changed;
 337        }
 338
 339        return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
 340
 341oif_changed:
 342        nf_ct_kill_acct(ct, ctinfo, skb);
 343        return NF_DROP;
 344}
 345EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
 346
 347unsigned int
 348nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
 349               const struct nf_hook_state *state,
 350               unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 351                                         struct sk_buff *skb,
 352                                         const struct nf_hook_state *state,
 353                                         struct nf_conn *ct))
 354{
 355        unsigned int ret;
 356        __be32 daddr = ip_hdr(skb)->daddr;
 357
 358        ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
 359        if (ret != NF_DROP && ret != NF_STOLEN &&
 360            daddr != ip_hdr(skb)->daddr)
 361                skb_dst_drop(skb);
 362
 363        return ret;
 364}
 365EXPORT_SYMBOL_GPL(nf_nat_ipv4_in);
 366
 367unsigned int
 368nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
 369                const struct nf_hook_state *state,
 370                unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 371                                          struct sk_buff *skb,
 372                                          const struct nf_hook_state *state,
 373                                          struct nf_conn *ct))
 374{
 375#ifdef CONFIG_XFRM
 376        const struct nf_conn *ct;
 377        enum ip_conntrack_info ctinfo;
 378        int err;
 379#endif
 380        unsigned int ret;
 381
 382        /* root is playing with raw sockets. */
 383        if (skb->len < sizeof(struct iphdr) ||
 384            ip_hdrlen(skb) < sizeof(struct iphdr))
 385                return NF_ACCEPT;
 386
 387        ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
 388#ifdef CONFIG_XFRM
 389        if (ret != NF_DROP && ret != NF_STOLEN &&
 390            !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
 391            (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 392                enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 393
 394                if ((ct->tuplehash[dir].tuple.src.u3.ip !=
 395                     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
 396                    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
 397                     ct->tuplehash[dir].tuple.src.u.all !=
 398                     ct->tuplehash[!dir].tuple.dst.u.all)) {
 399                        err = nf_xfrm_me_harder(skb, AF_INET);
 400                        if (err < 0)
 401                                ret = NF_DROP_ERR(err);
 402                }
 403        }
 404#endif
 405        return ret;
 406}
 407EXPORT_SYMBOL_GPL(nf_nat_ipv4_out);
 408
 409unsigned int
 410nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 411                     const struct nf_hook_state *state,
 412                     unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 413                                               struct sk_buff *skb,
 414                                               const struct nf_hook_state *state,
 415                                               struct nf_conn *ct))
 416{
 417        const struct nf_conn *ct;
 418        enum ip_conntrack_info ctinfo;
 419        unsigned int ret;
 420        int err;
 421
 422        /* root is playing with raw sockets. */
 423        if (skb->len < sizeof(struct iphdr) ||
 424            ip_hdrlen(skb) < sizeof(struct iphdr))
 425                return NF_ACCEPT;
 426
 427        ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
 428        if (ret != NF_DROP && ret != NF_STOLEN &&
 429            (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 430                enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 431
 432                if (ct->tuplehash[dir].tuple.dst.u3.ip !=
 433                    ct->tuplehash[!dir].tuple.src.u3.ip) {
 434                        err = ip_route_me_harder(skb, RTN_UNSPEC);
 435                        if (err < 0)
 436                                ret = NF_DROP_ERR(err);
 437                }
 438#ifdef CONFIG_XFRM
 439                else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
 440                         ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
 441                         ct->tuplehash[dir].tuple.dst.u.all !=
 442                         ct->tuplehash[!dir].tuple.src.u.all) {
 443                        err = nf_xfrm_me_harder(skb, AF_INET);
 444                        if (err < 0)
 445                                ret = NF_DROP_ERR(err);
 446                }
 447#endif
 448        }
 449        return ret;
 450}
 451EXPORT_SYMBOL_GPL(nf_nat_ipv4_local_fn);
 452
 453static int __init nf_nat_l3proto_ipv4_init(void)
 454{
 455        int err;
 456
 457        err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
 458        if (err < 0)
 459                goto err1;
 460        err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
 461        if (err < 0)
 462                goto err2;
 463        return err;
 464
 465err2:
 466        nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
 467err1:
 468        return err;
 469}
 470
 471static void __exit nf_nat_l3proto_ipv4_exit(void)
 472{
 473        nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
 474        nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
 475}
 476
 477MODULE_LICENSE("GPL");
 478MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
 479
 480module_init(nf_nat_l3proto_ipv4_init);
 481module_exit(nf_nat_l3proto_ipv4_exit);
 482