linux/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
<<
>>
Prefs
   1
   2/* (C) 1999-2001 Paul `Rusty' Russell
   3 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
   4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10
  11#include <linux/types.h>
  12#include <linux/ip.h>
  13#include <linux/netfilter.h>
  14#include <linux/module.h>
  15#include <linux/skbuff.h>
  16#include <linux/icmp.h>
  17#include <linux/sysctl.h>
  18#include <net/route.h>
  19#include <net/ip.h>
  20
  21#include <linux/netfilter_ipv4.h>
  22#include <net/netfilter/nf_conntrack.h>
  23#include <net/netfilter/nf_conntrack_helper.h>
  24#include <net/netfilter/nf_conntrack_l4proto.h>
  25#include <net/netfilter/nf_conntrack_l3proto.h>
  26#include <net/netfilter/nf_conntrack_zones.h>
  27#include <net/netfilter/nf_conntrack_core.h>
  28#include <net/netfilter/nf_conntrack_seqadj.h>
  29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
  30#include <net/netfilter/nf_nat_helper.h>
  31#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
  32#include <net/netfilter/nf_log.h>
  33
  34static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
  35                              struct nf_conntrack_tuple *tuple)
  36{
  37        const __be32 *ap;
  38        __be32 _addrs[2];
  39        ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
  40                                sizeof(u_int32_t) * 2, _addrs);
  41        if (ap == NULL)
  42                return false;
  43
  44        tuple->src.u3.ip = ap[0];
  45        tuple->dst.u3.ip = ap[1];
  46
  47        return true;
  48}
  49
  50static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
  51                              const struct nf_conntrack_tuple *orig)
  52{
  53        tuple->src.u3.ip = orig->dst.u3.ip;
  54        tuple->dst.u3.ip = orig->src.u3.ip;
  55
  56        return true;
  57}
  58
  59static void ipv4_print_tuple(struct seq_file *s,
  60                            const struct nf_conntrack_tuple *tuple)
  61{
  62        seq_printf(s, "src=%pI4 dst=%pI4 ",
  63                   &tuple->src.u3.ip, &tuple->dst.u3.ip);
  64}
  65
  66static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
  67                            unsigned int *dataoff, u_int8_t *protonum)
  68{
  69        const struct iphdr *iph;
  70        struct iphdr _iph;
  71
  72        iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
  73        if (iph == NULL)
  74                return -NF_ACCEPT;
  75
  76        /* Conntrack defragments packets, we might still see fragments
  77         * inside ICMP packets though. */
  78        if (iph->frag_off & htons(IP_OFFSET))
  79                return -NF_ACCEPT;
  80
  81        *dataoff = nhoff + (iph->ihl << 2);
  82        *protonum = iph->protocol;
  83
  84        /* Check bogus IP headers */
  85        if (*dataoff > skb->len) {
  86                pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
  87                         "nhoff %u, ihl %u, skblen %u\n",
  88                         nhoff, iph->ihl << 2, skb->len);
  89                return -NF_ACCEPT;
  90        }
  91
  92        return NF_ACCEPT;
  93}
  94
  95static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
  96                                struct sk_buff *skb,
  97                                const struct nf_hook_state *state)
  98{
  99        struct nf_conn *ct;
 100        enum ip_conntrack_info ctinfo;
 101        const struct nf_conn_help *help;
 102        const struct nf_conntrack_helper *helper;
 103
 104        /* This is where we call the helper: as the packet goes out. */
 105        ct = nf_ct_get(skb, &ctinfo);
 106        if (!ct || ctinfo == IP_CT_RELATED_REPLY)
 107                return NF_ACCEPT;
 108
 109        help = nfct_help(ct);
 110        if (!help)
 111                return NF_ACCEPT;
 112
 113        /* rcu_read_lock()ed by nf_hook_slow */
 114        helper = rcu_dereference(help->helper);
 115        if (!helper)
 116                return NF_ACCEPT;
 117
 118        return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
 119                            ct, ctinfo);
 120}
 121
 122static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
 123                                 struct sk_buff *skb,
 124                                 const struct nf_hook_state *state)
 125{
 126        struct nf_conn *ct;
 127        enum ip_conntrack_info ctinfo;
 128
 129        ct = nf_ct_get(skb, &ctinfo);
 130        if (!ct || ctinfo == IP_CT_RELATED_REPLY)
 131                goto out;
 132
 133        /* adjust seqs for loopback traffic only in outgoing direction */
 134        if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
 135            !nf_is_loopback_packet(skb)) {
 136                if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
 137                        NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
 138                        return NF_DROP;
 139                }
 140        }
 141out:
 142        /* We've seen it coming out the other side: confirm it */
 143        return nf_conntrack_confirm(skb);
 144}
 145
 146static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
 147                                      struct sk_buff *skb,
 148                                      const struct nf_hook_state *state)
 149{
 150        return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb);
 151}
 152
 153static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
 154                                         struct sk_buff *skb,
 155                                         const struct nf_hook_state *state)
 156{
 157        /* root is playing with raw sockets. */
 158        if (skb->len < sizeof(struct iphdr) ||
 159            ip_hdrlen(skb) < sizeof(struct iphdr))
 160                return NF_ACCEPT;
 161        return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb);
 162}
 163
 164/* Connection tracking may drop packets, but never alters them, so
 165   make it the first hook. */
 166static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
 167        {
 168                .hook           = ipv4_conntrack_in,
 169                .owner          = THIS_MODULE,
 170                .pf             = NFPROTO_IPV4,
 171                .hooknum        = NF_INET_PRE_ROUTING,
 172                .priority       = NF_IP_PRI_CONNTRACK,
 173        },
 174        {
 175                .hook           = ipv4_conntrack_local,
 176                .owner          = THIS_MODULE,
 177                .pf             = NFPROTO_IPV4,
 178                .hooknum        = NF_INET_LOCAL_OUT,
 179                .priority       = NF_IP_PRI_CONNTRACK,
 180        },
 181        {
 182                .hook           = ipv4_helper,
 183                .owner          = THIS_MODULE,
 184                .pf             = NFPROTO_IPV4,
 185                .hooknum        = NF_INET_POST_ROUTING,
 186                .priority       = NF_IP_PRI_CONNTRACK_HELPER,
 187        },
 188        {
 189                .hook           = ipv4_confirm,
 190                .owner          = THIS_MODULE,
 191                .pf             = NFPROTO_IPV4,
 192                .hooknum        = NF_INET_POST_ROUTING,
 193                .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
 194        },
 195        {
 196                .hook           = ipv4_helper,
 197                .owner          = THIS_MODULE,
 198                .pf             = NFPROTO_IPV4,
 199                .hooknum        = NF_INET_LOCAL_IN,
 200                .priority       = NF_IP_PRI_CONNTRACK_HELPER,
 201        },
 202        {
 203                .hook           = ipv4_confirm,
 204                .owner          = THIS_MODULE,
 205                .pf             = NFPROTO_IPV4,
 206                .hooknum        = NF_INET_LOCAL_IN,
 207                .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
 208        },
 209};
 210
 211#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 212static int log_invalid_proto_min = 0;
 213static int log_invalid_proto_max = 255;
 214
 215static struct ctl_table ip_ct_sysctl_table[] = {
 216        {
 217                .procname       = "ip_conntrack_max",
 218                .maxlen         = sizeof(int),
 219                .mode           = 0644,
 220                .proc_handler   = proc_dointvec,
 221        },
 222        {
 223                .procname       = "ip_conntrack_count",
 224                .maxlen         = sizeof(int),
 225                .mode           = 0444,
 226                .proc_handler   = proc_dointvec,
 227        },
 228        {
 229                .procname       = "ip_conntrack_buckets",
 230                .maxlen         = sizeof(unsigned int),
 231                .mode           = 0444,
 232                .proc_handler   = proc_dointvec,
 233        },
 234        {
 235                .procname       = "ip_conntrack_checksum",
 236                .maxlen         = sizeof(int),
 237                .mode           = 0644,
 238                .proc_handler   = proc_dointvec,
 239        },
 240        {
 241                .procname       = "ip_conntrack_log_invalid",
 242                .maxlen         = sizeof(unsigned int),
 243                .mode           = 0644,
 244                .proc_handler   = proc_dointvec_minmax,
 245                .extra1         = &log_invalid_proto_min,
 246                .extra2         = &log_invalid_proto_max,
 247        },
 248        { }
 249};
 250#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
 251
 252/* Fast function for those who don't want to parse /proc (and I don't
 253   blame them). */
 254/* Reversing the socket's dst/src point of view gives us the reply
 255   mapping. */
 256static int
 257getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 258{
 259        const struct inet_sock *inet = inet_sk(sk);
 260        const struct nf_conntrack_tuple_hash *h;
 261        struct nf_conntrack_tuple tuple;
 262
 263        memset(&tuple, 0, sizeof(tuple));
 264        tuple.src.u3.ip = inet->inet_rcv_saddr;
 265        tuple.src.u.tcp.port = inet->inet_sport;
 266        tuple.dst.u3.ip = inet->inet_daddr;
 267        tuple.dst.u.tcp.port = inet->inet_dport;
 268        tuple.src.l3num = PF_INET;
 269        tuple.dst.protonum = sk->sk_protocol;
 270
 271        /* We only do TCP and SCTP at the moment: is there a better way? */
 272        if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
 273                pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
 274                return -ENOPROTOOPT;
 275        }
 276
 277        if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
 278                pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n",
 279                         *len, sizeof(struct sockaddr_in));
 280                return -EINVAL;
 281        }
 282
 283        h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
 284        if (h) {
 285                struct sockaddr_in sin;
 286                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 287
 288                sin.sin_family = AF_INET;
 289                sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
 290                        .tuple.dst.u.tcp.port;
 291                sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
 292                        .tuple.dst.u3.ip;
 293                memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 294
 295                pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
 296                         &sin.sin_addr.s_addr, ntohs(sin.sin_port));
 297                nf_ct_put(ct);
 298                if (copy_to_user(user, &sin, sizeof(sin)) != 0)
 299                        return -EFAULT;
 300                else
 301                        return 0;
 302        }
 303        pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
 304                 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
 305                 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
 306        return -ENOENT;
 307}
 308
 309#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 310
 311#include <linux/netfilter/nfnetlink.h>
 312#include <linux/netfilter/nfnetlink_conntrack.h>
 313
 314static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
 315                                const struct nf_conntrack_tuple *tuple)
 316{
 317        if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
 318            nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
 319                goto nla_put_failure;
 320        return 0;
 321
 322nla_put_failure:
 323        return -1;
 324}
 325
 326static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
 327        [CTA_IP_V4_SRC] = { .type = NLA_U32 },
 328        [CTA_IP_V4_DST] = { .type = NLA_U32 },
 329};
 330
 331static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
 332                                struct nf_conntrack_tuple *t)
 333{
 334        if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
 335                return -EINVAL;
 336
 337        t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
 338        t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
 339
 340        return 0;
 341}
 342
 343static int ipv4_nlattr_tuple_size(void)
 344{
 345        return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
 346}
 347#endif
 348
 349static struct nf_sockopt_ops so_getorigdst = {
 350        .pf             = PF_INET,
 351        .get_optmin     = SO_ORIGINAL_DST,
 352        .get_optmax     = SO_ORIGINAL_DST+1,
 353        .get            = getorigdst,
 354        .owner          = THIS_MODULE,
 355};
 356
 357static int ipv4_init_net(struct net *net)
 358{
 359#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 360        struct nf_ip_net *in = &net->ct.nf_ct_proto;
 361        in->ctl_table = kmemdup(ip_ct_sysctl_table,
 362                                sizeof(ip_ct_sysctl_table),
 363                                GFP_KERNEL);
 364        if (!in->ctl_table)
 365                return -ENOMEM;
 366
 367        in->ctl_table[0].data = &nf_conntrack_max;
 368        in->ctl_table[1].data = &net->ct.count;
 369        in->ctl_table[2].data = &net->ct.htable_size;
 370        in->ctl_table[3].data = &net->ct.sysctl_checksum;
 371        in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
 372#endif
 373        return 0;
 374}
 375
 376struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
 377        .l3proto         = PF_INET,
 378        .name            = "ipv4",
 379        .pkt_to_tuple    = ipv4_pkt_to_tuple,
 380        .invert_tuple    = ipv4_invert_tuple,
 381        .print_tuple     = ipv4_print_tuple,
 382        .get_l4proto     = ipv4_get_l4proto,
 383#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 384        .tuple_to_nlattr = ipv4_tuple_to_nlattr,
 385        .nlattr_tuple_size = ipv4_nlattr_tuple_size,
 386        .nlattr_to_tuple = ipv4_nlattr_to_tuple,
 387        .nla_policy      = ipv4_nla_policy,
 388#endif
 389#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 390        .ctl_table_path  = "net/ipv4/netfilter",
 391#endif
 392        .init_net        = ipv4_init_net,
 393        .me              = THIS_MODULE,
 394};
 395
 396module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 397                  &nf_conntrack_htable_size, 0600);
 398
 399MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
 400MODULE_ALIAS("ip_conntrack");
 401MODULE_LICENSE("GPL");
 402
 403static int ipv4_net_init(struct net *net)
 404{
 405        int ret = 0;
 406
 407        ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
 408        if (ret < 0) {
 409                pr_err("nf_conntrack_tcp4: pernet registration failed\n");
 410                goto out_tcp;
 411        }
 412        ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
 413        if (ret < 0) {
 414                pr_err("nf_conntrack_udp4: pernet registration failed\n");
 415                goto out_udp;
 416        }
 417        ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
 418        if (ret < 0) {
 419                pr_err("nf_conntrack_icmp4: pernet registration failed\n");
 420                goto out_icmp;
 421        }
 422        ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
 423        if (ret < 0) {
 424                pr_err("nf_conntrack_ipv4: pernet registration failed\n");
 425                goto out_ipv4;
 426        }
 427        return 0;
 428out_ipv4:
 429        nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
 430out_icmp:
 431        nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
 432out_udp:
 433        nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
 434out_tcp:
 435        return ret;
 436}
 437
 438static void ipv4_net_exit(struct net *net)
 439{
 440        nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
 441        nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
 442        nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
 443        nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
 444}
 445
 446static struct pernet_operations ipv4_net_ops = {
 447        .init = ipv4_net_init,
 448        .exit = ipv4_net_exit,
 449};
 450
 451static int __init nf_conntrack_l3proto_ipv4_init(void)
 452{
 453        int ret = 0;
 454
 455        need_conntrack();
 456        nf_defrag_ipv4_enable();
 457
 458        ret = nf_register_sockopt(&so_getorigdst);
 459        if (ret < 0) {
 460                printk(KERN_ERR "Unable to register netfilter socket option\n");
 461                return ret;
 462        }
 463
 464        ret = register_pernet_subsys(&ipv4_net_ops);
 465        if (ret < 0) {
 466                pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
 467                goto cleanup_sockopt;
 468        }
 469
 470        ret = nf_register_hooks(ipv4_conntrack_ops,
 471                                ARRAY_SIZE(ipv4_conntrack_ops));
 472        if (ret < 0) {
 473                pr_err("nf_conntrack_ipv4: can't register hooks.\n");
 474                goto cleanup_pernet;
 475        }
 476
 477        ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
 478        if (ret < 0) {
 479                pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
 480                goto cleanup_hooks;
 481        }
 482
 483        ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
 484        if (ret < 0) {
 485                pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
 486                goto cleanup_tcp4;
 487        }
 488
 489        ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
 490        if (ret < 0) {
 491                pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
 492                goto cleanup_udp4;
 493        }
 494
 495        ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
 496        if (ret < 0) {
 497                pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
 498                goto cleanup_icmpv4;
 499        }
 500
 501#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 502        ret = nf_conntrack_ipv4_compat_init();
 503        if (ret < 0)
 504                goto cleanup_proto;
 505#endif
 506        return ret;
 507#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 508 cleanup_proto:
 509        nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 510#endif
 511 cleanup_icmpv4:
 512        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
 513 cleanup_udp4:
 514        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
 515 cleanup_tcp4:
 516        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
 517 cleanup_hooks:
 518        nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
 519 cleanup_pernet:
 520        unregister_pernet_subsys(&ipv4_net_ops);
 521 cleanup_sockopt:
 522        nf_unregister_sockopt(&so_getorigdst);
 523        return ret;
 524}
 525
 526static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 527{
 528        synchronize_net();
 529#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 530        nf_conntrack_ipv4_compat_fini();
 531#endif
 532        nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 533        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
 534        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
 535        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
 536        nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
 537        unregister_pernet_subsys(&ipv4_net_ops);
 538        nf_unregister_sockopt(&so_getorigdst);
 539}
 540
 541module_init(nf_conntrack_l3proto_ipv4_init);
 542module_exit(nf_conntrack_l3proto_ipv4_fini);
 543