/* linux/net/netfilter/nf_nat_masquerade.c */
   1// SPDX-License-Identifier: GPL-2.0
   2
   3#include <linux/types.h>
   4#include <linux/atomic.h>
   5#include <linux/inetdevice.h>
   6#include <linux/netfilter.h>
   7#include <linux/netfilter_ipv4.h>
   8#include <linux/netfilter_ipv6.h>
   9
  10#include <net/netfilter/nf_nat_masquerade.h>
  11
/* Serializes notifier (un)registration and protects masq_refcnt. */
static DEFINE_MUTEX(masq_mutex);
/* Number of users of the masquerade notifiers; guarded by masq_mutex. */
static unsigned int masq_refcnt __read_mostly;
  14
  15unsigned int
  16nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
  17                       const struct nf_nat_range2 *range,
  18                       const struct net_device *out)
  19{
  20        struct nf_conn *ct;
  21        struct nf_conn_nat *nat;
  22        enum ip_conntrack_info ctinfo;
  23        struct nf_nat_range2 newrange;
  24        const struct rtable *rt;
  25        __be32 newsrc, nh;
  26
  27        WARN_ON(hooknum != NF_INET_POST_ROUTING);
  28
  29        ct = nf_ct_get(skb, &ctinfo);
  30
  31        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
  32                         ctinfo == IP_CT_RELATED_REPLY)));
  33
  34        /* Source address is 0.0.0.0 - locally generated packet that is
  35         * probably not supposed to be masqueraded.
  36         */
  37        if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
  38                return NF_ACCEPT;
  39
  40        rt = skb_rtable(skb);
  41        nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
  42        newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
  43        if (!newsrc) {
  44                pr_info("%s ate my IP address\n", out->name);
  45                return NF_DROP;
  46        }
  47
  48        nat = nf_ct_nat_ext_add(ct);
  49        if (nat)
  50                nat->masq_index = out->ifindex;
  51
  52        /* Transfer from original range. */
  53        memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
  54        memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
  55        newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
  56        newrange.min_addr.ip = newsrc;
  57        newrange.max_addr.ip = newsrc;
  58        newrange.min_proto   = range->min_proto;
  59        newrange.max_proto   = range->max_proto;
  60
  61        /* Hand modified range to generic setup. */
  62        return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
  63}
  64EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
  65
  66static int device_cmp(struct nf_conn *i, void *ifindex)
  67{
  68        const struct nf_conn_nat *nat = nfct_nat(i);
  69
  70        if (!nat)
  71                return 0;
  72        return nat->masq_index == (int)(long)ifindex;
  73}
  74
  75static int masq_device_event(struct notifier_block *this,
  76                             unsigned long event,
  77                             void *ptr)
  78{
  79        const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  80        struct net *net = dev_net(dev);
  81
  82        if (event == NETDEV_DOWN) {
  83                /* Device was downed.  Search entire table for
  84                 * conntracks which were associated with that device,
  85                 * and forget them.
  86                 */
  87
  88                nf_ct_iterate_cleanup_net(net, device_cmp,
  89                                          (void *)(long)dev->ifindex, 0, 0);
  90        }
  91
  92        return NOTIFY_DONE;
  93}
  94
  95static int inet_cmp(struct nf_conn *ct, void *ptr)
  96{
  97        struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
  98        struct net_device *dev = ifa->ifa_dev->dev;
  99        struct nf_conntrack_tuple *tuple;
 100
 101        if (!device_cmp(ct, (void *)(long)dev->ifindex))
 102                return 0;
 103
 104        tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 105
 106        return ifa->ifa_address == tuple->dst.u3.ip;
 107}
 108
 109static int masq_inet_event(struct notifier_block *this,
 110                           unsigned long event,
 111                           void *ptr)
 112{
 113        struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
 114        struct net *net = dev_net(idev->dev);
 115
 116        /* The masq_dev_notifier will catch the case of the device going
 117         * down.  So if the inetdev is dead and being destroyed we have
 118         * no work to do.  Otherwise this is an individual address removal
 119         * and we have to perform the flush.
 120         */
 121        if (idev->dead)
 122                return NOTIFY_DONE;
 123
 124        if (event == NETDEV_DOWN)
 125                nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
 126
 127        return NOTIFY_DONE;
 128}
 129
/* Reacts to interfaces going down. */
static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

/* Reacts to individual IPv4 address removal. */
static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};
 137
#if IS_ENABLED(CONFIG_IPV6)
/* Number of in-flight IPv6 cleanup work items; bounded (see masq_inet6_event). */
static atomic_t v6_worker_count __read_mostly;
 140
/* Resolve a source address on @dev suitable for reaching @daddr, storing
 * it in @saddr. Returns 0 on success or a negative errno.
 *
 * When IPv6 is built as a module, go through the nf_ipv6_ops indirection
 * so this code has no hard symbol dependency on it; -EHOSTUNREACH is
 * returned if the IPv6 module is not loaded.
 */
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}
 157
 158unsigned int
 159nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 160                       const struct net_device *out)
 161{
 162        enum ip_conntrack_info ctinfo;
 163        struct nf_conn_nat *nat;
 164        struct in6_addr src;
 165        struct nf_conn *ct;
 166        struct nf_nat_range2 newrange;
 167
 168        ct = nf_ct_get(skb, &ctinfo);
 169        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
 170                         ctinfo == IP_CT_RELATED_REPLY)));
 171
 172        if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
 173                                   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
 174                return NF_DROP;
 175
 176        nat = nf_ct_nat_ext_add(ct);
 177        if (nat)
 178                nat->masq_index = out->ifindex;
 179
 180        newrange.flags          = range->flags | NF_NAT_RANGE_MAP_IPS;
 181        newrange.min_addr.in6   = src;
 182        newrange.max_addr.in6   = src;
 183        newrange.min_proto      = range->min_proto;
 184        newrange.max_proto      = range->max_proto;
 185
 186        return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
 187}
 188EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
 189
/* Deferred conntrack-flush request for one removed IPv6 address. */
struct masq_dev_work {
	struct work_struct work;
	struct net *net;	/* netns reference, released by the worker */
	struct in6_addr addr;	/* the address that was removed */
	int ifindex;		/* interface the address lived on */
};
 196
 197static int inet6_cmp(struct nf_conn *ct, void *work)
 198{
 199        struct masq_dev_work *w = (struct masq_dev_work *)work;
 200        struct nf_conntrack_tuple *tuple;
 201
 202        if (!device_cmp(ct, (void *)(long)w->ifindex))
 203                return 0;
 204
 205        tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 206
 207        return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
 208}
 209
 210static void iterate_cleanup_work(struct work_struct *work)
 211{
 212        struct masq_dev_work *w;
 213
 214        w = container_of(work, struct masq_dev_work, work);
 215
 216        nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
 217
 218        put_net(w->net);
 219        kfree(w);
 220        atomic_dec(&v6_worker_count);
 221        module_put(THIS_MODULE);
 222}
 223
/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	struct masq_dev_work *w;
	struct net *net;

	/* Cap pending work items at 16; excess address-removal events
	 * are silently dropped, so some stale masqueraded entries may
	 * linger until they time out on their own.
	 */
	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;
	/* Pin the netns; it may be on its way down concurrently. */
	net = maybe_get_net(dev_net(dev));
	if (!net)
		return NOTIFY_DONE;

	/* Hold the module so it can't be unloaded while work is queued. */
	if (!try_module_get(THIS_MODULE))
		goto err_module;

	/* GFP_ATOMIC: notifier context must not sleep. */
	w = kmalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		atomic_inc(&v6_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = dev->ifindex;
		w->net = net;	/* netns ref transferred to the work item */
		w->addr = ifa->addr;
		schedule_work(&w->work);

		return NOTIFY_DONE;
	}

	/* Allocation failed: unwind the module and netns references. */
	module_put(THIS_MODULE);
 err_module:
	put_net(net);
	return NOTIFY_DONE;
}
 268
/* Reacts to individual IPv6 address removal. */
static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

/* Hook IPv6 address-removal events; no-op stub when IPv6 is disabled. */
static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif
 280
/* Register the device/address notifiers needed by masquerading.
 * Reference-counted: only the first caller actually registers; later
 * callers just bump masq_refcnt. Returns 0 on success or a negative
 * errno, undoing any partial registration on failure.
 */
int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	/* Refuse to wrap the reference count. */
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
	/* Error unwind: undo registrations in reverse order, then drop
	 * the reference taken above.
	 */
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);
 321
 322void nf_nat_masquerade_inet_unregister_notifiers(void)
 323{
 324        mutex_lock(&masq_mutex);
 325        /* check if the notifiers still have clients */
 326        if (--masq_refcnt > 0)
 327                goto out_unlock;
 328
 329        unregister_netdevice_notifier(&masq_dev_notifier);
 330        unregister_inetaddr_notifier(&masq_inet_notifier);
 331#if IS_ENABLED(CONFIG_IPV6)
 332        unregister_inet6addr_notifier(&masq_inet6_notifier);
 333#endif
 334out_unlock:
 335        mutex_unlock(&masq_mutex);
 336}
 337EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
 338