linux/net/netfilter/nf_nat_masquerade.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

struct masq_dev_work {
        struct work_struct work;
        struct net *net;
        netns_tracker ns_tracker;
        union nf_inet_addr addr;
        int ifindex;
        int (*iter)(struct nf_conn *i, void *data);
};

#define MAX_MASQ_WORKER_COUNT   16

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
                       const struct nf_nat_range2 *range,
                       const struct net_device *out)
{
        struct nf_conn *ct;
        struct nf_conn_nat *nat;
        enum ip_conntrack_info ctinfo;
        struct nf_nat_range2 newrange;
        const struct rtable *rt;
        __be32 newsrc, nh;

        WARN_ON(hooknum != NF_INET_POST_ROUTING);

        ct = nf_ct_get(skb, &ctinfo);

        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
                         ctinfo == IP_CT_RELATED_REPLY)));

        /* Source address is 0.0.0.0 - locally generated packet that is
         * probably not supposed to be masqueraded.
         */
        if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
                return NF_ACCEPT;

        rt = skb_rtable(skb);
        nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
        newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
        if (!newsrc) {
                pr_info("%s ate my IP address\n", out->name);
                return NF_DROP;
        }

        nat = nf_ct_nat_ext_add(ct);
        if (nat)
                nat->masq_index = out->ifindex;

        /* Transfer from original range. */
        memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
        memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
        newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
        newrange.min_addr.ip = newsrc;
        newrange.max_addr.ip = newsrc;
        newrange.min_proto   = range->min_proto;
        newrange.max_proto   = range->max_proto;

        /* Hand modified range to generic setup. */
        return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
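
/* Usage sketch (illustration only, not part of this file): a minimal
 * NF_INET_POST_ROUTING hook that masquerades traffic leaving the hook's
 * output device, in the style of the MASQUERADE target. Real callers only
 * invoke this for packets whose conntrack state is new/related, matching
 * the WARN_ON() above. The hook function name is hypothetical; the
 * nf_nat_masquerade_ipv4() call matches the export above.
 */
#if 0
static unsigned int masq_example_hook(void *priv, struct sk_buff *skb,
                                      const struct nf_hook_state *state)
{
        /* No port range requested; only the source address is mapped. */
        struct nf_nat_range2 range = {};

        return nf_nat_masquerade_ipv4(skb, state->hook, &range, state->out);
}
#endif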

static void iterate_cleanup_work(struct work_struct *work)
{
        struct masq_dev_work *w;

        w = container_of(work, struct masq_dev_work, work);

        nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);

        put_net_track(w->net, &w->ns_tracker);
        kfree(w);
        atomic_dec(&masq_worker_count);
        module_put(THIS_MODULE);
}
/* Iterate conntrack table in the background and remove conntrack entries
 * that use the device/address being removed.
 *
 * If too many work items have been queued already or memory allocation
 * fails, the iteration is skipped; conntrack entries will time out
 * eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
                                 int ifindex,
                                 int (*iter)(struct nf_conn *i, void *data),
                                 gfp_t gfp_flags)
{
        struct masq_dev_work *w;

        if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
                return;

        net = maybe_get_net(net);
        if (!net)
                return;

        if (!try_module_get(THIS_MODULE))
                goto err_module;

        w = kzalloc(sizeof(*w), gfp_flags);
        if (w) {
                /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
                atomic_inc(&masq_worker_count);

                INIT_WORK(&w->work, iterate_cleanup_work);
                w->ifindex = ifindex;
                w->net = net;
                netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
                w->iter = iter;
                if (addr)
                        w->addr = *addr;
                schedule_work(&w->work);
                return;
        }

        module_put(THIS_MODULE);
 err_module:
        put_net(net);
}

static int device_cmp(struct nf_conn *i, void *arg)
{
        const struct nf_conn_nat *nat = nfct_nat(i);
        const struct masq_dev_work *w = arg;

        if (!nat)
                return 0;
        return nat->masq_index == w->ifindex;
}

static int masq_device_event(struct notifier_block *this,
                             unsigned long event,
                             void *ptr)
{
        const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);

        if (event == NETDEV_DOWN) {
                /* Device was downed.  Search entire table for
                 * conntracks which were associated with that device,
                 * and forget them.
                 */

                nf_nat_masq_schedule(net, NULL, dev->ifindex,
                                     device_cmp, GFP_KERNEL);
        }

        return NOTIFY_DONE;
}

static int inet_cmp(struct nf_conn *ct, void *ptr)
{
        struct nf_conntrack_tuple *tuple;
        struct masq_dev_work *w = ptr;

        if (!device_cmp(ct, ptr))
                return 0;

        tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

        return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}

static int masq_inet_event(struct notifier_block *this,
                           unsigned long event,
                           void *ptr)
{
        const struct in_ifaddr *ifa = ptr;
        const struct in_device *idev;
        const struct net_device *dev;
        union nf_inet_addr addr;

        if (event != NETDEV_DOWN)
                return NOTIFY_DONE;

        /* The masq_dev_notifier will catch the case of the device going
         * down.  So if the inetdev is dead and being destroyed we have
         * no work to do.  Otherwise this is an individual address removal
         * and we have to perform the flush.
         */
        idev = ifa->ifa_dev;
        if (idev->dead)
                return NOTIFY_DONE;

        memset(&addr, 0, sizeof(addr));

        addr.ip = ifa->ifa_address;

        dev = idev->dev;
        nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
                             inet_cmp, GFP_KERNEL);

        return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
        .notifier_call  = masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
        .notifier_call  = masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
                       const struct in6_addr *daddr, unsigned int srcprefs,
                       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
        const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

        if (!v6_ops)
                return -EHOSTUNREACH;

        return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
        return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                       const struct net_device *out)
{
        enum ip_conntrack_info ctinfo;
        struct nf_conn_nat *nat;
        struct in6_addr src;
        struct nf_conn *ct;
        struct nf_nat_range2 newrange;

        ct = nf_ct_get(skb, &ctinfo);
        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
                         ctinfo == IP_CT_RELATED_REPLY)));

        if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
                                   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
                return NF_DROP;

        nat = nf_ct_nat_ext_add(ct);
        if (nat)
                nat->masq_index = out->ifindex;

        newrange.flags          = range->flags | NF_NAT_RANGE_MAP_IPS;
        newrange.min_addr.in6   = src;
        newrange.max_addr.in6   = src;
        newrange.min_proto      = range->min_proto;
        newrange.max_proto      = range->max_proto;

        return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
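
/* Usage sketch (illustration only, not part of this file): the IPv6 variant
 * takes no hook number and is likewise called from NF_INET_POST_ROUTING by
 * callers such as the MASQUERADE target. The hook function name below is
 * hypothetical.
 */
#if 0
static unsigned int masq6_example_hook(void *priv, struct sk_buff *skb,
                                       const struct nf_hook_state *state)
{
        struct nf_nat_range2 range = {};

        return nf_nat_masquerade_ipv6(skb, &range, state->out);
}
#endif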

/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet events (depending on the number of IPv6
 * addresses being deleted), we also need to limit the work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
                            unsigned long event, void *ptr)
{
        struct inet6_ifaddr *ifa = ptr;
        const struct net_device *dev;
        union nf_inet_addr addr;

        if (event != NETDEV_DOWN)
                return NOTIFY_DONE;

        dev = ifa->idev->dev;

        memset(&addr, 0, sizeof(addr));

        addr.in6 = ifa->addr;

        nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
                             GFP_ATOMIC);
        return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
        .notifier_call  = masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
        return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

int nf_nat_masquerade_inet_register_notifiers(void)
{
        int ret = 0;

        mutex_lock(&masq_mutex);
        if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
                ret = -EOVERFLOW;
                goto out_unlock;
        }

        /* check if the notifier was already set */
        if (++masq_refcnt > 1)
                goto out_unlock;

        /* Register for device down reports */
        ret = register_netdevice_notifier(&masq_dev_notifier);
        if (ret)
                goto err_dec;
        /* Register IP address change reports */
        ret = register_inetaddr_notifier(&masq_inet_notifier);
        if (ret)
                goto err_unregister;

        ret = nf_nat_masquerade_ipv6_register_notifier();
        if (ret)
                goto err_unreg_inet;

        mutex_unlock(&masq_mutex);
        return ret;
err_unreg_inet:
        unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
        unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
        masq_refcnt--;
out_unlock:
        mutex_unlock(&masq_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

void nf_nat_masquerade_inet_unregister_notifiers(void)
{
        mutex_lock(&masq_mutex);
        /* check if the notifiers still have clients */
        if (--masq_refcnt > 0)
                goto out_unlock;

        unregister_netdevice_notifier(&masq_dev_notifier);
        unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
        unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
        mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
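
/* Usage sketch (illustration only, not part of this file): a NAT backend
 * that uses the masquerade helpers pairs the register/unregister calls in
 * its module init/exit, as the MASQUERADE targets do; the refcount above
 * makes this safe for multiple users. Function names are hypothetical.
 */
#if 0
static int __init masq_example_init(void)
{
        /* Takes the shared notifier reference; first caller registers. */
        return nf_nat_masquerade_inet_register_notifiers();
}

static void __exit masq_example_exit(void)
{
        /* Drops the reference; last caller unregisters the notifiers. */
        nf_nat_masquerade_inet_unregister_notifiers();
}

module_init(masq_example_init);
module_exit(masq_example_exit);
#endif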