linux/net/netfilter/nf_nat_masquerade.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

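/* Deferred conntrack cleanup: one work item per device/address removal
 * event, queued on the system workqueue by nf_nat_masq_schedule().
 */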
struct masq_dev_work {
        struct work_struct work;
        struct net *net;
        union nf_inet_addr addr;
        int ifindex;
        int (*iter)(struct nf_conn *i, void *data);
};

#define MAX_MASQ_WORKER_COUNT   16

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

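/* Pick a usable source address on @out for the packet's next hop and
 * set up source NAT to it. Runs at NF_INET_POST_ROUTING.
 */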
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
                       const struct nf_nat_range2 *range,
                       const struct net_device *out)
{
        struct nf_conn *ct;
        struct nf_conn_nat *nat;
        enum ip_conntrack_info ctinfo;
        struct nf_nat_range2 newrange;
        const struct rtable *rt;
        __be32 newsrc, nh;

        WARN_ON(hooknum != NF_INET_POST_ROUTING);

        ct = nf_ct_get(skb, &ctinfo);

        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
                         ctinfo == IP_CT_RELATED_REPLY)));

        /* Source address is 0.0.0.0 - locally generated packet that is
         * probably not supposed to be masqueraded.
         */
        if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
                return NF_ACCEPT;

        rt = skb_rtable(skb);
        nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
        newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
        if (!newsrc) {
                pr_info("%s ate my IP address\n", out->name);
                return NF_DROP;
        }

        nat = nf_ct_nat_ext_add(ct);
        if (nat)
                nat->masq_index = out->ifindex;

        /* Transfer from original range. */
        memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
        memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
        newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
        newrange.min_addr.ip = newsrc;
        newrange.max_addr.ip = newsrc;
        newrange.min_proto   = range->min_proto;
        newrange.max_proto   = range->max_proto;

        /* Hand modified range to generic setup. */
        return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

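/* Example (illustrative sketch, not verbatim kernel source): the nft
 * masquerade expression invokes this helper from its eval hook, roughly
 *
 *      regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
 *                                                  &range, nft_out(pkt));
 *
 * where 'range' carries the user-configured flags and proto limits.
 */

/* Work handler: walk the conntrack table, evict the entries selected by
 * w->iter, then drop the references taken in nf_nat_masq_schedule().
 */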
static void iterate_cleanup_work(struct work_struct *work)
{
        struct masq_dev_work *w;

        w = container_of(work, struct masq_dev_work, work);

        nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);

        put_net(w->net);
        kfree(w);
        atomic_dec(&masq_worker_count);
        module_put(THIS_MODULE);
}

/* Iterate the conntrack table in the background and remove conntrack
 * entries that use the device/address being removed.
 *
 * If too many work items are queued already or the memory allocation
 * fails, the iteration is skipped; the conntrack entries will time out
 * eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
                                 int ifindex,
                                 int (*iter)(struct nf_conn *i, void *data),
                                 gfp_t gfp_flags)
{
        struct masq_dev_work *w;

        if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
                return;

        net = maybe_get_net(net);
        if (!net)
                return;

        if (!try_module_get(THIS_MODULE))
                goto err_module;

        w = kzalloc(sizeof(*w), gfp_flags);
        if (w) {
                /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
                atomic_inc(&masq_worker_count);

                INIT_WORK(&w->work, iterate_cleanup_work);
                w->ifindex = ifindex;
                w->net = net;
                w->iter = iter;
                if (addr)
                        w->addr = *addr;
                schedule_work(&w->work);
                return;
        }

        module_put(THIS_MODULE);
err_module:
        put_net(net);
}

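/* Conntrack iterator: match entries that were masqueraded via the
 * interface in w->ifindex.
 */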
static int device_cmp(struct nf_conn *i, void *arg)
{
        const struct nf_conn_nat *nat = nfct_nat(i);
        const struct masq_dev_work *w = arg;

        if (!nat)
                return 0;
        return nat->masq_index == w->ifindex;
}

static int masq_device_event(struct notifier_block *this,
                             unsigned long event,
                             void *ptr)
{
        const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);

        if (event == NETDEV_DOWN) {
                /* Device was downed.  Search entire table for
                 * conntracks which were associated with that device,
                 * and forget them.
                 */

                nf_nat_masq_schedule(net, NULL, dev->ifindex,
                                     device_cmp, GFP_KERNEL);
        }

        return NOTIFY_DONE;
}

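/* Conntrack iterator: match entries whose reply destination (i.e. the
 * masqueraded source address) is the address being removed on
 * w->ifindex.
 */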
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
        struct nf_conntrack_tuple *tuple;
        struct masq_dev_work *w = ptr;

        if (!device_cmp(ct, ptr))
                return 0;

        tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

        return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}

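/* inetaddr notifier: an individual IPv4 address is being removed. */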
static int masq_inet_event(struct notifier_block *this,
                           unsigned long event,
                           void *ptr)
{
        const struct in_ifaddr *ifa = ptr;
        const struct in_device *idev;
        const struct net_device *dev;
        union nf_inet_addr addr;

        if (event != NETDEV_DOWN)
                return NOTIFY_DONE;

        /* The masq_dev_notifier will catch the case of the device going
         * down.  So if the inetdev is dead and being destroyed we have
         * no work to do.  Otherwise this is an individual address removal
         * and we have to perform the flush.
         */
        idev = ifa->ifa_dev;
        if (idev->dead)
                return NOTIFY_DONE;

        memset(&addr, 0, sizeof(addr));

        addr.ip = ifa->ifa_address;

        dev = idev->dev;
        nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
                             inet_cmp, GFP_KERNEL);

        return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
        .notifier_call  = masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
        .notifier_call  = masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
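/* When IPv6 is built as a module this file cannot call
 * ipv6_dev_get_saddr() directly, so go through the nf_ipv6_ops
 * indirection; with IPv6 built in, call it directly.
 */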
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
                       const struct in6_addr *daddr, unsigned int srcprefs,
                       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
        const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

        if (!v6_ops)
                return -EHOSTUNREACH;

        return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
        return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

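/* IPv6 counterpart of nf_nat_masquerade_ipv4(): select a source address
 * on @out for the packet's destination and set up source NAT to it.
 */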
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                       const struct net_device *out)
{
        enum ip_conntrack_info ctinfo;
        struct nf_conn_nat *nat;
        struct in6_addr src;
        struct nf_conn *ct;
        struct nf_nat_range2 newrange;

        ct = nf_ct_get(skb, &ctinfo);
        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
                         ctinfo == IP_CT_RELATED_REPLY)));

        if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
                                   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
                return NF_DROP;

        nat = nf_ct_nat_ext_add(ct);
        if (nat)
                nat->masq_index = out->ifindex;

        newrange.flags          = range->flags | NF_NAT_RANGE_MAP_IPS;
        newrange.min_addr.in6   = src;
        newrange.max_addr.in6   = src;
        newrange.min_proto      = range->min_proto;
        newrange.max_proto      = range->max_proto;

        return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

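/* Example (illustrative sketch, not verbatim kernel source): the IPv6
 * flavour is invoked the same way from the nft masquerade eval hook,
 * roughly
 *
 *      regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
 *                                                  nft_out(pkt));
 */
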
/* Atomic notifier; we can't call nf_ct_iterate_cleanup_net() here
 * (it can sleep), so defer it to the system workqueue.
 *
 * As we can get 'a lot' of inet events (depending on the number of
 * IPv6 addresses being deleted), we also need to limit the work item
 * queue.
 */
static int masq_inet6_event(struct notifier_block *this,
                            unsigned long event, void *ptr)
{
        struct inet6_ifaddr *ifa = ptr;
        const struct net_device *dev;
        union nf_inet_addr addr;

        if (event != NETDEV_DOWN)
                return NOTIFY_DONE;

        dev = ifa->idev->dev;

        memset(&addr, 0, sizeof(addr));

        addr.in6 = ifa->addr;

        nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
                             GFP_ATOMIC);
        return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
        .notifier_call  = masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
        return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

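/* Register the shared notifiers on first use; later callers only bump
 * the refcount, serialized by masq_mutex.
 */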
int nf_nat_masquerade_inet_register_notifiers(void)
{
        int ret = 0;

        mutex_lock(&masq_mutex);
        if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
                ret = -EOVERFLOW;
                goto out_unlock;
        }

        /* check if the notifier was already set */
        if (++masq_refcnt > 1)
                goto out_unlock;

        /* Register for device down reports */
        ret = register_netdevice_notifier(&masq_dev_notifier);
        if (ret)
                goto err_dec;
        /* Register IP address change reports */
        ret = register_inetaddr_notifier(&masq_inet_notifier);
        if (ret)
                goto err_unregister;

        ret = nf_nat_masquerade_ipv6_register_notifier();
        if (ret)
                goto err_unreg_inet;

        mutex_unlock(&masq_mutex);
        return ret;
err_unreg_inet:
        unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
        unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
        masq_refcnt--;
out_unlock:
        mutex_unlock(&masq_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

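/* Drop one reference; the notifiers are unregistered once the last
 * user is gone.
 */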
void nf_nat_masquerade_inet_unregister_notifiers(void)
{
        mutex_lock(&masq_mutex);
        /* check if the notifiers still have clients */
        if (--masq_refcnt > 0)
                goto out_unlock;

        unregister_netdevice_notifier(&masq_dev_notifier);
        unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
        unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
        mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);