linux/net/netfilter/xt_connlimit.c
/*
 * netfilter module to limit the number of parallel tcp
 * connections per IP address.
 *   (c) 2000 Gerd Knorr <kraxel@bytesex.org>
 *   Nov 2002: Martin Bene <martin.bene@icomedias.com>:
 *              only ignore TIME_WAIT or gone connections
 *   (C) CC Computer Consultants GmbH, 2007
 *
 * based on ...
 *
 * Kernel module to match connection tracking information.
 * GPL (C) 1999  Rusty Russell (rusty@rustcorp.com.au).
 */
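/*
 * Illustrative userspace usage of this match (per the iptables connlimit
 * manpage): allow at most two parallel telnet connections per client host:
 *   iptables -A INPUT -p tcp --syn --dport 23 \
 *            -m connlimit --connlimit-above 2 -j REJECT
 */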
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connlimit.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

#define CONNLIMIT_SLOTS         256U

#ifdef CONFIG_LOCKDEP
#define CONNLIMIT_LOCK_SLOTS    8U
#else
#define CONNLIMIT_LOCK_SLOTS    256U
#endif

#define CONNLIMIT_GC_MAX_NODES  8

/* we will save the tuples of all connections we care about */
struct xt_connlimit_conn {
        struct hlist_node               node;
        struct nf_conntrack_tuple       tuple;
        union nf_inet_addr              addr;
};

struct xt_connlimit_rb {
        struct rb_node node;
        struct hlist_head hhead; /* connections/hosts in same subnet */
        union nf_inet_addr addr; /* search key */
};

static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;

struct xt_connlimit_data {
        struct rb_root climit_root4[CONNLIMIT_SLOTS];
        struct rb_root climit_root6[CONNLIMIT_SLOTS];
};

static u_int32_t connlimit_rnd __read_mostly;
static struct kmem_cache *connlimit_rb_cachep __read_mostly;
static struct kmem_cache *connlimit_conn_cachep __read_mostly;

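/* Hash the (masked) address to pick one of the CONNLIMIT_SLOTS tree roots. */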
static inline unsigned int connlimit_iphash(__be32 addr)
{
        return jhash_1word((__force __u32)addr,
                            connlimit_rnd) % CONNLIMIT_SLOTS;
}

static inline unsigned int
connlimit_iphash6(const union nf_inet_addr *addr,
                  const union nf_inet_addr *mask)
{
        union nf_inet_addr res;
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
                res.ip6[i] = addr->ip6[i] & mask->ip6[i];

        return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
                       connlimit_rnd) % CONNLIMIT_SLOTS;
}

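/*
 * A TCP conntrack entry that has reached TIME_WAIT or CLOSE no longer
 * represents a live connection and must not be counted.
 */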
static inline bool already_closed(const struct nf_conn *conn)
{
        if (nf_ct_protonum(conn) == IPPROTO_TCP)
                return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
                       conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
        else
                return false;
}

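/*
 * memcmp-style comparison of two masked addresses (<0, 0 or >0); used as
 * the rbtree search key.
 */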
static int
same_source_net(const union nf_inet_addr *addr,
                const union nf_inet_addr *mask,
                const union nf_inet_addr *u3, u_int8_t family)
{
        if (family == NFPROTO_IPV4) {
                u_int32_t lh = ntohl(addr->ip & mask->ip);
                u_int32_t rh = ntohl(u3->ip & mask->ip);

                /* compare instead of subtracting: the u32 difference can
                 * wrap and flip sign, which would give the rbtree an
                 * inconsistent (non-transitive) ordering
                 */
                return (lh > rh) - (lh < rh);
        } else {
                union nf_inet_addr lh, rh;
                unsigned int i;

                for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
                        lh.ip6[i] = addr->ip6[i] & mask->ip6[i];
                        rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
                }

                return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
        }
}

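/* Remember @tuple so it can be revalidated and recounted later. */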
static bool add_hlist(struct hlist_head *head,
                      const struct nf_conntrack_tuple *tuple,
                      const union nf_inet_addr *addr)
{
        struct xt_connlimit_conn *conn;

        conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
        if (conn == NULL)
                return false;
        conn->tuple = *tuple;
        conn->addr = *addr;
        hlist_add_head(&conn->node, head);
        return true;
}

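/*
 * Walk the saved connections of one source network: prune entries whose
 * conntrack is gone or already closed, and return the number still alive.
 * *addit is cleared when @tuple is already on the list.
 */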
static unsigned int check_hlist(struct net *net,
                                struct hlist_head *head,
                                const struct nf_conntrack_tuple *tuple,
                                const struct nf_conntrack_zone *zone,
                                bool *addit)
{
        const struct nf_conntrack_tuple_hash *found;
        struct xt_connlimit_conn *conn;
        struct hlist_node *n;
        struct nf_conn *found_ct;
        unsigned int length = 0;

        *addit = true;
        rcu_read_lock();

        /* check the saved connections */
        hlist_for_each_entry_safe(conn, n, head, node) {
                found = nf_conntrack_find_get(net, zone, &conn->tuple);
                if (found == NULL) {
                        hlist_del(&conn->node);
                        kmem_cache_free(connlimit_conn_cachep, conn);
                        continue;
                }

                found_ct = nf_ct_tuplehash_to_ctrack(found);

                if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
                        /*
                         * Just to be sure we have it only once in the list.
                         * We should not see tuples twice unless someone hooks
                         * this into a table without "-p tcp --syn".
                         */
                        *addit = false;
                } else if (already_closed(found_ct)) {
                        /*
                         * we do not care about connections which are
                         * closed already -> ditch it
                         */
                        nf_ct_put(found_ct);
                        hlist_del(&conn->node);
                        kmem_cache_free(connlimit_conn_cachep, conn);
                        continue;
                }

                nf_ct_put(found_ct);
                length++;
        }

        rcu_read_unlock();

        return length;
}

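/* Erase the collected empty nodes from the tree and free them. */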
static void tree_nodes_free(struct rb_root *root,
                            struct xt_connlimit_rb *gc_nodes[],
                            unsigned int gc_count)
{
        struct xt_connlimit_rb *rbconn;

        while (gc_count) {
                rbconn = gc_nodes[--gc_count];
                rb_erase(&rbconn->node, root);
                kmem_cache_free(connlimit_rb_cachep, rbconn);
        }
}

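/*
 * Look up @addr/@mask in the rbtree.  On a hit, count (and, unless the
 * tuple is already listed, add) the connection on that node's hlist.  On
 * a miss, collect up to CONNLIMIT_GC_MAX_NODES empty nodes for garbage
 * collection along the way, free them and retry once, then insert a new
 * node.  Returns the connection count, or 0 on allocation failure so the
 * caller can hotdrop.
 */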
static unsigned int
count_tree(struct net *net, struct rb_root *root,
           const struct nf_conntrack_tuple *tuple,
           const union nf_inet_addr *addr, const union nf_inet_addr *mask,
           u8 family, const struct nf_conntrack_zone *zone)
{
        struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
        struct rb_node **rbnode, *parent;
        struct xt_connlimit_rb *rbconn;
        struct xt_connlimit_conn *conn;
        unsigned int gc_count;
        bool no_gc = false;

 restart:
        gc_count = 0;
        parent = NULL;
        rbnode = &(root->rb_node);
        while (*rbnode) {
                int diff;
                bool addit;

                rbconn = container_of(*rbnode, struct xt_connlimit_rb, node);

                parent = *rbnode;
                diff = same_source_net(addr, mask, &rbconn->addr, family);
                if (diff < 0) {
                        rbnode = &((*rbnode)->rb_left);
                } else if (diff > 0) {
                        rbnode = &((*rbnode)->rb_right);
                } else {
                        /* same source network -> be counted! */
                        unsigned int count;
                        count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);

                        tree_nodes_free(root, gc_nodes, gc_count);
                        if (!addit)
                                return count;

                        if (!add_hlist(&rbconn->hhead, tuple, addr))
                                return 0; /* hotdrop */

                        return count + 1;
                }

                if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
                        continue;

                /* only used for GC on hhead, retval and 'addit' ignored */
                check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
                if (hlist_empty(&rbconn->hhead))
                        gc_nodes[gc_count++] = rbconn;
        }

        if (gc_count) {
                no_gc = true;
                tree_nodes_free(root, gc_nodes, gc_count);
                /* tree_nodes_free() before the new allocation lets the
                 * allocator re-use the newly freed objects.
                 *
                 * This is a rare event; in most cases we will find an
                 * existing node to re-use (or gc_count is 0).
                 */
                goto restart;
        }

        /* no match, need to insert new node */
        rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
        if (rbconn == NULL)
                return 0;

        conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
        if (conn == NULL) {
                kmem_cache_free(connlimit_rb_cachep, rbconn);
                return 0;
        }

        conn->tuple = *tuple;
        conn->addr = *addr;
        rbconn->addr = *addr;

        INIT_HLIST_HEAD(&rbconn->hhead);
        hlist_add_head(&conn->node, &rbconn->hhead);

        rb_link_node(&rbconn->node, parent, rbnode);
        rb_insert_color(&rbconn->node, root);
        return 1;
}

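/*
 * Pick the per-family tree root by hash of the masked address and count
 * under the corresponding per-slot lock.
 */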
static int count_them(struct net *net,
                      struct xt_connlimit_data *data,
                      const struct nf_conntrack_tuple *tuple,
                      const union nf_inet_addr *addr,
                      const union nf_inet_addr *mask,
                      u_int8_t family,
                      const struct nf_conntrack_zone *zone)
{
        struct rb_root *root;
        int count;
        u32 hash;

        if (family == NFPROTO_IPV6) {
                hash = connlimit_iphash6(addr, mask);
                root = &data->climit_root6[hash];
        } else {
                hash = connlimit_iphash(addr->ip & mask->ip);
                root = &data->climit_root4[hash];
        }

        spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);

        count = count_tree(net, root, tuple, addr, mask, family, zone);

        spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);

        return count;
}

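/*
 * Match function proper: take the tuple from the packet's conntrack entry
 * (or reconstruct it for untracked packets), count the connections from
 * (or, with XT_CONNLIMIT_DADDR, to) the masked address, and compare the
 * result against the configured limit.
 */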
static bool
connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
        struct net *net = par->net;
        const struct xt_connlimit_info *info = par->matchinfo;
        union nf_inet_addr addr;
        struct nf_conntrack_tuple tuple;
        const struct nf_conntrack_tuple *tuple_ptr = &tuple;
        const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
        enum ip_conntrack_info ctinfo;
        const struct nf_conn *ct;
        unsigned int connections;

        ct = nf_ct_get(skb, &ctinfo);
        if (ct != NULL) {
                tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
                zone = nf_ct_zone(ct);
        } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
                                      par->family, net, &tuple)) {
                goto hotdrop;
        }

        if (par->family == NFPROTO_IPV6) {
                const struct ipv6hdr *iph = ipv6_hdr(skb);
                memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
                       &iph->daddr : &iph->saddr, sizeof(addr.ip6));
        } else {
                const struct iphdr *iph = ip_hdr(skb);
                addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
                          iph->daddr : iph->saddr;
        }

        connections = count_them(net, info->data, tuple_ptr, &addr,
                                 &info->mask, par->family, zone);
        if (connections == 0)
                /* kmem_cache_alloc failed, drop the packet entirely */
                goto hotdrop;

        return (connections > info->limit) ^
               !!(info->flags & XT_CONNLIMIT_INVERT);

 hotdrop:
        par->hotdrop = true;
        return false;
}

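/*
 * Rule setup: lazily initialize the hash seed, pin the conntrack l3proto
 * module and allocate the per-rule tree roots.
 */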
static int connlimit_mt_check(const struct xt_mtchk_param *par)
{
        struct xt_connlimit_info *info = par->matchinfo;
        unsigned int i;
        int ret;

        if (unlikely(!connlimit_rnd)) {
                u_int32_t rand;

                do {
                        get_random_bytes(&rand, sizeof(rand));
                } while (!rand);
                cmpxchg(&connlimit_rnd, 0, rand);
        }
        ret = nf_ct_l3proto_try_module_get(par->family);
        if (ret < 0) {
                pr_info("cannot load conntrack support for address family %u\n",
                        par->family);
                return ret;
        }

        /* init private data */
        info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
        if (info->data == NULL) {
                nf_ct_l3proto_module_put(par->family);
                return -ENOMEM;
        }

        for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
                info->data->climit_root4[i] = RB_ROOT;
        for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
                info->data->climit_root6[i] = RB_ROOT;

        return 0;
}

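/* Free one tree root together with all of its per-connection entries. */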
static void destroy_tree(struct rb_root *r)
{
        struct xt_connlimit_conn *conn;
        struct xt_connlimit_rb *rbconn;
        struct hlist_node *n;
        struct rb_node *node;

        while ((node = rb_first(r)) != NULL) {
                rbconn = container_of(node, struct xt_connlimit_rb, node);

                rb_erase(node, r);

                hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
                        kmem_cache_free(connlimit_conn_cachep, conn);

                kmem_cache_free(connlimit_rb_cachep, rbconn);
        }
}

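/* Rule teardown: release the l3proto reference and free all trees. */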
static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
        const struct xt_connlimit_info *info = par->matchinfo;
        unsigned int i;

        nf_ct_l3proto_module_put(par->family);

        for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
                destroy_tree(&info->data->climit_root4[i]);
        for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
                destroy_tree(&info->data->climit_root6[i]);

        kfree(info->data);
}

static struct xt_match connlimit_mt_reg __read_mostly = {
        .name       = "connlimit",
        .revision   = 1,
        .family     = NFPROTO_UNSPEC,
        .checkentry = connlimit_mt_check,
        .match      = connlimit_mt,
        .matchsize  = sizeof(struct xt_connlimit_info),
        .destroy    = connlimit_mt_destroy,
        .me         = THIS_MODULE,
};

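/*
 * CONNLIMIT_LOCK_SLOTS must evenly divide CONNLIMIT_SLOTS so that
 * hash % CONNLIMIT_LOCK_SLOTS maps every tree root to exactly one lock.
 */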
static int __init connlimit_mt_init(void)
{
        int ret, i;

        BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
        BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);

        for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
                spin_lock_init(&xt_connlimit_locks[i]);

        connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
                                           sizeof(struct xt_connlimit_conn),
                                           0, 0, NULL);
        if (!connlimit_conn_cachep)
                return -ENOMEM;

        connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
                                           sizeof(struct xt_connlimit_rb),
                                           0, 0, NULL);
        if (!connlimit_rb_cachep) {
                kmem_cache_destroy(connlimit_conn_cachep);
                return -ENOMEM;
        }
        ret = xt_register_match(&connlimit_mt_reg);
        if (ret != 0) {
                kmem_cache_destroy(connlimit_conn_cachep);
                kmem_cache_destroy(connlimit_rb_cachep);
        }
        return ret;
}

static void __exit connlimit_mt_exit(void)
{
        xt_unregister_match(&connlimit_mt_reg);
        kmem_cache_destroy(connlimit_conn_cachep);
        kmem_cache_destroy(connlimit_rb_cachep);
}

module_init(connlimit_mt_init);
module_exit(connlimit_mt_exit);
MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
MODULE_DESCRIPTION("Xtables: Number of connections matching");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_connlimit");
MODULE_ALIAS("ip6t_connlimit");