linux/net/netfilter/nf_conncount.c
/*
 * count the number of connections matching an arbitrary key.
 *
 * (C) 2017 Red Hat GmbH
 * Author: Florian Westphal <fw@strlen.de>
 *
 * split from xt_connlimit.c:
 *   (c) 2000 Gerd Knorr <kraxel@bytesex.org>
 *   Nov 2002: Martin Bene <martin.bene@icomedias.com>:
 *              only ignore TIME_WAIT or gone connections
 *   (C) CC Computer Consultants GmbH, 2007
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/x_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_count.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

#define CONNCOUNT_SLOTS         256U

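/* With lockdep enabled, use far fewer lock slots, presumably to keep
 * lock-dependency tracking overhead within reason.
 */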
#ifdef CONFIG_LOCKDEP
#define CONNCOUNT_LOCK_SLOTS    8U
#else
#define CONNCOUNT_LOCK_SLOTS    256U
#endif

#define CONNCOUNT_GC_MAX_NODES  8
#define MAX_KEYLEN              5

/* we will save the tuples of all connections we care about */
struct nf_conncount_tuple {
        struct hlist_node               node;
        struct nf_conntrack_tuple       tuple;
};

struct nf_conncount_rb {
        struct rb_node node;
        struct hlist_head hhead; /* connections/hosts in same subnet */
        u32 key[MAX_KEYLEN];
};

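/* The CONNCOUNT_SLOTS trees are protected by CONNCOUNT_LOCK_SLOTS
 * spinlocks; tree i is locked by lock (i % CONNCOUNT_LOCK_SLOTS).
 */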
static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;

struct nf_conncount_data {
        unsigned int keylen;
        struct rb_root root[CONNCOUNT_SLOTS];
};

static u_int32_t conncount_rnd __read_mostly;
static struct kmem_cache *conncount_rb_cachep __read_mostly;
static struct kmem_cache *conncount_conn_cachep __read_mostly;

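/* TCP connections in TIME_WAIT or CLOSE state are as good as gone and
 * must no longer count toward the limit.
 */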
static inline bool already_closed(const struct nf_conn *conn)
{
        if (nf_ct_protonum(conn) == IPPROTO_TCP)
                return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
                       conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
        else
                return false;
}

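/* Compare two keys of klen 32-bit words; memcmp() semantics.  This is
 * the ordering function for the rbtrees below.
 */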
static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
{
        return memcmp(a, b, klen * sizeof(u32));
}

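/* Save a new tuple at the head of a node's list.  Runs under the slot
 * spinlock, hence GFP_ATOMIC.
 */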
static bool add_hlist(struct hlist_head *head,
                      const struct nf_conntrack_tuple *tuple)
{
        struct nf_conncount_tuple *conn;

        conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
        if (conn == NULL)
                return false;
        conn->tuple = *tuple;
        hlist_add_head(&conn->node, head);
        return true;
}

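/* Prune and count the tuples saved under one tree node: entries whose
 * conntrack entry has disappeared or is already closed are removed.
 * *addit tells the caller whether 'tuple' still needs to be added to
 * the list.
 */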
static unsigned int check_hlist(struct net *net,
                                struct hlist_head *head,
                                const struct nf_conntrack_tuple *tuple,
                                const struct nf_conntrack_zone *zone,
                                bool *addit)
{
        const struct nf_conntrack_tuple_hash *found;
        struct nf_conncount_tuple *conn;
        struct hlist_node *n;
        struct nf_conn *found_ct;
        unsigned int length = 0;

        *addit = tuple ? true : false;

        /* check the saved connections */
        hlist_for_each_entry_safe(conn, n, head, node) {
                found = nf_conntrack_find_get(net, zone, &conn->tuple);
                if (found == NULL) {
                        hlist_del(&conn->node);
                        kmem_cache_free(conncount_conn_cachep, conn);
                        continue;
                }

                found_ct = nf_ct_tuplehash_to_ctrack(found);

                if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple)) {
                        /*
                         * Just to be sure we have it only once in the list.
                         * We should not see tuples twice unless someone hooks
                         * this into a table without "-p tcp --syn".
                         */
                        *addit = false;
                } else if (already_closed(found_ct)) {
                        /*
                         * we do not care about connections which are
                         * closed already -> ditch it
                         */
                        nf_ct_put(found_ct);
                        hlist_del(&conn->node);
                        kmem_cache_free(conncount_conn_cachep, conn);
                        continue;
                }

                nf_ct_put(found_ct);
                length++;
        }

        return length;
}

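/* Erase the nodes collected for garbage collection from the tree and
 * free them.
 */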
static void tree_nodes_free(struct rb_root *root,
                            struct nf_conncount_rb *gc_nodes[],
                            unsigned int gc_count)
{
        struct nf_conncount_rb *rbconn;

        while (gc_count) {
                rbconn = gc_nodes[--gc_count];
                rb_erase(&rbconn->node, root);
                kmem_cache_free(conncount_rb_cachep, rbconn);
        }
}

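/* Walk the tree slot for 'key'.  On a key match, prune and count the
 * saved tuples and add 'tuple' if still needed.  While walking,
 * opportunistically collect up to CONNCOUNT_GC_MAX_NODES nodes whose
 * tuple lists have become empty; if any were freed, restart once before
 * inserting a new node for an unmatched key.
 */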
static unsigned int
count_tree(struct net *net, struct rb_root *root,
           const u32 *key, u8 keylen,
           const struct nf_conntrack_tuple *tuple,
           const struct nf_conntrack_zone *zone)
{
        struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
        struct rb_node **rbnode, *parent;
        struct nf_conncount_rb *rbconn;
        struct nf_conncount_tuple *conn;
        unsigned int gc_count;
        bool no_gc = false;

 restart:
        gc_count = 0;
        parent = NULL;
        rbnode = &(root->rb_node);
        while (*rbnode) {
                int diff;
                bool addit;

                rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);

                parent = *rbnode;
                diff = key_diff(key, rbconn->key, keylen);
                if (diff < 0) {
                        rbnode = &((*rbnode)->rb_left);
                } else if (diff > 0) {
                        rbnode = &((*rbnode)->rb_right);
                } else {
                        /* same source network -> be counted! */
                        unsigned int count;
                        count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);

                        tree_nodes_free(root, gc_nodes, gc_count);
                        if (!addit)
                                return count;

                        if (!add_hlist(&rbconn->hhead, tuple))
                                return 0; /* hotdrop */

                        return count + 1;
                }

                if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
                        continue;

                /* only used for GC on hhead, retval and 'addit' ignored */
                check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
                if (hlist_empty(&rbconn->hhead))
                        gc_nodes[gc_count++] = rbconn;
        }

        if (gc_count) {
                no_gc = true;
                tree_nodes_free(root, gc_nodes, gc_count);
                /* tree_nodes_free() before the new allocation permits the
                 * allocator to re-use a newly freed object.
                 *
                 * This is a rare event; in most cases we will find an
                 * existing node to re-use (or gc_count is 0).
                 */
                goto restart;
        }

        if (!tuple)
                return 0;

        /* no match, need to insert new node */
        rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
        if (rbconn == NULL)
                return 0;

        conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
        if (conn == NULL) {
                kmem_cache_free(conncount_rb_cachep, rbconn);
                return 0;
        }

        conn->tuple = *tuple;
        memcpy(rbconn->key, key, sizeof(u32) * keylen);

        INIT_HLIST_HEAD(&rbconn->hhead);
        hlist_add_head(&conn->node, &rbconn->hhead);

        rb_link_node(&rbconn->node, parent, rbnode);
        rb_insert_color(&rbconn->node, root);
        return 1;
}

/* Count and return the number of conntrack entries in 'net' that match the
 * given 'key'.  If 'tuple' is not NULL, insert it into the accounting data
 * structure; in that case a return value of 0 means a memory allocation
 * failed and the caller should treat the packet as a hotdrop candidate.
 */
unsigned int nf_conncount_count(struct net *net,
                                struct nf_conncount_data *data,
                                const u32 *key,
                                const struct nf_conntrack_tuple *tuple,
                                const struct nf_conntrack_zone *zone)
{
        struct rb_root *root;
        int count;
        u32 hash;

        hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
        root = &data->root[hash];

        spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);

        count = count_tree(net, root, key, data->keylen, tuple, zone);

        spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);

        return count;
}
EXPORT_SYMBOL_GPL(nf_conncount_count);

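/* Allocate per-instance accounting state.  'keylen' is in bytes and must
 * be a non-zero multiple of sizeof(u32), at most MAX_KEYLEN u32 words.
 */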
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
                                            unsigned int keylen)
{
        struct nf_conncount_data *data;
        int ret, i;

        if (keylen % sizeof(u32) ||
            keylen / sizeof(u32) > MAX_KEYLEN ||
            keylen == 0)
                return ERR_PTR(-EINVAL);

        net_get_random_once(&conncount_rnd, sizeof(conncount_rnd));

        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return ERR_PTR(-ENOMEM);

        ret = nf_ct_netns_get(net, family);
        if (ret < 0) {
                kfree(data);
                return ERR_PTR(ret);
        }

        for (i = 0; i < ARRAY_SIZE(data->root); ++i)
                data->root[i] = RB_ROOT;

        data->keylen = keylen / sizeof(u32);

        return data;
}
EXPORT_SYMBOL_GPL(nf_conncount_init);

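/* Free one tree: every node along with all the tuples saved under it. */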
static void destroy_tree(struct rb_root *r)
{
        struct nf_conncount_tuple *conn;
        struct nf_conncount_rb *rbconn;
        struct hlist_node *n;
        struct rb_node *node;

        while ((node = rb_first(r)) != NULL) {
                rbconn = rb_entry(node, struct nf_conncount_rb, node);

                rb_erase(node, r);

                hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
                        kmem_cache_free(conncount_conn_cachep, conn);

                kmem_cache_free(conncount_rb_cachep, rbconn);
        }
}

void nf_conncount_destroy(struct net *net, unsigned int family,
                          struct nf_conncount_data *data)
{
        unsigned int i;

        nf_ct_netns_put(net, family);

        for (i = 0; i < ARRAY_SIZE(data->root); ++i)
                destroy_tree(&data->root[i]);

        kfree(data);
}
EXPORT_SYMBOL_GPL(nf_conncount_destroy);
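
/* A minimal usage sketch of this API, loosely modeled on its xt_connlimit
 * caller.  'limit', 'mask', 'iph', 'tuple' and 'zone' are illustrative
 * assumptions, not identifiers defined in this file:
 *
 *      struct nf_conncount_data *data;
 *      u32 key[MAX_KEYLEN] = {};
 *      unsigned int count;
 *
 *      data = nf_conncount_init(net, NFPROTO_IPV4, sizeof(u32));
 *      if (IS_ERR(data))
 *              return PTR_ERR(data);
 *
 *      key[0] = iph->saddr & mask;     // group connections by source network
 *      count = nf_conncount_count(net, data, key, tuple, zone);
 *      if (count == 0)
 *              return NF_DROP;         // hotdrop: saving 'tuple' failed
 *      if (count > limit)
 *              return NF_DROP;         // over the per-network limit
 *      return NF_ACCEPT;
 *
 *      nf_conncount_destroy(net, NFPROTO_IPV4, data);
 */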

static int __init nf_conncount_modinit(void)
{
        int i;

        BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
        BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);

        for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
                spin_lock_init(&nf_conncount_locks[i]);

        conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
                                           sizeof(struct nf_conncount_tuple),
                                           0, 0, NULL);
        if (!conncount_conn_cachep)
                return -ENOMEM;

        conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
                                           sizeof(struct nf_conncount_rb),
                                           0, 0, NULL);
        if (!conncount_rb_cachep) {
                kmem_cache_destroy(conncount_conn_cachep);
                return -ENOMEM;
        }

        return 0;
}

static void __exit nf_conncount_modexit(void)
{
        kmem_cache_destroy(conncount_conn_cachep);
        kmem_cache_destroy(conncount_rb_cachep);
}

module_init(nf_conncount_modinit);
module_exit(nf_conncount_modexit);
MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_DESCRIPTION("netfilter: count number of connections matching a key");
MODULE_LICENSE("GPL");