linux/net/netfilter/nft_set_rbtree.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License version 2 as
   6 * published by the Free Software Foundation.
   7 *
   8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
   9 */
  10
  11#include <linux/kernel.h>
  12#include <linux/init.h>
  13#include <linux/module.h>
  14#include <linux/list.h>
  15#include <linux/rbtree.h>
  16#include <linux/netlink.h>
  17#include <linux/netfilter.h>
  18#include <linux/netfilter/nf_tables.h>
  19#include <net/netfilter/nf_tables.h>
  20
  21struct nft_rbtree {
  22        struct rb_root          root;
  23        rwlock_t                lock;
  24        seqcount_t              count;
  25};
  26
  27struct nft_rbtree_elem {
  28        struct rb_node          node;
  29        struct nft_set_ext      ext;
  30};
  31
  32static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
  33{
  34        return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
  35               (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
  36}
  37
  38static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
  39                             const struct nft_rbtree_elem *interval)
  40{
  41        return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
  42}
  43
  44static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
  45                                const u32 *key, const struct nft_set_ext **ext,
  46                                unsigned int seq)
  47{
  48        struct nft_rbtree *priv = nft_set_priv(set);
  49        const struct nft_rbtree_elem *rbe, *interval = NULL;
  50        u8 genmask = nft_genmask_cur(net);
  51        const struct rb_node *parent;
  52        const void *this;
  53        int d;
  54
  55        parent = rcu_dereference_raw(priv->root.rb_node);
  56        while (parent != NULL) {
  57                if (read_seqcount_retry(&priv->count, seq))
  58                        return false;
  59
  60                rbe = rb_entry(parent, struct nft_rbtree_elem, node);
  61
  62                this = nft_set_ext_key(&rbe->ext);
  63                d = memcmp(this, key, set->klen);
  64                if (d < 0) {
  65                        parent = rcu_dereference_raw(parent->rb_left);
  66                        if (interval &&
  67                            nft_rbtree_equal(set, this, interval) &&
  68                            nft_rbtree_interval_end(this) &&
  69                            !nft_rbtree_interval_end(interval))
  70                                continue;
  71                        interval = rbe;
  72                } else if (d > 0)
  73                        parent = rcu_dereference_raw(parent->rb_right);
  74                else {
  75                        if (!nft_set_elem_active(&rbe->ext, genmask)) {
  76                                parent = rcu_dereference_raw(parent->rb_left);
  77                                continue;
  78                        }
  79                        if (nft_rbtree_interval_end(rbe))
  80                                goto out;
  81
  82                        *ext = &rbe->ext;
  83                        return true;
  84                }
  85        }
  86
  87        if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
  88            nft_set_elem_active(&interval->ext, genmask) &&
  89            !nft_rbtree_interval_end(interval)) {
  90                *ext = &interval->ext;
  91                return true;
  92        }
  93out:
  94        return false;
  95}
  96
  97static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
  98                              const u32 *key, const struct nft_set_ext **ext)
  99{
 100        struct nft_rbtree *priv = nft_set_priv(set);
 101        unsigned int seq = read_seqcount_begin(&priv->count);
 102        bool ret;
 103
 104        ret = __nft_rbtree_lookup(net, set, key, ext, seq);
 105        if (ret || !read_seqcount_retry(&priv->count, seq))
 106                return ret;
 107
 108        read_lock_bh(&priv->lock);
 109        seq = read_seqcount_begin(&priv->count);
 110        ret = __nft_rbtree_lookup(net, set, key, ext, seq);
 111        read_unlock_bh(&priv->lock);
 112
 113        return ret;
 114}
 115
 116static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 117                               struct nft_rbtree_elem *new,
 118                               struct nft_set_ext **ext)
 119{
 120        struct nft_rbtree *priv = nft_set_priv(set);
 121        u8 genmask = nft_genmask_next(net);
 122        struct nft_rbtree_elem *rbe;
 123        struct rb_node *parent, **p;
 124        int d;
 125
 126        parent = NULL;
 127        p = &priv->root.rb_node;
 128        while (*p != NULL) {
 129                parent = *p;
 130                rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 131                d = memcmp(nft_set_ext_key(&rbe->ext),
 132                           nft_set_ext_key(&new->ext),
 133                           set->klen);
 134                if (d < 0)
 135                        p = &parent->rb_left;
 136                else if (d > 0)
 137                        p = &parent->rb_right;
 138                else {
 139                        if (nft_rbtree_interval_end(rbe) &&
 140                            !nft_rbtree_interval_end(new)) {
 141                                p = &parent->rb_left;
 142                        } else if (!nft_rbtree_interval_end(rbe) &&
 143                                   nft_rbtree_interval_end(new)) {
 144                                p = &parent->rb_right;
 145                        } else if (nft_set_elem_active(&rbe->ext, genmask)) {
 146                                *ext = &rbe->ext;
 147                                return -EEXIST;
 148                        } else {
 149                                p = &parent->rb_left;
 150                        }
 151                }
 152        }
 153        rb_link_node_rcu(&new->node, parent, p);
 154        rb_insert_color(&new->node, &priv->root);
 155        return 0;
 156}
 157
 158static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 159                             const struct nft_set_elem *elem,
 160                             struct nft_set_ext **ext)
 161{
 162        struct nft_rbtree *priv = nft_set_priv(set);
 163        struct nft_rbtree_elem *rbe = elem->priv;
 164        int err;
 165
 166        write_lock_bh(&priv->lock);
 167        write_seqcount_begin(&priv->count);
 168        err = __nft_rbtree_insert(net, set, rbe, ext);
 169        write_seqcount_end(&priv->count);
 170        write_unlock_bh(&priv->lock);
 171
 172        return err;
 173}
 174
 175static void nft_rbtree_remove(const struct net *net,
 176                              const struct nft_set *set,
 177                              const struct nft_set_elem *elem)
 178{
 179        struct nft_rbtree *priv = nft_set_priv(set);
 180        struct nft_rbtree_elem *rbe = elem->priv;
 181
 182        write_lock_bh(&priv->lock);
 183        write_seqcount_begin(&priv->count);
 184        rb_erase(&rbe->node, &priv->root);
 185        write_seqcount_end(&priv->count);
 186        write_unlock_bh(&priv->lock);
 187}
 188
 189static void nft_rbtree_activate(const struct net *net,
 190                                const struct nft_set *set,
 191                                const struct nft_set_elem *elem)
 192{
 193        struct nft_rbtree_elem *rbe = elem->priv;
 194
 195        nft_set_elem_change_active(net, set, &rbe->ext);
 196}
 197
 198static bool nft_rbtree_flush(const struct net *net,
 199                             const struct nft_set *set, void *priv)
 200{
 201        struct nft_rbtree_elem *rbe = priv;
 202
 203        nft_set_elem_change_active(net, set, &rbe->ext);
 204        return true;
 205}
 206
 207static void *nft_rbtree_deactivate(const struct net *net,
 208                                   const struct nft_set *set,
 209                                   const struct nft_set_elem *elem)
 210{
 211        const struct nft_rbtree *priv = nft_set_priv(set);
 212        const struct rb_node *parent = priv->root.rb_node;
 213        struct nft_rbtree_elem *rbe, *this = elem->priv;
 214        u8 genmask = nft_genmask_next(net);
 215        int d;
 216
 217        while (parent != NULL) {
 218                rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 219
 220                d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
 221                                           set->klen);
 222                if (d < 0)
 223                        parent = parent->rb_left;
 224                else if (d > 0)
 225                        parent = parent->rb_right;
 226                else {
 227                        if (!nft_set_elem_active(&rbe->ext, genmask)) {
 228                                parent = parent->rb_left;
 229                                continue;
 230                        }
 231                        if (nft_rbtree_interval_end(rbe) &&
 232                            !nft_rbtree_interval_end(this)) {
 233                                parent = parent->rb_left;
 234                                continue;
 235                        } else if (!nft_rbtree_interval_end(rbe) &&
 236                                   nft_rbtree_interval_end(this)) {
 237                                parent = parent->rb_right;
 238                                continue;
 239                        }
 240                        nft_rbtree_flush(net, set, rbe);
 241                        return rbe;
 242                }
 243        }
 244        return NULL;
 245}
 246
 247static void nft_rbtree_walk(const struct nft_ctx *ctx,
 248                            struct nft_set *set,
 249                            struct nft_set_iter *iter)
 250{
 251        struct nft_rbtree *priv = nft_set_priv(set);
 252        struct nft_rbtree_elem *rbe;
 253        struct nft_set_elem elem;
 254        struct rb_node *node;
 255
 256        read_lock_bh(&priv->lock);
 257        for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
 258                rbe = rb_entry(node, struct nft_rbtree_elem, node);
 259
 260                if (iter->count < iter->skip)
 261                        goto cont;
 262                if (!nft_set_elem_active(&rbe->ext, iter->genmask))
 263                        goto cont;
 264
 265                elem.priv = rbe;
 266
 267                iter->err = iter->fn(ctx, set, iter, &elem);
 268                if (iter->err < 0) {
 269                        read_unlock_bh(&priv->lock);
 270                        return;
 271                }
 272cont:
 273                iter->count++;
 274        }
 275        read_unlock_bh(&priv->lock);
 276}
 277
 278static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[],
 279                                        const struct nft_set_desc *desc)
 280{
 281        return sizeof(struct nft_rbtree);
 282}
 283
 284static int nft_rbtree_init(const struct nft_set *set,
 285                           const struct nft_set_desc *desc,
 286                           const struct nlattr * const nla[])
 287{
 288        struct nft_rbtree *priv = nft_set_priv(set);
 289
 290        rwlock_init(&priv->lock);
 291        seqcount_init(&priv->count);
 292        priv->root = RB_ROOT;
 293        return 0;
 294}
 295
 296static void nft_rbtree_destroy(const struct nft_set *set)
 297{
 298        struct nft_rbtree *priv = nft_set_priv(set);
 299        struct nft_rbtree_elem *rbe;
 300        struct rb_node *node;
 301
 302        while ((node = priv->root.rb_node) != NULL) {
 303                rb_erase(node, &priv->root);
 304                rbe = rb_entry(node, struct nft_rbtree_elem, node);
 305                nft_set_elem_destroy(set, rbe, true);
 306        }
 307}
 308
 309static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 310                                struct nft_set_estimate *est)
 311{
 312        if (desc->size)
 313                est->size = sizeof(struct nft_rbtree) +
 314                            desc->size * sizeof(struct nft_rbtree_elem);
 315        else
 316                est->size = ~0;
 317
 318        est->lookup = NFT_SET_CLASS_O_LOG_N;
 319        est->space  = NFT_SET_CLASS_O_N;
 320
 321        return true;
 322}
 323
 324static struct nft_set_type nft_rbtree_type;
 325static struct nft_set_ops nft_rbtree_ops __read_mostly = {
 326        .type           = &nft_rbtree_type,
 327        .privsize       = nft_rbtree_privsize,
 328        .elemsize       = offsetof(struct nft_rbtree_elem, ext),
 329        .estimate       = nft_rbtree_estimate,
 330        .init           = nft_rbtree_init,
 331        .destroy        = nft_rbtree_destroy,
 332        .insert         = nft_rbtree_insert,
 333        .remove         = nft_rbtree_remove,
 334        .deactivate     = nft_rbtree_deactivate,
 335        .flush          = nft_rbtree_flush,
 336        .activate       = nft_rbtree_activate,
 337        .lookup         = nft_rbtree_lookup,
 338        .walk           = nft_rbtree_walk,
 339        .features       = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
 340};
 341
 342static struct nft_set_type nft_rbtree_type __read_mostly = {
 343        .ops            = &nft_rbtree_ops,
 344        .owner          = THIS_MODULE,
 345};
 346
 347static int __init nft_rbtree_module_init(void)
 348{
 349        return nft_register_set(&nft_rbtree_type);
 350}
 351
 352static void __exit nft_rbtree_module_exit(void)
 353{
 354        nft_unregister_set(&nft_rbtree_type);
 355}
 356
 357module_init(nft_rbtree_module_init);
 358module_exit(nft_rbtree_module_exit);
 359
 360MODULE_LICENSE("GPL");
 361MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 362MODULE_ALIAS_NFT_SET();
 363