1
2
3
4
5
6
7
8
9
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/list.h>
15#include <linux/rbtree.h>
16#include <linux/netlink.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables.h>
20
21struct nft_rbtree {
22 struct rb_root root;
23 rwlock_t lock;
24 seqcount_t count;
25};
26
27struct nft_rbtree_elem {
28 struct rb_node node;
29 struct nft_set_ext ext;
30};
31
32static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
33{
34 return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
35 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
36}
37
38static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
39 const struct nft_rbtree_elem *interval)
40{
41 return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
42}
43
44static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
45 const u32 *key, const struct nft_set_ext **ext,
46 unsigned int seq)
47{
48 struct nft_rbtree *priv = nft_set_priv(set);
49 const struct nft_rbtree_elem *rbe, *interval = NULL;
50 u8 genmask = nft_genmask_cur(net);
51 const struct rb_node *parent;
52 const void *this;
53 int d;
54
55 parent = rcu_dereference_raw(priv->root.rb_node);
56 while (parent != NULL) {
57 if (read_seqcount_retry(&priv->count, seq))
58 return false;
59
60 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
61
62 this = nft_set_ext_key(&rbe->ext);
63 d = memcmp(this, key, set->klen);
64 if (d < 0) {
65 parent = rcu_dereference_raw(parent->rb_left);
66 if (interval &&
67 nft_rbtree_equal(set, this, interval) &&
68 nft_rbtree_interval_end(this) &&
69 !nft_rbtree_interval_end(interval))
70 continue;
71 interval = rbe;
72 } else if (d > 0)
73 parent = rcu_dereference_raw(parent->rb_right);
74 else {
75 if (!nft_set_elem_active(&rbe->ext, genmask)) {
76 parent = rcu_dereference_raw(parent->rb_left);
77 continue;
78 }
79 if (nft_rbtree_interval_end(rbe))
80 goto out;
81
82 *ext = &rbe->ext;
83 return true;
84 }
85 }
86
87 if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
88 nft_set_elem_active(&interval->ext, genmask) &&
89 !nft_rbtree_interval_end(interval)) {
90 *ext = &interval->ext;
91 return true;
92 }
93out:
94 return false;
95}
96
97static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
98 const u32 *key, const struct nft_set_ext **ext)
99{
100 struct nft_rbtree *priv = nft_set_priv(set);
101 unsigned int seq = read_seqcount_begin(&priv->count);
102 bool ret;
103
104 ret = __nft_rbtree_lookup(net, set, key, ext, seq);
105 if (ret || !read_seqcount_retry(&priv->count, seq))
106 return ret;
107
108 read_lock_bh(&priv->lock);
109 seq = read_seqcount_begin(&priv->count);
110 ret = __nft_rbtree_lookup(net, set, key, ext, seq);
111 read_unlock_bh(&priv->lock);
112
113 return ret;
114}
115
116static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
117 struct nft_rbtree_elem *new,
118 struct nft_set_ext **ext)
119{
120 struct nft_rbtree *priv = nft_set_priv(set);
121 u8 genmask = nft_genmask_next(net);
122 struct nft_rbtree_elem *rbe;
123 struct rb_node *parent, **p;
124 int d;
125
126 parent = NULL;
127 p = &priv->root.rb_node;
128 while (*p != NULL) {
129 parent = *p;
130 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
131 d = memcmp(nft_set_ext_key(&rbe->ext),
132 nft_set_ext_key(&new->ext),
133 set->klen);
134 if (d < 0)
135 p = &parent->rb_left;
136 else if (d > 0)
137 p = &parent->rb_right;
138 else {
139 if (nft_rbtree_interval_end(rbe) &&
140 !nft_rbtree_interval_end(new)) {
141 p = &parent->rb_left;
142 } else if (!nft_rbtree_interval_end(rbe) &&
143 nft_rbtree_interval_end(new)) {
144 p = &parent->rb_right;
145 } else if (nft_set_elem_active(&rbe->ext, genmask)) {
146 *ext = &rbe->ext;
147 return -EEXIST;
148 } else {
149 p = &parent->rb_left;
150 }
151 }
152 }
153 rb_link_node_rcu(&new->node, parent, p);
154 rb_insert_color(&new->node, &priv->root);
155 return 0;
156}
157
158static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
159 const struct nft_set_elem *elem,
160 struct nft_set_ext **ext)
161{
162 struct nft_rbtree *priv = nft_set_priv(set);
163 struct nft_rbtree_elem *rbe = elem->priv;
164 int err;
165
166 write_lock_bh(&priv->lock);
167 write_seqcount_begin(&priv->count);
168 err = __nft_rbtree_insert(net, set, rbe, ext);
169 write_seqcount_end(&priv->count);
170 write_unlock_bh(&priv->lock);
171
172 return err;
173}
174
175static void nft_rbtree_remove(const struct net *net,
176 const struct nft_set *set,
177 const struct nft_set_elem *elem)
178{
179 struct nft_rbtree *priv = nft_set_priv(set);
180 struct nft_rbtree_elem *rbe = elem->priv;
181
182 write_lock_bh(&priv->lock);
183 write_seqcount_begin(&priv->count);
184 rb_erase(&rbe->node, &priv->root);
185 write_seqcount_end(&priv->count);
186 write_unlock_bh(&priv->lock);
187}
188
189static void nft_rbtree_activate(const struct net *net,
190 const struct nft_set *set,
191 const struct nft_set_elem *elem)
192{
193 struct nft_rbtree_elem *rbe = elem->priv;
194
195 nft_set_elem_change_active(net, set, &rbe->ext);
196}
197
198static bool nft_rbtree_flush(const struct net *net,
199 const struct nft_set *set, void *priv)
200{
201 struct nft_rbtree_elem *rbe = priv;
202
203 nft_set_elem_change_active(net, set, &rbe->ext);
204 return true;
205}
206
207static void *nft_rbtree_deactivate(const struct net *net,
208 const struct nft_set *set,
209 const struct nft_set_elem *elem)
210{
211 const struct nft_rbtree *priv = nft_set_priv(set);
212 const struct rb_node *parent = priv->root.rb_node;
213 struct nft_rbtree_elem *rbe, *this = elem->priv;
214 u8 genmask = nft_genmask_next(net);
215 int d;
216
217 while (parent != NULL) {
218 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
219
220 d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
221 set->klen);
222 if (d < 0)
223 parent = parent->rb_left;
224 else if (d > 0)
225 parent = parent->rb_right;
226 else {
227 if (!nft_set_elem_active(&rbe->ext, genmask)) {
228 parent = parent->rb_left;
229 continue;
230 }
231 if (nft_rbtree_interval_end(rbe) &&
232 !nft_rbtree_interval_end(this)) {
233 parent = parent->rb_left;
234 continue;
235 } else if (!nft_rbtree_interval_end(rbe) &&
236 nft_rbtree_interval_end(this)) {
237 parent = parent->rb_right;
238 continue;
239 }
240 nft_rbtree_flush(net, set, rbe);
241 return rbe;
242 }
243 }
244 return NULL;
245}
246
247static void nft_rbtree_walk(const struct nft_ctx *ctx,
248 struct nft_set *set,
249 struct nft_set_iter *iter)
250{
251 struct nft_rbtree *priv = nft_set_priv(set);
252 struct nft_rbtree_elem *rbe;
253 struct nft_set_elem elem;
254 struct rb_node *node;
255
256 read_lock_bh(&priv->lock);
257 for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
258 rbe = rb_entry(node, struct nft_rbtree_elem, node);
259
260 if (iter->count < iter->skip)
261 goto cont;
262 if (!nft_set_elem_active(&rbe->ext, iter->genmask))
263 goto cont;
264
265 elem.priv = rbe;
266
267 iter->err = iter->fn(ctx, set, iter, &elem);
268 if (iter->err < 0) {
269 read_unlock_bh(&priv->lock);
270 return;
271 }
272cont:
273 iter->count++;
274 }
275 read_unlock_bh(&priv->lock);
276}
277
278static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[],
279 const struct nft_set_desc *desc)
280{
281 return sizeof(struct nft_rbtree);
282}
283
284static int nft_rbtree_init(const struct nft_set *set,
285 const struct nft_set_desc *desc,
286 const struct nlattr * const nla[])
287{
288 struct nft_rbtree *priv = nft_set_priv(set);
289
290 rwlock_init(&priv->lock);
291 seqcount_init(&priv->count);
292 priv->root = RB_ROOT;
293 return 0;
294}
295
296static void nft_rbtree_destroy(const struct nft_set *set)
297{
298 struct nft_rbtree *priv = nft_set_priv(set);
299 struct nft_rbtree_elem *rbe;
300 struct rb_node *node;
301
302 while ((node = priv->root.rb_node) != NULL) {
303 rb_erase(node, &priv->root);
304 rbe = rb_entry(node, struct nft_rbtree_elem, node);
305 nft_set_elem_destroy(set, rbe, true);
306 }
307}
308
309static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
310 struct nft_set_estimate *est)
311{
312 if (desc->size)
313 est->size = sizeof(struct nft_rbtree) +
314 desc->size * sizeof(struct nft_rbtree_elem);
315 else
316 est->size = ~0;
317
318 est->lookup = NFT_SET_CLASS_O_LOG_N;
319 est->space = NFT_SET_CLASS_O_N;
320
321 return true;
322}
323
324static struct nft_set_type nft_rbtree_type;
325static struct nft_set_ops nft_rbtree_ops __read_mostly = {
326 .type = &nft_rbtree_type,
327 .privsize = nft_rbtree_privsize,
328 .elemsize = offsetof(struct nft_rbtree_elem, ext),
329 .estimate = nft_rbtree_estimate,
330 .init = nft_rbtree_init,
331 .destroy = nft_rbtree_destroy,
332 .insert = nft_rbtree_insert,
333 .remove = nft_rbtree_remove,
334 .deactivate = nft_rbtree_deactivate,
335 .flush = nft_rbtree_flush,
336 .activate = nft_rbtree_activate,
337 .lookup = nft_rbtree_lookup,
338 .walk = nft_rbtree_walk,
339 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
340};
341
342static struct nft_set_type nft_rbtree_type __read_mostly = {
343 .ops = &nft_rbtree_ops,
344 .owner = THIS_MODULE,
345};
346
347static int __init nft_rbtree_module_init(void)
348{
349 return nft_register_set(&nft_rbtree_type);
350}
351
352static void __exit nft_rbtree_module_exit(void)
353{
354 nft_unregister_set(&nft_rbtree_type);
355}
356
357module_init(nft_rbtree_module_init);
358module_exit(nft_rbtree_module_exit);
359
360MODULE_LICENSE("GPL");
361MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
362MODULE_ALIAS_NFT_SET();
363