1
2
3
4
5
6
7
8
9
10
11
12#include <linux/types.h>
13#include <linux/netfilter.h>
14#include <linux/skbuff.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <linux/stddef.h>
18#include <linux/slab.h>
19#include <linux/err.h>
20#include <linux/percpu.h>
21#include <linux/kernel.h>
22#include <linux/jhash.h>
23#include <linux/moduleparam.h>
24#include <linux/export.h>
25#include <net/net_namespace.h>
26
27#include <net/netfilter/nf_conntrack.h>
28#include <net/netfilter/nf_conntrack_core.h>
29#include <net/netfilter/nf_conntrack_expect.h>
30#include <net/netfilter/nf_conntrack_helper.h>
31#include <net/netfilter/nf_conntrack_tuple.h>
32#include <net/netfilter/nf_conntrack_zones.h>
33
/* Number of buckets in the expectation hash (module param, read-only). */
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

/* Upper bound on expectations per net namespace (set to 4 * hsize at init). */
unsigned int nf_ct_expect_max __read_mostly;

/* Slab cache backing struct nf_conntrack_expect allocations. */
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
40
41
/* Unlink @exp from the per-net expectation hash and from its master
 * conntrack's list, send an IPEXP_DESTROY event to @pid, and drop the
 * table's reference.  Caller must hold nf_conntrack_lock and must have
 * already stopped the timeout timer (asserted below).
 */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
				u32 pid, int report)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	/* RCU removal: concurrent readers may still traverse the node. */
	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	/* Remove from the master's private expectation list. */
	hlist_del(&exp->lnode);
	master_help->expecting[exp->class]--;

	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
	nf_ct_expect_put(exp);	/* drop the hash table's reference */

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
63
64static void nf_ct_expectation_timed_out(unsigned long ul_expect)
65{
66 struct nf_conntrack_expect *exp = (void *)ul_expect;
67
68 spin_lock_bh(&nf_conntrack_lock);
69 nf_ct_unlink_expect(exp);
70 spin_unlock_bh(&nf_conntrack_lock);
71 nf_ct_expect_put(exp);
72}
73
74static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
75{
76 unsigned int hash;
77
78 if (unlikely(!nf_conntrack_hash_rnd)) {
79 init_nf_conntrack_hash_rnd();
80 }
81
82 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
83 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
84 (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
85 return ((u64)hash * nf_ct_expect_hsize) >> 32;
86}
87
88struct nf_conntrack_expect *
89__nf_ct_expect_find(struct net *net, u16 zone,
90 const struct nf_conntrack_tuple *tuple)
91{
92 struct nf_conntrack_expect *i;
93 unsigned int h;
94
95 if (!net->ct.expect_count)
96 return NULL;
97
98 h = nf_ct_expect_dst_hash(tuple);
99 hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
100 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
101 nf_ct_zone(i->master) == zone)
102 return i;
103 }
104 return NULL;
105}
106EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
107
108
109struct nf_conntrack_expect *
110nf_ct_expect_find_get(struct net *net, u16 zone,
111 const struct nf_conntrack_tuple *tuple)
112{
113 struct nf_conntrack_expect *i;
114
115 rcu_read_lock();
116 i = __nf_ct_expect_find(net, zone, tuple);
117 if (i && !atomic_inc_not_zero(&i->use))
118 i = NULL;
119 rcu_read_unlock();
120
121 return i;
122}
123EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
124
125
126
/* Find an active expectation matching @tuple and claim it for a newly
 * seen connection.  On success a referenced expectation is returned;
 * non-PERMANENT entries are additionally unlinked from the tables.
 * Uses the non-RCU list walk, so the caller must hold nf_conntrack_lock.
 */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
		       const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* An unconfirmed master cannot be used; the expectation will be
	 * cleaned up together with the master conntrack.
	 */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		/* Permanent entries stay linked; just take a reference. */
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		/* We stopped the timer, so its reference is now ours;
		 * nf_ct_unlink_expect() drops the hash reference, leaving
		 * exactly one reference for the caller.
		 */
		nf_ct_unlink_expect(exp);
		return exp;
	}
	/* Timer already fired: the timeout handler owns the teardown. */

	return NULL;
}
167
168
169void nf_ct_remove_expectations(struct nf_conn *ct)
170{
171 struct nf_conn_help *help = nfct_help(ct);
172 struct nf_conntrack_expect *exp;
173 struct hlist_node *next;
174
175
176 if (!help)
177 return;
178
179 hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
180 if (del_timer(&exp->timeout)) {
181 nf_ct_unlink_expect(exp);
182 nf_ct_expect_put(exp);
183 }
184 }
185}
186EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
187
188
189static inline int expect_clash(const struct nf_conntrack_expect *a,
190 const struct nf_conntrack_expect *b)
191{
192
193
194 struct nf_conntrack_tuple_mask intersect_mask;
195 int count;
196
197 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
198
199 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
200 intersect_mask.src.u3.all[count] =
201 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
202 }
203
204 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
205}
206
207static inline int expect_matches(const struct nf_conntrack_expect *a,
208 const struct nf_conntrack_expect *b)
209{
210 return a->master == b->master && a->class == b->class &&
211 nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
212 nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
213 nf_ct_zone(a->master) == nf_ct_zone(b->master);
214}
215
216
/* Cancel a pending expectation.  Generally a bad idea to call this:
 * the expectation could already have matched a connection.
 */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	/* Only act if the timeout handler has not claimed the timer yet. */
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
227
228
229
230
231struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
232{
233 struct nf_conntrack_expect *new;
234
235 new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
236 if (!new)
237 return NULL;
238
239 new->master = me;
240 atomic_set(&new->use, 1);
241 return new;
242}
243EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
244
245void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
246 u_int8_t family,
247 const union nf_inet_addr *saddr,
248 const union nf_inet_addr *daddr,
249 u_int8_t proto, const __be16 *src, const __be16 *dst)
250{
251 int len;
252
253 if (family == AF_INET)
254 len = 4;
255 else
256 len = 16;
257
258 exp->flags = 0;
259 exp->class = class;
260 exp->expectfn = NULL;
261 exp->helper = NULL;
262 exp->tuple.src.l3num = family;
263 exp->tuple.dst.protonum = proto;
264
265 if (saddr) {
266 memcpy(&exp->tuple.src.u3, saddr, len);
267 if (sizeof(exp->tuple.src.u3) > len)
268
269 memset((void *)&exp->tuple.src.u3 + len, 0x00,
270 sizeof(exp->tuple.src.u3) - len);
271 memset(&exp->mask.src.u3, 0xFF, len);
272 if (sizeof(exp->mask.src.u3) > len)
273 memset((void *)&exp->mask.src.u3 + len, 0x00,
274 sizeof(exp->mask.src.u3) - len);
275 } else {
276 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
277 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
278 }
279
280 if (src) {
281 exp->tuple.src.u.all = *src;
282 exp->mask.src.u.all = htons(0xFFFF);
283 } else {
284 exp->tuple.src.u.all = 0;
285 exp->mask.src.u.all = 0;
286 }
287
288 memcpy(&exp->tuple.dst.u3, daddr, len);
289 if (sizeof(exp->tuple.dst.u3) > len)
290
291 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
292 sizeof(exp->tuple.dst.u3) - len);
293
294 exp->tuple.dst.u.all = *dst;
295}
296EXPORT_SYMBOL_GPL(nf_ct_expect_init);
297
298static void nf_ct_expect_free_rcu(struct rcu_head *head)
299{
300 struct nf_conntrack_expect *exp;
301
302 exp = container_of(head, struct nf_conntrack_expect, rcu);
303 kmem_cache_free(nf_ct_expect_cachep, exp);
304}
305
306void nf_ct_expect_put(struct nf_conntrack_expect *exp)
307{
308 if (atomic_dec_and_test(&exp->use))
309 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
310}
311EXPORT_SYMBOL_GPL(nf_ct_expect_put);
312
/* Link @exp into the master's list and the per-net hash, and arm its
 * timeout timer.  Called with nf_conntrack_lock held (see the
 * rcu_dereference_protected() below).
 */
static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(exp);
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	/* Two extra references: one for the hash table, one for the timer. */
	atomic_add(2, &exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		/* Expiry comes from the helper's per-class policy. */
		exp->timeout.expires = jiffies +
			helper->expect_policy[exp->class].timeout * HZ;
	}
	/* NOTE(review): if no helper is attached, exp->timeout.expires is
	 * left unset before add_timer() — confirm callers guarantee a
	 * helper (userspace expectations?) or that this is intentional.
	 */
	add_timer(&exp->timeout);

	NF_CT_STAT_INC(net, expect_create);
	return 0;
}
342
343
344static void evict_oldest_expect(struct nf_conn *master,
345 struct nf_conntrack_expect *new)
346{
347 struct nf_conn_help *master_help = nfct_help(master);
348 struct nf_conntrack_expect *exp, *last = NULL;
349
350 hlist_for_each_entry(exp, &master_help->expectations, lnode) {
351 if (exp->class == new->class)
352 last = exp;
353 }
354
355 if (last && del_timer(&last->timeout)) {
356 nf_ct_unlink_expect(last);
357 nf_ct_expect_put(last);
358 }
359}
360
/* Validate @expect before insertion: refresh an exact duplicate, reject
 * clashing entries, enforce the helper's per-class limit (evicting the
 * oldest if possible) and the global table limit.
 * Returns 1 when insertion may proceed, a negative errno otherwise.
 * Called with nf_conntrack_lock held.
 */
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *next;
	unsigned int h;
	int ret = 1;

	if (!master_help) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			/* Exact duplicate: delete the old entry so the
			 * new one effectively refreshes it.
			 */
			if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				nf_ct_expect_put(i);
				break;
			}
		} else if (expect_clash(i, expect)) {
			/* Overlapping (but not identical) expectation. */
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will it put us over the helper's per-class limit? */
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		p = &helper->expect_policy[expect->class];
		if (p->max_expected &&
		    master_help->expecting[expect->class] >= p->max_expected) {
			/* Try to make room by evicting the oldest. */
			evict_oldest_expect(master, expect);
			if (master_help->expecting[expect->class]
			    >= p->max_expected) {
				ret = -EMFILE;
				goto out;
			}
		}
	}

	if (net->ct.expect_count >= nf_ct_expect_max) {
		net_warn_ratelimited("nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}
413
/* Check, insert and announce a new expectation.
 * Returns 0 on success or a negative errno from the pre-insert check.
 * Takes nf_conntrack_lock; the IPEXP_NEW event is emitted after the
 * lock is released.
 */
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 pid, int report)
{
	int ret;

	spin_lock_bh(&nf_conntrack_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret <= 0)		/* duplicate refresh race or hard error */
		goto out;

	ret = nf_ct_expect_insert(expect);
	if (ret < 0)
		goto out;
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
	return ret;
out:
	spin_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
435
436#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* seq_file iterator state: the hash bucket currently being walked. */
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};
441
442static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
443{
444 struct net *net = seq_file_net(seq);
445 struct ct_expect_iter_state *st = seq->private;
446 struct hlist_node *n;
447
448 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
449 n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
450 if (n)
451 return n;
452 }
453 return NULL;
454}
455
456static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
457 struct hlist_node *head)
458{
459 struct net *net = seq_file_net(seq);
460 struct ct_expect_iter_state *st = seq->private;
461
462 head = rcu_dereference(hlist_next_rcu(head));
463 while (head == NULL) {
464 if (++st->bucket >= nf_ct_expect_hsize)
465 return NULL;
466 head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
467 }
468 return head;
469}
470
471static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
472{
473 struct hlist_node *head = ct_expect_get_first(seq);
474
475 if (head)
476 while (pos && (head = ct_expect_get_next(seq, head)))
477 pos--;
478 return pos ? NULL : head;
479}
480
/* seq_file .start: enter the RCU read side and seek to *pos. */
static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}
487
/* seq_file .next: advance the iterator and the position counter. */
static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}
493
/* seq_file .stop: leave the RCU read side taken in exp_seq_start(). */
static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
499
/* Emit one /proc line for the expectation at @v: remaining timeout,
 * protocol numbers, tuple, flag names and (if present) helper/policy.
 */
static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct nf_conntrack_helper *helper;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	/* Seconds left before expiry; "-" when no timer was set up. */
	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
					 expect->tuple.dst.protonum));

	/* Flag names, comma-separated via the running delimiter. */
	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
		seq_printf(s, "%sINACTIVE", delim);
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_USERSPACE)
		seq_printf(s, "%sUSERSPACE", delim);

	helper = rcu_dereference(nfct_help(expect->master)->helper);
	if (helper) {
		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
		if (helper->expect_policy[expect->class].name)
			seq_printf(s, "/%s",
				   helper->expect_policy[expect->class].name);
	}

	return seq_putc(s, '\n');
}
543
/* seq_file operations for /proc/net/nf_conntrack_expect. */
static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};
550
/* Open the proc file with per-netns iterator state attached. */
static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			    sizeof(struct ct_expect_iter_state));
}
556
/* File operations for /proc/net/nf_conntrack_expect. */
static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
564#endif
565
/* Create /proc/net/nf_conntrack_expect for @net (no-op without procfs). */
static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	if (!proc_create("nf_conntrack_expect", 0440, net->proc_net,
			 &exp_file_ops))
		return -ENOMEM;
#endif
	return 0;
}
578
/* Remove the per-netns proc entry created by exp_proc_init(). */
static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif
}
585
/* Hash size is load-time configurable, read-only afterwards (0400). */
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
587
588int nf_conntrack_expect_pernet_init(struct net *net)
589{
590 int err = -ENOMEM;
591
592 net->ct.expect_count = 0;
593 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
594 if (net->ct.expect_hash == NULL)
595 goto err1;
596
597 err = exp_proc_init(net);
598 if (err < 0)
599 goto err2;
600
601 return 0;
602err2:
603 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
604err1:
605 return err;
606}
607
/* Per-netns teardown: remove the proc entry, free the hash table. */
void nf_conntrack_expect_pernet_fini(struct net *net)
{
	exp_proc_remove(net);
	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}
613
614int nf_conntrack_expect_init(void)
615{
616 if (!nf_ct_expect_hsize) {
617 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
618 if (!nf_ct_expect_hsize)
619 nf_ct_expect_hsize = 1;
620 }
621 nf_ct_expect_max = nf_ct_expect_hsize * 4;
622 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
623 sizeof(struct nf_conntrack_expect),
624 0, 0, NULL);
625 if (!nf_ct_expect_cachep)
626 return -ENOMEM;
627 return 0;
628}
629
/* Module exit: drain pending call_rcu() frees, then destroy the slab. */
void nf_conntrack_expect_fini(void)
{
	rcu_barrier(); /* Wait for call_rcu() before destroy */
	kmem_cache_destroy(nf_ct_expect_cachep);
}
635