/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/netns/hash.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

struct hlist_head *nf_ct_expect_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hash);

unsigned int nf_ct_expect_max __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
static unsigned int nf_ct_expect_hashrnd __read_mostly;

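/* nf_conntrack_expect helper functions */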
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
				u32 portid, int report)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	hlist_del(&exp->lnode);
	master_help->expecting[exp->class]--;

	nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);

static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	spin_lock_bh(&nf_conntrack_expect_lock);
	nf_ct_unlink_expect(exp);
	spin_unlock_bh(&nf_conntrack_expect_lock);
	nf_ct_expect_put(exp);
}

static unsigned int nf_ct_expect_dst_hash(const struct net *n,
					  const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash, seed;

	get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));

	seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);

	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ seed);

	return reciprocal_scale(hash, nf_ct_expect_hsize);
}

static bool
nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_expect *i,
		const struct nf_conntrack_zone *zone,
		const struct net *net)
{
	return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
	       net_eq(net, nf_ct_net(i->master)) &&
	       nf_ct_zone_equal_any(i->master, zone);
}

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net,
		    const struct nf_conntrack_zone *zone,
		    const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(net, tuple);
	hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
		if (nf_ct_exp_equal(tuple, i, zone, net))
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

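/* Just find an expectation corresponding to a tuple. */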
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net,
		      const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	rcu_read_lock();
	i = __nf_ct_expect_find(net, zone, tuple);
	if (i && !atomic_inc_not_zero(&i->use))
		i = NULL;
	rcu_read_unlock();

	return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

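/* If an expectation for this connection is found, it gets setup and the
 * packet is allowed through.  On a match, a reference to the master
 * conntrack is taken and the expectation is unlinked, unless it is
 * flagged as permanent. */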
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
		       const struct nf_conntrack_zone *zone,
		       const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(net, tuple);
	hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_exp_equal(tuple, i, zone, net)) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

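	/* If master is not in hash table yet (ie. packet hasn't left
	 * this machine yet), how can other end know about expected?
	 * Hence these are not the droids you are looking for (if
	 * master ct never got confirmed, we'd hold a reference to it
	 * and weird things would happen to future packets). */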
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

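	/* Avoid race with other CPUs, that for exp->master ct, is
	 * about to invoke ->destroy(), or nf_ct_delete() via timeout
	 * or early_drop().
	 *
	 * The atomic_inc_not_zero() check tells: if that fails, we
	 * know that the ct is being destroyed.  If it succeeds, we
	 * can be sure the ct cannot disappear underneath.
	 */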
	if (unlikely(nf_ct_is_dying(exp->master) ||
		     !atomic_inc_not_zero(&exp->master->ct_general.use)))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}

	nf_ct_put(exp->master);

	return NULL;
}

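/* delete all expectations for this conntrack */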
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *next;

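	/* Optimization: most connections never expect any others. */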
	if (!help)
		return;

	spin_lock_bh(&nf_conntrack_expect_lock);
	hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
	spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

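/* Would two expected things clash? */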
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
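	/* Part covered by intersection of masks must be unequal,
	 * otherwise they clash. */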
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master && a->class == b->class &&
	       nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
	       nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}

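/* Generally a bad idea to call this: could have matched already. */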
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_expect_lock);
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

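/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself. */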
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
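			/* address needs to be cleared for nf_ct_tuple_equal */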
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
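		/* address needs to be cleared for nf_ct_tuple_equal */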
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;

#ifdef CONFIG_NF_NAT_NEEDED
	memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
	memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
#endif
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
	struct nf_conntrack_expect *exp;

	exp = container_of(head, struct nf_conntrack_expect, rcu);
	kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(exp);
	unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);

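	/* two references : one for hash insert, one for the timer */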
	atomic_add(2, &exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_expect_lock));
	if (helper) {
		exp->timeout.expires = jiffies +
			helper->expect_policy[exp->class].timeout * HZ;
	}
	add_timer(&exp->timeout);

	NF_CT_STAT_INC(net, expect_create);
	return 0;
}

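/* Race with expectations being used means we could have none to find; OK. */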
static void evict_oldest_expect(struct nf_conn *master,
				struct nf_conntrack_expect *new)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp, *last = NULL;

	hlist_for_each_entry(exp, &master_help->expectations, lnode) {
		if (exp->class == new->class)
			last = exp;
	}

	if (last && del_timer(&last->timeout)) {
		nf_ct_unlink_expect(last);
		nf_ct_expect_put(last);
	}
}

static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *next;
	unsigned int h;
	int ret = 1;

	if (!master_help) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(net, &expect->tuple);
	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				nf_ct_expect_put(i);
				break;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}

	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_expect_lock));
	if (helper) {
		p = &helper->expect_policy[expect->class];
		if (p->max_expected &&
		    master_help->expecting[expect->class] >= p->max_expected) {
			evict_oldest_expect(master, expect);
			if (master_help->expecting[expect->class]
			    >= p->max_expected) {
				ret = -EMFILE;
				goto out;
			}
		}
	}

	if (net->ct.expect_count >= nf_ct_expect_max) {
		net_warn_ratelimited("nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 portid, int report)
{
	int ret;

	spin_lock_bh(&nf_conntrack_expect_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret <= 0)
		goto out;

	ret = nf_ct_expect_insert(expect);
	if (ret < 0)
		goto out;
	spin_unlock_bh(&nf_conntrack_expect_lock);
	nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
	return ret;
out:
	spin_unlock_bh(&nf_conntrack_expect_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct ct_expect_iter_state *st = seq->private;
	struct hlist_node *n;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
					     struct hlist_node *head)
{
	struct ct_expect_iter_state *st = seq->private;

	head = rcu_dereference(hlist_next_rcu(head));
	while (head == NULL) {
		if (++st->bucket >= nf_ct_expect_hsize)
			return NULL;
		head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
	}
	return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *head = ct_expect_get_first(seq);

	if (head)
		while (pos && (head = ct_expect_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct nf_conntrack_helper *helper;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_puts(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
					 expect->tuple.dst.protonum));

	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_puts(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
		seq_printf(s, "%sINACTIVE", delim);
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_USERSPACE)
		seq_printf(s, "%sUSERSPACE", delim);

	helper = rcu_dereference(nfct_help(expect->master)->helper);
	if (helper) {
		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
		if (helper->expect_policy[expect->class].name[0])
			seq_printf(s, "/%s",
				   helper->expect_policy[expect->class].name);
	}

	seq_putc(s, '\n');

	return 0;
}

static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			    sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif /* CONFIG_NF_CONNTRACK_PROCFS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	struct proc_dir_entry *proc;
	kuid_t root_uid;
	kgid_t root_gid;

	proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
			   &exp_file_ops);
	if (!proc)
		return -ENOMEM;

	root_uid = make_kuid(net->user_ns, 0);
	root_gid = make_kgid(net->user_ns, 0);
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);

int nf_conntrack_expect_pernet_init(struct net *net)
{
	net->ct.expect_count = 0;
	return exp_proc_init(net);
}

void nf_conntrack_expect_pernet_fini(struct net *net)
{
	exp_proc_remove(net);
}

int nf_conntrack_expect_init(void)
{
	if (!nf_ct_expect_hsize) {
		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
		if (!nf_ct_expect_hsize)
			nf_ct_expect_hsize = 1;
	}
	nf_ct_expect_max = nf_ct_expect_hsize * 4;
	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
						sizeof(struct nf_conntrack_expect),
						0, 0, NULL);
	if (!nf_ct_expect_cachep)
		return -ENOMEM;

	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
	if (!nf_ct_expect_hash) {
		kmem_cache_destroy(nf_ct_expect_cachep);
		return -ENOMEM;
	}

	return 0;
}

void nf_conntrack_expect_fini(void)
{
	rcu_barrier();
	kmem_cache_destroy(nf_ct_expect_cachep);
	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize);
}