1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51#define VERSION "0.409"
52
53#include <asm/uaccess.h>
54#include <linux/bitops.h>
55#include <linux/types.h>
56#include <linux/kernel.h>
57#include <linux/mm.h>
58#include <linux/string.h>
59#include <linux/socket.h>
60#include <linux/sockios.h>
61#include <linux/errno.h>
62#include <linux/in.h>
63#include <linux/inet.h>
64#include <linux/inetdevice.h>
65#include <linux/netdevice.h>
66#include <linux/if_arp.h>
67#include <linux/proc_fs.h>
68#include <linux/rcupdate.h>
69#include <linux/skbuff.h>
70#include <linux/netlink.h>
71#include <linux/init.h>
72#include <linux/list.h>
73#include <linux/slab.h>
74#include <linux/export.h>
75#include <linux/notifier.h>
76#include <net/net_namespace.h>
77#include <net/ip.h>
78#include <net/protocol.h>
79#include <net/route.h>
80#include <net/tcp.h>
81#include <net/sock.h>
82#include <net/ip_fib.h>
83#include "fib_lookup.h"
84
85static unsigned int fib_seq_sum(void)
86{
87 unsigned int fib_seq = 0;
88 struct net *net;
89
90 rtnl_lock();
91 for_each_net(net)
92 fib_seq += net->ipv4.fib_seq;
93 rtnl_unlock();
94
95 return fib_seq;
96}
97
/* chain of listeners interested in FIB events; entries are added by
 * register_fib_notifier() and invoked from call_fib_notifiers().
 */
static ATOMIC_NOTIFIER_HEAD(fib_chain);

/* Deliver one FIB event to a single notifier block, stamping @info with
 * the namespace the event belongs to before the callback runs.
 */
static int call_fib_notifier(struct notifier_block *nb, struct net *net,
			     enum fib_event_type event_type,
			     struct fib_notifier_info *info)
{
	info->net = net;
	return nb->notifier_call(nb, event_type, info);
}
107
108static void fib_rules_notify(struct net *net, struct notifier_block *nb,
109 enum fib_event_type event_type)
110{
111#ifdef CONFIG_IP_MULTIPLE_TABLES
112 struct fib_notifier_info info;
113
114 if (net->ipv4.fib_has_custom_rules)
115 call_fib_notifier(nb, net, event_type, &info);
116#endif
117}
118
/* defined later in this file; forward-declared so register_fib_notifier()
 * can dump all existing entries to a newly registering listener.
 */
static void fib_notify(struct net *net, struct notifier_block *nb,
		       enum fib_event_type event_type);

/* Deliver one route-entry event to a single listener @nb, packaging the
 * route parameters into a stack-allocated fib_entry_notifier_info.
 */
static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
				   enum fib_event_type event_type, u32 dst,
				   int dst_len, struct fib_info *fi,
				   u8 tos, u8 type, u32 tb_id)
{
	struct fib_entry_notifier_info info = {
		.dst = dst,
		.dst_len = dst_len,
		.fi = fi,
		.tos = tos,
		.type = type,
		.tb_id = tb_id,
	};
	return call_fib_notifier(nb, net, event_type, &info.info);
}
137
138static bool fib_dump_is_consistent(struct notifier_block *nb,
139 void (*cb)(struct notifier_block *nb),
140 unsigned int fib_seq)
141{
142 atomic_notifier_chain_register(&fib_chain, nb);
143 if (fib_seq == fib_seq_sum())
144 return true;
145 atomic_notifier_chain_unregister(&fib_chain, nb);
146 if (cb)
147 cb(nb);
148 return false;
149}
150
#define FIB_DUMP_MAX_RETRIES 5
/* Register @nb as a FIB listener.  Before inserting it on the chain, the
 * current FIB state (rules and entries of every namespace) is replayed to
 * @nb as ADD events.  If the FIB changed under the dump — detected via the
 * per-net sequence counters — the dump is retried up to
 * FIB_DUMP_MAX_RETRIES times; @cb, when set, lets the caller discard the
 * partial dump between attempts.  Returns 0 on success, -EBUSY when a
 * consistent dump could not be obtained.
 */
int register_fib_notifier(struct notifier_block *nb,
			  void (*cb)(struct notifier_block *nb))
{
	int retries = 0;

	do {
		unsigned int fib_seq = fib_seq_sum();
		struct net *net;

		/* Mutations are RTNL-serialized, so an unchanged sequence
		 * sum after the RCU walk below means the dump was complete
		 * and consistent.
		 */
		rcu_read_lock();
		for_each_net_rcu(net) {
			fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
			fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
		}
		rcu_read_unlock();

		if (fib_dump_is_consistent(nb, cb, fib_seq))
			return 0;
	} while (++retries < FIB_DUMP_MAX_RETRIES);

	return -EBUSY;
}
EXPORT_SYMBOL(register_fib_notifier);
179
/* Remove @nb from the FIB notifier chain.  Returns the result of the
 * chain unregistration (0 on success, -ENOENT if not found).
 */
int unregister_fib_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&fib_chain, nb);
}
EXPORT_SYMBOL(unregister_fib_notifier);
185
/* Broadcast a FIB change to every registered listener.  The per-net
 * sequence counter is bumped first so an in-flight dump in
 * register_fib_notifier() can detect it raced with this update.
 * NOTE(review): the unlocked counter increment appears to rely on RTNL
 * serializing all FIB mutations — confirm against callers.
 */
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
		       struct fib_notifier_info *info)
{
	net->ipv4.fib_seq++;
	info->net = net;
	return atomic_notifier_call_chain(&fib_chain, event_type, info);
}
193
/* Broadcast one route-entry event to all listeners, packaging the route
 * parameters into a fib_entry_notifier_info (counterpart of
 * call_fib_entry_notifier(), which targets a single listener).
 */
static int call_fib_entry_notifiers(struct net *net,
				    enum fib_event_type event_type, u32 dst,
				    int dst_len, struct fib_info *fi,
				    u8 tos, u8 type, u32 tb_id)
{
	struct fib_entry_notifier_info info = {
		.dst = dst,
		.dst_len = dst_len,
		.fi = fi,
		.tos = tos,
		.type = type,
		.tb_id = tb_id,
	};
	return call_fib_notifiers(net, event_type, &info.info);
}
209
/* maximum trie depth accounted for in /proc statistics */
#define MAX_STAT_DEPTH 32

/* number of bits in a key: 32 for IPv4 since t_key is unsigned int */
#define KEYLENGTH (8*sizeof(t_key))
#define KEY_MAX ((t_key)~0)

typedef unsigned int t_key;

/* only the pseudo key_vector embedded in struct trie has pos >= KEYLENGTH */
#define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
#define IS_TNODE(n) ((n)->bits)
#define IS_LEAF(n) (!(n)->bits)

struct key_vector {
	t_key key;
	unsigned char pos;		/* index of first bit covered by bits */
	unsigned char bits;		/* log2 of child count; 0 for a leaf */
	unsigned char slen;		/* longest suffix length below here */
	union {
		/* used when bits == 0 (LEAF): list of fib_alias entries */
		struct hlist_head leaf;
		/* used when bits > 0 (TNODE): array of 1 << bits children */
		struct key_vector __rcu *tnode[0];
	};
};

/* allocation wrapper around key_vector carrying RCU/parent bookkeeping */
struct tnode {
	struct rcu_head rcu;
	t_key empty_children;		/* KEYLENGTH bits needed */
	t_key full_children;		/* KEYLENGTH bits needed */
	struct key_vector __rcu *parent;
	struct key_vector kv[1];
#define tn_bits kv[0].bits
};

/* byte size of a tnode with room for n child pointers */
#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n])
#define LEAF_SIZE TNODE_SIZE(1)
245
#ifdef CONFIG_IP_FIB_TRIE_STATS
/* per-CPU lookup counters, updated on the fast path via this_cpu_inc() */
struct trie_use_stats {
	unsigned int gets;
	unsigned int backtrack;
	unsigned int semantic_match_passed;
	unsigned int semantic_match_miss;
	unsigned int null_node_hit;
	unsigned int resize_node_skipped;
};
#endif

/* aggregate shape statistics gathered by walking the whole trie */
struct trie_stat {
	unsigned int totdepth;
	unsigned int maxdepth;
	unsigned int tnodes;
	unsigned int leaves;
	unsigned int nullpointers;
	unsigned int prefixes;
	unsigned int nodesizes[MAX_STAT_DEPTH];
};
266
struct trie {
	struct rcu_head rcu;
	/* embedded pseudo node acting as the root (IS_TRIE() is true) */
	struct key_vector kv[1];
#ifdef CONFIG_IP_FIB_TRIE_STATS
	struct trie_use_stats __percpu *stats;
#endif
};

static struct key_vector *resize(struct trie *t, struct key_vector *tn);
/* bytes of tnode memory queued on call_rcu but not yet reclaimed */
static size_t tnode_free_size;

/* Once tnode_free_size exceeds this many pages a synchronize_rcu() is
 * forced in tnode_free() to bound outstanding dirty memory.
 */
static const int sync_pages = 128;

static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct kmem_cache *trie_leaf_kmem __read_mostly;

/* recover the enclosing tnode wrapper from its embedded key_vector */
static inline struct tnode *tn_info(struct key_vector *kv)
{
	return container_of(kv, struct tnode, kv[0]);
}

/* accessors checked against RTNL (writer side) */
#define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
#define get_child(tn, i) rtnl_dereference((tn)->tnode[i])

/* accessors valid under either the RCU read lock or RTNL */
#define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
#define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
300
301
/* Publish @tp as the parent of @n (no-op when @n is NULL).  Uses
 * rcu_assign_pointer so concurrent readers see a fully built node.
 */
static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
{
	if (n)
		rcu_assign_pointer(tn_info(n)->parent, tp);
}

/* parent initializer for nodes not yet visible to readers — no barrier */
#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)

/* Number of child slots in @tn: 1 << bits for a tnode.  For a leaf
 * (bits == 0) the "& ~1ul" clears the lone bit so this returns 0,
 * meaning no children are accessible.
 */
static inline unsigned long child_length(const struct key_vector *tn)
{
	return (1ul << tn->bits) & ~(1ul);
}
317
/* raw child index: differing key bits shifted down past the node's pos */
#define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)

/* Child index of @key under @kv.  The explicit pos == KEYLENGTH check
 * avoids a shift by the full word width (undefined behavior) when
 * BITS_PER_LONG == KEYLENGTH and @kv is the trie root pseudo node.
 */
static inline unsigned long get_index(t_key key, struct key_vector *kv)
{
	unsigned long index = key ^ kv->key;

	if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
		return 0;

	return index >> kv->pos;
}
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
/* Resize thresholds (percentages) used by should_inflate()/should_halve().
 * The root node gets more aggressive inflate / more reluctant halve values
 * to keep it wide.  NOTE(review): values presumably tuned empirically for
 * lookup performance vs. memory — see should_* for how they are applied.
 */
static const int halve_threshold = 25;
static const int inflate_threshold = 50;
static const int halve_threshold_root = 15;
static const int inflate_threshold_root = 30;
393
394static void __alias_free_mem(struct rcu_head *head)
395{
396 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
397 kmem_cache_free(fn_alias_kmem, fa);
398}
399
/* Schedule @fa for freeing after the current RCU grace period. */
static inline void alias_free_mem_rcu(struct fib_alias *fa)
{
	call_rcu(&fa->rcu, __alias_free_mem);
}
404
/* largest "bits" for which the tnode still fits in one kmalloc'd page */
#define TNODE_KMALLOC_MAX \
	ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *))
/* largest "bits" whose tnode byte size still fits in size_t */
#define TNODE_VMALLOC_MAX \
	ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))

/* RCU callback freeing a node: leaves go back to the leaf cache, small
 * tnodes were kmalloc'd, large ones vmalloc'd (mirrors tnode_alloc()).
 */
static void __node_free_rcu(struct rcu_head *head)
{
	struct tnode *n = container_of(head, struct tnode, rcu);

	if (!n->tn_bits)
		kmem_cache_free(trie_leaf_kmem, n);
	else if (n->tn_bits <= TNODE_KMALLOC_MAX)
		kfree(n);
	else
		vfree(n);
}

/* defer freeing of a key_vector's wrapper until after a grace period */
#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
423
424static struct tnode *tnode_alloc(int bits)
425{
426 size_t size;
427
428
429 if (bits > TNODE_VMALLOC_MAX)
430 return NULL;
431
432
433 size = TNODE_SIZE(1ul << bits);
434
435 if (size <= PAGE_SIZE)
436 return kzalloc(size, GFP_KERNEL);
437 else
438 return vzalloc(size);
439}
440
/* Bump the empty-children count.  The counter is only KEYLENGTH bits
 * wide, so for a node with bits == KEYLENGTH the value 1ul << KEYLENGTH
 * does not fit; when the increment wraps to zero the carry is stored in
 * full_children, which doubles as the overflow bit.
 */
static inline void empty_child_inc(struct key_vector *n)
{
	++tn_info(n)->empty_children ? : ++tn_info(n)->full_children;
}

/* Inverse of empty_child_inc(): borrow from full_children on underflow. */
static inline void empty_child_dec(struct key_vector *n)
{
	tn_info(n)->empty_children-- ? : tn_info(n)->full_children--;
}
450
451static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
452{
453 struct tnode *kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
454 struct key_vector *l = kv->kv;
455
456 if (!kv)
457 return NULL;
458
459
460 l->key = key;
461 l->pos = 0;
462 l->bits = 0;
463 l->slen = fa->fa_slen;
464
465
466 INIT_HLIST_HEAD(&l->leaf);
467 hlist_add_head(&fa->fa_list, &l->leaf);
468
469 return l;
470}
471
472static struct key_vector *tnode_new(t_key key, int pos, int bits)
473{
474 struct tnode *tnode = tnode_alloc(bits);
475 unsigned int shift = pos + bits;
476 struct key_vector *tn = tnode->kv;
477
478
479 BUG_ON(!bits || (shift > KEYLENGTH));
480
481 pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
482 sizeof(struct key_vector *) << bits);
483
484 if (!tnode)
485 return NULL;
486
487 if (bits == KEYLENGTH)
488 tnode->full_children = 1;
489 else
490 tnode->empty_children = 1ul << bits;
491
492 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
493 tn->pos = pos;
494 tn->bits = bits;
495 tn->slen = pos;
496
497 return tn;
498}
499
500
501
502
503static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
504{
505 return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
506}
507
508
509
510
/* Install child @n at slot @i of @tn, keeping the empty/full child
 * counters and the node's suffix length in sync.  Publication uses
 * rcu_assign_pointer so lockless readers see a consistent child.
 */
static void put_child(struct key_vector *tn, unsigned long i,
		      struct key_vector *n)
{
	struct key_vector *chi = get_child(tn, i);
	int isfull, wasfull;

	BUG_ON(i >= child_length(tn));

	/* update emptyChildren, overflow into fullChildren */
	if (n == NULL && chi != NULL)
		empty_child_inc(tn);
	if (n != NULL && chi == NULL)
		empty_child_dec(tn);

	/* update fullChildren */
	wasfull = tnode_full(tn, chi);
	isfull = tnode_full(tn, n);

	if (wasfull && !isfull)
		tn_info(tn)->full_children--;
	else if (!wasfull && isfull)
		tn_info(tn)->full_children++;

	/* a longer suffix below us lengthens our own suffix */
	if (n && (tn->slen < n->slen))
		tn->slen = n->slen;

	rcu_assign_pointer(tn->tnode[i], n);
}
539
/* Re-point the parent pointers of all of @tn's children at @tn.  Used
 * after inflate/halve built a replacement subtree.
 */
static void update_children(struct key_vector *tn)
{
	unsigned long i;

	/* update all of the child parent pointers */
	for (i = child_length(tn); i;) {
		struct key_vector *inode = get_child(tn, --i);

		if (!inode)
			continue;

		/* Either update the children of a tnode that
		 * already belongs to us or update the child
		 * to point to ourselves.
		 */
		if (node_parent(inode) == tn)
			update_children(inode);
		else
			node_set_parent(inode, tn);
	}
}
561
562static inline void put_child_root(struct key_vector *tp, t_key key,
563 struct key_vector *n)
564{
565 if (IS_TRIE(tp))
566 rcu_assign_pointer(tp->tnode[0], n);
567 else
568 put_child(tp, get_index(key, tp), n);
569}
570
/* Start a to-be-freed list headed at @tn; nodes are chained through the
 * (otherwise unused) rcu.next pointers and reclaimed by tnode_free().
 */
static inline void tnode_free_init(struct key_vector *tn)
{
	tn_info(tn)->rcu.next = NULL;
}

/* Push @n onto the to-be-freed list headed at @tn. */
static inline void tnode_free_append(struct key_vector *tn,
				     struct key_vector *n)
{
	tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
	tn_info(tn)->rcu.next = &tn_info(n)->rcu;
}
582
/* Walk the to-be-freed list headed at @tn (built via tnode_free_init/
 * tnode_free_append) and queue each node for RCU freeing.  When too much
 * memory is pending (sync_pages worth) force a synchronize_rcu() so
 * reclamation keeps up with resizing.
 */
static void tnode_free(struct key_vector *tn)
{
	struct callback_head *head = &tn_info(tn)->rcu;

	while (head) {
		/* advance before node_free() reuses tn's rcu head */
		head = head->next;
		tnode_free_size += TNODE_SIZE(1ul << tn->bits);
		node_free(tn);

		tn = container_of(head, struct tnode, rcu)->kv;
	}

	if (tnode_free_size >= PAGE_SIZE * sync_pages) {
		tnode_free_size = 0;
		synchronize_rcu();
	}
}
600
/* Swap @oldtnode for its rebuilt replacement @tn: link @tn into the
 * parent, fix up all child parent pointers, free the old subtree, then
 * resize any full children of the new node.  Returns the parent so the
 * caller can continue rebalancing upward.
 */
static struct key_vector *replace(struct trie *t,
				  struct key_vector *oldtnode,
				  struct key_vector *tn)
{
	struct key_vector *tp = node_parent(oldtnode);
	unsigned long i;

	/* setup the parent pointer out of and back into this node */
	NODE_INIT_PARENT(tn, tp);
	put_child_root(tp, tn->key, tn);

	/* update all of the child parent pointers */
	update_children(tn);

	/* all pointers should be clean so we are done */
	tnode_free(oldtnode);

	/* resize children now that oldtnode is freed */
	for (i = child_length(tn); i;) {
		struct key_vector *inode = get_child(tn, --i);

		/* resize child node */
		if (tnode_full(tn, inode))
			tn = resize(t, inode);
	}

	return tp;
}
629
/* Double @oldtnode: build a replacement with one more bit (twice the
 * children), pulling each full child's two halves up one level.  Returns
 * the parent of the replaced node, or NULL on allocation failure (in
 * which case the trie is left untouched).
 */
static struct key_vector *inflate(struct trie *t,
				  struct key_vector *oldtnode)
{
	struct key_vector *tn;
	unsigned long i;
	t_key m;

	pr_debug("In inflate\n");

	tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
	if (!tn)
		goto notnode;

	/* prepare oldtnode to be freed */
	tnode_free_init(oldtnode);

	/* Assemble all of the pointers in our cluster: for each old child
	 * either link it directly into the new node, copy its two children
	 * up a level, or split it into two half-size tnodes.
	 * m is the bit the new node gains over the old child nodes.
	 */
	for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
		struct key_vector *inode = get_child(oldtnode, --i);
		struct key_vector *node0, *node1;
		unsigned long j, k;

		/* An empty child */
		if (inode == NULL)
			continue;

		/* A leaf or an internal node with skipped bits */
		if (!tnode_full(oldtnode, inode)) {
			put_child(tn, get_index(inode->key, tn), inode);
			continue;
		}

		/* drop the node in the old tnode free list */
		tnode_free_append(oldtnode, inode);

		/* An internal node with two children */
		if (inode->bits == 1) {
			put_child(tn, 2 * i + 1, get_child(inode, 1));
			put_child(tn, 2 * i, get_child(inode, 0));
			continue;
		}

		/* A full child with more than two children: replace inode
		 * with node0/node1, each holding half of inode's children.
		 * node1 gets the children whose keys have bit m set,
		 * node0 the rest; both sit one level higher than inode did.
		 */
		node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
		if (!node1)
			goto nomem;
		node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);

		tnode_free_append(tn, node1);
		if (!node0)
			goto nomem;
		tnode_free_append(tn, node0);

		/* populate child pointers in the two new nodes; walk two
		 * slots per iteration, upper half into node1, lower into
		 * node0
		 */
		for (k = child_length(inode), j = k / 2; j;) {
			put_child(node1, --j, get_child(inode, --k));
			put_child(node0, j, get_child(inode, j));
			put_child(node1, --j, get_child(inode, --k));
			put_child(node0, j, get_child(inode, j));
		}

		/* link new nodes to parent */
		NODE_INIT_PARENT(node1, tn);
		NODE_INIT_PARENT(node0, tn);

		/* link parent to nodes */
		put_child(tn, 2 * i + 1, node1);
		put_child(tn, 2 * i, node0);
	}

	/* setup the parent pointers into and out of this node */
	return replace(t, oldtnode, tn);
nomem:
	/* all pointers should be clean so we are done */
	tnode_free(tn);
notnode:
	return NULL;
}
725
/* Halve @oldtnode: build a replacement with one less bit (half the
 * children), pairing adjacent old children under new one-bit tnodes
 * where both slots are occupied.  Returns the parent of the replaced
 * node, or NULL on allocation failure (trie left untouched).
 */
static struct key_vector *halve(struct trie *t,
				struct key_vector *oldtnode)
{
	struct key_vector *tn;
	unsigned long i;

	pr_debug("In halve\n");

	tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
	if (!tn)
		goto notnode;

	/* prepare oldtnode to be freed */
	tnode_free_init(oldtnode);

	/* walk old children two at a time, merging each pair into one
	 * slot of the new node
	 */
	for (i = child_length(oldtnode); i;) {
		struct key_vector *node1 = get_child(oldtnode, --i);
		struct key_vector *node0 = get_child(oldtnode, --i);
		struct key_vector *inode;

		/* At least one of the children is empty */
		if (!node1 || !node0) {
			put_child(tn, i / 2, node1 ? : node0);
			continue;
		}

		/* Two nonempty children: join them under a one-bit tnode */
		inode = tnode_new(node0->key, oldtnode->pos, 1);
		if (!inode)
			goto nomem;
		tnode_free_append(tn, inode);

		/* initialize pointers out of node */
		put_child(inode, 1, node1);
		put_child(inode, 0, node0);
		NODE_INIT_PARENT(inode, tn);

		/* link parent to node */
		put_child(tn, i / 2, inode);
	}

	/* setup the parent pointers into and out of this node */
	return replace(t, oldtnode, tn);
nomem:
	/* all pointers should be clean so we are done */
	tnode_free(tn);
notnode:
	return NULL;
}
780
/* Remove @oldtnode, promoting its single remaining child (if any) into
 * the parent's slot.  Returns the parent.
 */
static struct key_vector *collapse(struct trie *t,
				   struct key_vector *oldtnode)
{
	struct key_vector *n, *tp;
	unsigned long i;

	/* scan the tnode looking for that one child that might still exist */
	for (n = NULL, i = child_length(oldtnode); !n && i;)
		n = get_child(oldtnode, --i);

	/* compress one level */
	tp = node_parent(oldtnode);
	put_child_root(tp, oldtnode->key, n);
	node_set_parent(n, tp);

	/* drop dead node */
	node_free(oldtnode);

	return tp;
}
801
/* Recompute @tn's suffix length from its children and return it.
 * Scans the child array at a stride that grows with the best suffix
 * found so far, since only certain slots can host longer suffixes.
 */
static unsigned char update_suffix(struct key_vector *tn)
{
	unsigned char slen = tn->pos;
	unsigned long stride, i;

	/* search though the list of children looking for nodes that might
	 * have a suffix greater than the one we currently have.  Start
	 * with a stride of 2 since a stride of 1 would cover nodes whose
	 * suffix length equals tn->pos.
	 */
	for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
		struct key_vector *n = get_child(tn, i);

		if (!n || (n->slen <= slen))
			continue;

		/* update stride and slen based on new value */
		stride <<= (n->slen - slen);
		slen = n->slen;
		i &= ~(stride - 1);

		/* stop early if slen covers all but the last bit: nothing
		 * longer can exist since only slots 0 and 1 << (bits - 1)
		 * could carry that suffix length
		 */
		if ((slen + 1) >= (tn->pos + tn->bits))
			break;
	}

	tn->slen = slen;

	return slen;
}
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
/* Decide whether @tn should be doubled: true when the fraction of used
 * slots (non-empty, with full children counted twice since they expand)
 * would stay at or above inflate_threshold percent after doubling.  The
 * "50 *" factor is that doubling: used/(2*length) >= threshold/100.
 */
static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
{
	unsigned long used = child_length(tn);
	unsigned long threshold = used;

	/* Keep root node larger */
	threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
	used -= tn_info(tn)->empty_children;
	used += tn_info(tn)->full_children;

	/* if bits == KEYLENGTH then pos = 0, and the tn->pos test fails */
	return (used > 1) && tn->pos && ((50 * used) >= threshold);
}
908
/* Decide whether @tn should be halved: true when the fraction of
 * non-empty slots has dropped below halve_threshold percent
 * (100 * used < threshold, i.e. used/length < threshold/100).
 */
static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
{
	unsigned long used = child_length(tn);
	unsigned long threshold = used;

	/* Keep root node larger */
	threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
	used -= tn_info(tn)->empty_children;

	/* a one-bit node cannot shrink further */
	return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
}
922
/* True when @tn has at most one remaining child and should be removed. */
static inline bool should_collapse(struct key_vector *tn)
{
	unsigned long used = child_length(tn);

	used -= tn_info(tn)->empty_children;

	/* account for bits == KEYLENGTH: empty_children wrapped, and the
	 * extra bit lives in full_children (see empty_child_inc()), so
	 * undo the wrap by subtracting KEY_MAX (i.e. adding 1 mod 2^32)
	 */
	if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
		used -= KEY_MAX;

	/* One child or none, time to drop us from the trie */
	return used < 2;
}
936
/* cap on the number of inflate/halve passes per resize() invocation */
#define MAX_WORK 10
/* Rebalance @tn: inflate while it is too dense, otherwise halve while it
 * is too sparse, collapse it when nearly empty, and finally propagate an
 * updated suffix length to the parent.  Returns the (possibly new)
 * parent so trie_rebalance() can continue walking toward the root.
 */
static struct key_vector *resize(struct trie *t, struct key_vector *tn)
{
#ifdef CONFIG_IP_FIB_TRIE_STATS
	struct trie_use_stats __percpu *stats = t->stats;
#endif
	struct key_vector *tp = node_parent(tn);
	unsigned long cindex = get_index(tn->key, tp);
	int max_work = MAX_WORK;

	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
		 tn, inflate_threshold, halve_threshold);

	/* track the tnode via the pointer from the parent instead of
	 * doing it ourselves.  This way we can let RCU fully do its
	 * thing without us interfering
	 */
	BUG_ON(tn != get_child(tp, cindex));

	/* Double as long as the resulting node has a number of
	 * nonempty nodes that are above the threshold.
	 */
	while (should_inflate(tp, tn) && max_work) {
		tp = inflate(t, tn);
		if (!tp) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
			this_cpu_inc(stats->resize_node_skipped);
#endif
			break;
		}

		max_work--;
		tn = get_child(tp, cindex);
	}

	/* update parent in case inflate failed */
	tp = node_parent(tn);

	/* Return if at least one inflate is run */
	if (max_work != MAX_WORK)
		return tp;

	/* Halve as long as the number of empty children in this
	 * node is above threshold.
	 */
	while (should_halve(tp, tn) && max_work) {
		tp = halve(t, tn);
		if (!tp) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
			this_cpu_inc(stats->resize_node_skipped);
#endif
			break;
		}

		max_work--;
		tn = get_child(tp, cindex);
	}

	/* Only one child remains */
	if (should_collapse(tn))
		return collapse(t, tn);

	/* update parent in case halve failed */
	tp = node_parent(tn);

	/* Return if at least one deflate was run */
	if (max_work != MAX_WORK)
		return tp;

	/* push the suffix length to the parent node */
	if (tn->slen > tn->pos) {
		unsigned char slen = update_suffix(tn);

		if (slen > tp->slen)
			tp->slen = slen;
	}

	return tp;
}
1016
/* After @l's longest suffix shrank, walk toward the root shrinking the
 * ancestors' suffix lengths; stop as soon as an ancestor's suffix is
 * justified by some other child (update_suffix() returns > l->slen).
 */
static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l)
{
	while ((tp->slen > tp->pos) && (tp->slen > l->slen)) {
		if (update_suffix(tp) > l->slen)
			break;
		tp = node_parent(tp);
	}
}
1025
1026static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l)
1027{
1028
1029
1030
1031 while (tn->slen < l->slen) {
1032 tn->slen = l->slen;
1033 tn = node_parent(tn);
1034 }
1035}
1036
1037
/* Find the leaf exactly matching @key, storing the deepest node visited
 * (the would-be parent) in *@tp.  Returns the leaf or NULL.
 * NOTE(review): uses the _rcu child accessor — presumably callable under
 * RCU or RTNL; confirm against callers.
 */
static struct key_vector *fib_find_node(struct trie *t,
					struct key_vector **tp, u32 key)
{
	struct key_vector *pn, *n = t->kv;
	unsigned long index = 0;

	do {
		pn = n;
		n = get_child_rcu(n, index);

		if (!n)
			break;

		index = get_cindex(key, n);

		/* This bit of code is a bit tricky but it combines multiple
		 * checks into a single check.  The prefix consists of the
		 * prefix plus zeros for the "bits" in the prefix.  The
		 * index is the difference between the key and this value.
		 * From this we can actually derive several pieces of data:
		 *   if (index >= (1ul << bits))
		 *     we have a mismatch in skip bits and failed
		 *   else
		 *     we know the value is the cindex
		 * This check is safe even if bits == KEYLENGTH because a
		 * node that wide can only be allocated when long is wider
		 * than 32 bits.
		 */
		if (index >= (1ul << n->bits)) {
			n = NULL;
			break;
		}

		/* keep searching until we find a perfect match leaf or NULL */
	} while (IS_TNODE(n));

	*tp = pn;

	return n;
}
1079
1080
1081
1082
/* Return the first fib alias in @fah matching @slen whose tos is less
 * than or equal to @tos and whose priority is greater than or equal to
 * @prio — i.e. the insertion point for a (slen, tos, prio) tuple, since
 * the list is kept sorted by slen, then descending tos, then priority.
 */
static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
					u8 tos, u32 prio)
{
	struct fib_alias *fa;

	if (!fah)
		return NULL;

	hlist_for_each_entry(fa, fah, fa_list) {
		if (fa->fa_slen < slen)
			continue;
		if (fa->fa_slen != slen)
			break;
		if (fa->fa_tos > tos)
			continue;
		if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
			return fa;
	}

	return NULL;
}
1104
/* Rebalance from @tn up to the root: resize() fixes one level and hands
 * back the parent, so repeating until the trie pseudo node is reached
 * rebalances the whole path.
 */
static void trie_rebalance(struct trie *t, struct key_vector *tn)
{
	for (; !IS_TRIE(tn); tn = resize(t, tn))
		;
}
1110
/* Insert a brand-new leaf for @key (holding alias @new) below parent
 * @tp, creating an intermediate one-bit tnode when the parent slot is
 * already occupied by a non-matching node.  Rebalances afterwards.
 * Returns 0 or -ENOMEM.
 */
static int fib_insert_node(struct trie *t, struct key_vector *tp,
			   struct fib_alias *new, t_key key)
{
	struct key_vector *n, *l;

	l = leaf_new(key, new);
	if (!l)
		goto noleaf;

	/* retrieve child from parent node */
	n = get_child(tp, get_index(key, tp));

	/* Case 2: n is a LEAF or a TNODE and the key doesn't match.
	 *
	 *  Add a new tnode here
	 *  first tnode need some special handling
	 *  leaves us in position for handling as case 3
	 */
	if (n) {
		struct key_vector *tn;

		/* split at the highest differing bit between key and n */
		tn = tnode_new(key, __fls(key ^ n->key), 1);
		if (!tn)
			goto notnode;

		/* initialize routes out of node */
		NODE_INIT_PARENT(tn, tp);
		put_child(tn, get_index(key, tn) ^ 1, n);

		/* start adding routes into the node */
		put_child_root(tp, key, tn);
		node_set_parent(n, tn);

		/* parent now has a NULL spot where the leaf can go */
		tp = tn;
	}

	/* Case 3: n is NULL, and will just insert a new leaf */
	NODE_INIT_PARENT(l, tp);
	put_child_root(tp, key, l);
	trie_rebalance(t, tp);

	return 0;
notnode:
	node_free(l);
noleaf:
	return -ENOMEM;
}
1159
/* Insert alias @new into leaf @l (creating the leaf via fib_insert_node()
 * when @l is NULL).  @fa, when set, is the list position to insert before;
 * otherwise @new is appended at the end of its slen section.  Updates the
 * leaf's suffix length and pushes it up the trie when it grew.
 */
static int fib_insert_alias(struct trie *t, struct key_vector *tp,
			    struct key_vector *l, struct fib_alias *new,
			    struct fib_alias *fa, t_key key)
{
	if (!l)
		return fib_insert_node(t, tp, new, key);

	if (fa) {
		hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
	} else {
		struct fib_alias *last;

		/* walk to the tail of the section ordered by slen */
		hlist_for_each_entry(last, &l->leaf, fa_list) {
			if (new->fa_slen < last->fa_slen)
				break;
			fa = last;
		}

		if (fa)
			hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
		else
			hlist_add_head_rcu(&new->fa_list, &l->leaf);
	}

	/* if we added to the tail node then we need to update slen */
	if (l->slen < new->fa_slen) {
		l->slen = new->fa_slen;
		leaf_push_suffix(tp, l);
	}

	return 0;
}
1192
1193
/* Insert or replace a route described by @cfg in table @tb.
 * NOTE(review): appears to rely on RTNL serializing writers — confirm.
 * Returns 0 on success or a negative errno (-EINVAL bad prefix, -EEXIST
 * with NLM_F_EXCL, -ENOENT without NLM_F_CREATE, -ENOBUFS on OOM).
 */
int fib_table_insert(struct net *net, struct fib_table *tb,
		     struct fib_config *cfg)
{
	enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
	struct trie *t = (struct trie *)tb->tb_data;
	struct fib_alias *fa, *new_fa;
	struct key_vector *l, *tp;
	u16 nlflags = NLM_F_EXCL;
	struct fib_info *fi;
	u8 plen = cfg->fc_dst_len;
	u8 slen = KEYLENGTH - plen;
	u8 tos = cfg->fc_tos;
	u32 key;
	int err;

	if (plen > KEYLENGTH)
		return -EINVAL;

	key = ntohl(cfg->fc_dst);

	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);

	/* reject destinations with host bits set beyond the prefix */
	if ((plen < KEYLENGTH) && (key << plen))
		return -EINVAL;

	fi = fib_create_info(cfg);
	if (IS_ERR(fi)) {
		err = PTR_ERR(fi);
		goto err;
	}

	l = fib_find_node(t, &tp, key);
	fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority) : NULL;

	/* Now fa, if non-NULL, points to the first fib alias
	 * with the same keys [prefix,tos,priority], if such key already
	 * exists, or to the node before which we will insert the new one.
	 *
	 * If fa is NULL, we will need to allocate a new one and
	 * insert into the tail of the section matching the suffix length
	 * of the new alias.
	 */
	if (fa && fa->fa_tos == tos &&
	    fa->fa_info->fib_priority == fi->fib_priority) {
		struct fib_alias *fa_first, *fa_match;

		err = -EEXIST;
		if (cfg->fc_nlflags & NLM_F_EXCL)
			goto out;

		nlflags &= ~NLM_F_EXCL;

		/* 1. Find an exact match for type, scope, fib_info to avoid
		 *    duplicate routes.
		 * 2. Remember the first alias matching the new entry
		 *    (fa_first) for the replace/insert-before cases.
		 */
		fa_match = NULL;
		fa_first = fa;
		hlist_for_each_entry_from(fa, fa_list) {
			if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
				break;
			if (fa->fa_info->fib_priority != fi->fib_priority)
				break;
			if (fa->fa_type == cfg->fc_type &&
			    fa->fa_info == fi) {
				fa_match = fa;
				break;
			}
		}

		if (cfg->fc_nlflags & NLM_F_REPLACE) {
			struct fib_info *fi_drop;
			u8 state;

			nlflags |= NLM_F_REPLACE;
			fa = fa_first;
			if (fa_match) {
				/* replacing with an identical route is a
				 * successful no-op
				 */
				if (fa == fa_match)
					err = 0;
				goto out;
			}
			err = -ENOBUFS;
			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
			if (new_fa == NULL)
				goto out;

			fi_drop = fa->fa_info;
			new_fa->fa_tos = fa->fa_tos;
			new_fa->fa_info = fi;
			new_fa->fa_type = cfg->fc_type;
			state = fa->fa_state;
			new_fa->fa_state = state & ~FA_S_ACCESSED;
			new_fa->fa_slen = fa->fa_slen;

			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
						 key, plen, fi,
						 new_fa->fa_tos, cfg->fc_type,
						 tb->tb_id);
			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
				  tb->tb_id, &cfg->fc_nlinfo, nlflags);

			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);

			alias_free_mem_rcu(fa);

			fib_release_info(fi_drop);
			if (state & FA_S_ACCESSED)
				rt_cache_flush(cfg->fc_nlinfo.nl_net);

			goto succeeded;
		}
		/* Error if we find a perfect match which
		 * uses the same scope, type, and nexthop
		 * information.
		 */
		if (fa_match)
			goto out;

		if (cfg->fc_nlflags & NLM_F_APPEND) {
			event = FIB_EVENT_ENTRY_APPEND;
			nlflags |= NLM_F_APPEND;
		} else {
			fa = fa_first;
		}
	}
	err = -ENOENT;
	if (!(cfg->fc_nlflags & NLM_F_CREATE))
		goto out;

	nlflags |= NLM_F_CREATE;
	err = -ENOBUFS;
	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
	if (new_fa == NULL)
		goto out;

	new_fa->fa_info = fi;
	new_fa->fa_tos = tos;
	new_fa->fa_type = cfg->fc_type;
	new_fa->fa_state = 0;
	new_fa->fa_slen = slen;

	/* Insert new entry to the list. */
	err = fib_insert_alias(t, tp, l, new_fa, fa, key);
	if (err)
		goto out_free_new_fa;

	if (!plen)
		tb->tb_num_default++;

	rt_cache_flush(cfg->fc_nlinfo.nl_net);
	call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type,
				 tb->tb_id);
	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
		  &cfg->fc_nlinfo, nlflags);
succeeded:
	return 0;

out_free_new_fa:
	kmem_cache_free(fn_alias_kmem, new_fa);
out:
	fib_release_info(fi);
err:
	return err;
}
1360
1361static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
1362{
1363 t_key prefix = n->key;
1364
1365 return (key ^ prefix) & (prefix | -prefix);
1366}
1367
1368
/* Longest-prefix-match lookup in table @tb for flow @flp, filling @res
 * on success.  NOTE(review): lockless fast path using the _rcu accessors
 * — presumably called under rcu_read_lock(); confirm against callers.
 * Returns a fib_props error code (<= 0) on a match, -EAGAIN when nothing
 * matches.  Note the goto into the inner loop: backtrace re-enters the
 * child-scanning loop of step 2.
 */
int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
		     struct fib_result *res, int fib_flags)
{
	struct trie *t = (struct trie *)tb->tb_data;
#ifdef CONFIG_IP_FIB_TRIE_STATS
	struct trie_use_stats __percpu *stats = t->stats;
#endif
	const t_key key = ntohl(flp->daddr);
	struct key_vector *n, *pn;
	struct fib_alias *fa;
	unsigned long index;
	t_key cindex;

	pn = t->kv;
	cindex = 0;

	n = get_child_rcu(pn, cindex);
	if (!n)
		return -EAGAIN;

#ifdef CONFIG_IP_FIB_TRIE_STATS
	this_cpu_inc(stats->gets);
#endif

	/* Step 1: Travel to the longest prefix match in the trie */
	for (;;) {
		index = get_cindex(key, n);

		/* This bit of code is a bit tricky but it combines multiple
		 * checks into a single check.  The prefix consists of the
		 * prefix plus zeros for the "bits" in the prefix.  The
		 * index is the difference between the key and this value.
		 * From this we can actually derive several pieces of data:
		 *   if (index >= (1ul << bits))
		 *     we have a mismatch in skip bits and failed
		 *   else
		 *     we know the value is the cindex
		 */
		if (index >= (1ul << n->bits))
			break;

		/* we have found a leaf. Prefixes have already been compared */
		if (IS_LEAF(n))
			goto found;

		/* only record pn and cindex if we are going to be chopping
		 * bits later.  Otherwise we are just wasting cycles.
		 */
		if (n->slen > n->pos) {
			pn = n;
			cindex = index;
		}

		n = get_child_rcu(n, index);
		if (unlikely(!n))
			goto backtrace;
	}

	/* Step 2: Sort out leaves and begin backtracing for longest prefix */
	for (;;) {
		/* record the pointer where our next node pointer is stored */
		struct key_vector __rcu **cptr = n->tnode;

		/* This test verifies that none of the bits that differ
		 * between the key and the prefix exist in the region of
		 * the lsb and higher in the prefix.
		 */
		if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
			goto backtrace;

		/* exit out and process leaf */
		if (unlikely(IS_LEAF(n)))
			break;

		/* Don't bother recording parent info.  Since we are in
		 * prefix match mode we will have to come back to wherever
		 * we started this traversal anyway
		 */
		while ((n = rcu_dereference(*cptr)) == NULL) {
backtrace:
#ifdef CONFIG_IP_FIB_TRIE_STATS
			if (!n)
				this_cpu_inc(stats->null_node_hit);
#endif
			/* If we are at cindex 0 there are no more bits for
			 * us to strip at this level so we must ascend back
			 * up one level to see if there are any more bits to
			 * be stripped there.
			 */
			while (!cindex) {
				t_key pkey = pn->key;

				/* If we don't have a parent then there is
				 * nothing for us to do as we do not have any
				 * further nodes to parse.
				 */
				if (IS_TRIE(pn))
					return -EAGAIN;
#ifdef CONFIG_IP_FIB_TRIE_STATS
				this_cpu_inc(stats->backtrack);
#endif
				/* Get Child's index */
				pn = node_parent_rcu(pn);
				cindex = get_index(pkey, pn);
			}

			/* strip the least significant bit from the cindex */
			cindex &= cindex - 1;

			/* grab pointer for next child node */
			cptr = &pn->tnode[cindex];
		}
	}

found:
	/* this line carries forward the xor from earlier in the function */
	index = key ^ n->key;

	/* Step 3: Process the leaf, if that fails fall back to backtracing */
	hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
		struct fib_info *fi = fa->fa_info;
		int nhsel, err;

		/* skip aliases whose suffix does not cover the differing
		 * bits; the second clause handles fa_slen == KEYLENGTH on
		 * 32-bit longs where the shift would be the full width
		 */
		if ((index >= (1ul << fa->fa_slen)) &&
		    ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
			continue;
		if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
			continue;
		if (fi->fib_dead)
			continue;
		if (fa->fa_info->fib_scope < flp->flowi4_scope)
			continue;
		fib_alias_accessed(fa);
		err = fib_props[fa->fa_type].error;
		if (unlikely(err < 0)) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
			this_cpu_inc(stats->semantic_match_passed);
#endif
			return err;
		}
		if (fi->fib_flags & RTNH_F_DEAD)
			continue;
		for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
			const struct fib_nh *nh = &fi->fib_nh[nhsel];

			if (nh->nh_flags & RTNH_F_DEAD)
				continue;
			if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
				continue;

			if (!(fib_flags & FIB_LOOKUP_NOREF))
				atomic_inc(&fi->fib_clntref);

			res->prefixlen = KEYLENGTH - fa->fa_slen;
			res->nh_sel = nhsel;
			res->type = fa->fa_type;
			res->scope = fi->fib_scope;
			res->fi = fi;
			res->table = tb;
			res->fa_head = &n->leaf;
#ifdef CONFIG_IP_FIB_TRIE_STATS
			this_cpu_inc(stats->semantic_match_passed);
#endif
			return err;
		}
	}
#ifdef CONFIG_IP_FIB_TRIE_STATS
	this_cpu_inc(stats->semantic_match_miss);
#endif
	goto backtrace;
}
EXPORT_SYMBOL_GPL(fib_table_lookup);
1546
/* Unlink alias @old from leaf @l.  Frees and rebalances when the leaf
 * becomes empty; otherwise, when the removed alias was the list tail,
 * shrinks the leaf's suffix length and pulls it up the trie.
 */
static void fib_remove_alias(struct trie *t, struct key_vector *tp,
			     struct key_vector *l, struct fib_alias *old)
{
	/* record the location of the previous list_info entry */
	struct hlist_node **pprev = old->fa_list.pprev;
	struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);

	/* remove the fib_alias from the list */
	hlist_del_rcu(&old->fa_list);

	/* if we emptied the list this leaf will be freed and we can sort
	 * out parent suffix lengths as a part of trie_rebalance
	 */
	if (hlist_empty(&l->leaf)) {
		put_child_root(tp, l->key, NULL);
		node_free(l);
		trie_rebalance(t, tp);
		return;
	}

	/* only access fa if it is pointing at the last valid hlist_node */
	if (*pprev)
		return;

	/* update the trie with the latest suffix length */
	l->slen = fa->fa_slen;
	leaf_pull_suffix(tp, l);
}
1575
1576
/* Delete the route described by @cfg from table @tb.
 * NOTE(review): appears to rely on RTNL serializing writers — confirm.
 * Returns 0 on success, -EINVAL for a bad prefix, -ESRCH when no
 * matching route exists.
 */
int fib_table_delete(struct net *net, struct fib_table *tb,
		     struct fib_config *cfg)
{
	struct trie *t = (struct trie *) tb->tb_data;
	struct fib_alias *fa, *fa_to_delete;
	struct key_vector *l, *tp;
	u8 plen = cfg->fc_dst_len;
	u8 slen = KEYLENGTH - plen;
	u8 tos = cfg->fc_tos;
	u32 key;

	if (plen > KEYLENGTH)
		return -EINVAL;

	key = ntohl(cfg->fc_dst);

	/* reject destinations with host bits set beyond the prefix */
	if ((plen < KEYLENGTH) && (key << plen))
		return -EINVAL;

	l = fib_find_node(t, &tp, key);
	if (!l)
		return -ESRCH;

	fa = fib_find_alias(&l->leaf, slen, tos, 0);
	if (!fa)
		return -ESRCH;

	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);

	/* scan the aliases with matching slen/tos for one whose optional
	 * selectors (type, scope, prefsrc, protocol, nexthops) all match
	 */
	fa_to_delete = NULL;
	hlist_for_each_entry_from(fa, fa_list) {
		struct fib_info *fi = fa->fa_info;

		if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
			break;

		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
		     fa->fa_info->fib_scope == cfg->fc_scope) &&
		    (!cfg->fc_prefsrc ||
		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
		    (!cfg->fc_protocol ||
		     fi->fib_protocol == cfg->fc_protocol) &&
		    fib_nh_match(cfg, fi) == 0) {
			fa_to_delete = fa;
			break;
		}
	}

	if (!fa_to_delete)
		return -ESRCH;

	call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
				 fa_to_delete->fa_info, tos,
				 fa_to_delete->fa_type, tb->tb_id);
	rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
		  &cfg->fc_nlinfo, 0);

	if (!plen)
		tb->tb_num_default--;

	fib_remove_alias(t, tp, l, fa_to_delete);

	if (fa_to_delete->fa_state & FA_S_ACCESSED)
		rt_cache_flush(cfg->fc_nlinfo.nl_net);

	fib_release_info(fa_to_delete->fa_info);
	alias_free_mem_rcu(fa_to_delete);
	return 0;
}
1647
1648
/* leaf_walk_rcu - find the next leaf with key >= @key
 * @tn:  in/out: node to resume the walk from; updated on return
 * @key: lower bound (host order) for the leaf to find
 *
 * Returns the next leaf at or after @key, or NULL when none remains.
 * Must be called under rcu_read_lock().
 */
static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
{
	struct key_vector *pn, *n = *tn;
	unsigned long cindex;

	/* this loop is meant to try and find the key in the trie */
	do {
		/* record parent and next child index */
		pn = n;
		cindex = (key > pn->key) ? get_index(key, pn) : 0;

		/* index beyond this node's range: key is not below pn */
		if (cindex >> pn->bits)
			break;

		/* descend into the next child */
		n = get_child_rcu(pn, cindex++);
		if (!n)
			break;

		/* guarantee forward progress on the keys */
		if (IS_LEAF(n) && (n->key >= key))
			goto found;
	} while (IS_TNODE(n));

	/* this loop will search for the next leaf with a greater key */
	while (!IS_TRIE(pn)) {
		/* if we exhausted the parent node we will need to climb */
		if (cindex >= (1ul << pn->bits)) {
			t_key pkey = pn->key;

			pn = node_parent_rcu(pn);
			cindex = get_index(pkey, pn) + 1;
			continue;
		}

		/* grab the next available node */
		n = get_child_rcu(pn, cindex++);
		if (!n)
			continue;

		/* no need to compare keys since we bumped the index */
		if (IS_LEAF(n))
			goto found;

		/* restart scanning inside the new node */
		pn = n;
		cindex = 0;
	}

	/* reached the trie root: walk is complete */
	*tn = pn;
	return NULL;
found:
	/* record the parent so the caller can resume from here */
	*tn = pn;
	return n;
}
1705
1706
/* fib_table_flush - remove all aliases backed by dead fib_info
 *
 * Walks the trie in reverse order, deleting every alias whose fib_info
 * carries RTNH_F_DEAD, resizing nodes on the way back up and freeing
 * leaves that become empty.  Returns the number of aliases removed.
 */
int fib_table_flush(struct net *net, struct fib_table *tb)
{
	struct trie *t = (struct trie *)tb->tb_data;
	struct key_vector *pn = t->kv;
	unsigned long cindex = 1;
	struct hlist_node *tmp;
	struct fib_alias *fa;
	int found = 0;

	/* walk trie in reverse order */
	for (;;) {
		unsigned char slen = 0;
		struct key_vector *n;

		if (!(cindex--)) {
			t_key pkey = pn->key;

			/* cannot resize the trie vector */
			if (IS_TRIE(pn))
				break;

			/* resize the completed node and climb to its parent */
			pn = resize(t, pn);
			cindex = get_index(pkey, pn);

			continue;
		}

		/* grab the next available node */
		n = get_child(pn, cindex);
		if (!n)
			continue;

		if (IS_TNODE(n)) {
			/* descend: record pn and start from its last child */
			pn = n;
			cindex = 1ul << n->bits;

			continue;
		}

		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
			struct fib_info *fi = fa->fa_info;

			/* keep aliases that are alive; track their max suffix */
			if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) {
				slen = fa->fa_slen;
				continue;
			}

			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
						 n->key,
						 KEYLENGTH - fa->fa_slen,
						 fi, fa->fa_tos, fa->fa_type,
						 tb->tb_id);
			hlist_del_rcu(&fa->fa_list);
			fib_release_info(fa->fa_info);
			alias_free_mem_rcu(fa);
			found++;
		}

		/* update leaf slen */
		n->slen = slen;

		/* drop the leaf entirely if no aliases remain */
		if (hlist_empty(&n->leaf)) {
			put_child_root(pn, n->key, NULL);
			node_free(n);
		}
	}

	pr_debug("trie_flush found=%d\n", found);
	return found;
}
1779
1780static void fib_leaf_notify(struct net *net, struct key_vector *l,
1781 struct fib_table *tb, struct notifier_block *nb,
1782 enum fib_event_type event_type)
1783{
1784 struct fib_alias *fa;
1785
1786 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1787 struct fib_info *fi = fa->fa_info;
1788
1789 if (!fi)
1790 continue;
1791
1792#if 0
1793
1794
1795
1796
1797
1798
1799
1800
1801 if (tb->tb_id != fa->tb_id)
1802 continue;
1803#endif
1804
1805 call_fib_entry_notifier(nb, net, event_type, l->key,
1806 KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
1807 fa->fa_type, tb->tb_id);
1808 }
1809}
1810
1811static void fib_table_notify(struct net *net, struct fib_table *tb,
1812 struct notifier_block *nb,
1813 enum fib_event_type event_type)
1814{
1815 struct trie *t = (struct trie *)tb->tb_data;
1816 struct key_vector *l, *tp = t->kv;
1817 t_key key = 0;
1818
1819 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1820 fib_leaf_notify(net, l, tb, nb, event_type);
1821
1822 key = l->key + 1;
1823
1824 if (key < l->key)
1825 break;
1826 }
1827}
1828
1829static void fib_notify(struct net *net, struct notifier_block *nb,
1830 enum fib_event_type event_type)
1831{
1832 unsigned int h;
1833
1834 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
1835 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
1836 struct fib_table *tb;
1837
1838 hlist_for_each_entry_rcu(tb, head, tb_hlist)
1839 fib_table_notify(net, tb, nb, event_type);
1840 }
1841}
1842
/* RCU callback: free a fib_table and its embedded trie.
 * The trie lives at the start of tb->tb_data, so step back from the
 * trie pointer to the enclosing fib_table before freeing.
 */
static void __trie_free_rcu(struct rcu_head *head)
{
	struct trie *t = container_of(head, struct trie, rcu);
	unsigned long *tb_data = (unsigned long *)t;
	struct fib_table *tb = container_of(tb_data, struct fib_table,
					    tb_data[0]);
#ifdef CONFIG_IP_FIB_TRIE_STATS
	free_percpu(t->stats);
#endif
	kfree(tb);
}
1854
1855void fib_free_table(struct fib_table *tb)
1856{
1857 struct trie *t = (struct trie *)tb->tb_data;
1858
1859 call_rcu(&t->rcu, __trie_free_rcu);
1860}
1861
/* Dump all aliases of leaf @l into the netlink skb, resuming at the
 * alias index saved in cb->args[4].  Returns skb->len on completion or
 * -1 when the skb filled up (resume index saved for the next call).
 * rcu_read_lock is held by the caller.
 */
static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
			     struct sk_buff *skb, struct netlink_callback *cb)
{
	__be32 xkey = htonl(l->key);
	struct fib_alias *fa;
	int i, s_i;

	s_i = cb->args[4];
	i = 0;

	/* skip aliases already dumped in a previous pass */
	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
		if (i < s_i) {
			i++;
			continue;
		}

		if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq,
				  RTM_NEWROUTE,
				  tb->tb_id,
				  fa->fa_type,
				  xkey,
				  KEYLENGTH - fa->fa_slen,
				  fa->fa_tos,
				  fa->fa_info, NLM_F_MULTI) < 0) {
			/* skb full: remember where to resume */
			cb->args[4] = i;
			return -1;
		}
		i++;
	}

	cb->args[4] = i;
	return skb->len;
}
1897
1898
/* rcu_read_lock needs to be held by the caller from the read side */
int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
		   struct netlink_callback *cb)
{
	struct trie *t = (struct trie *)tb->tb_data;
	struct key_vector *l, *tp = t->kv;

	/* Dump starting at last key.
	 * Note: 0.0.0.0/0 (ie default) is first key.
	 */
	int count = cb->args[2];
	t_key key = cb->args[3];

	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
			/* skb full: save position so the dump can resume */
			cb->args[3] = key;
			cb->args[2] = count;
			return -1;
		}

		++count;
		key = l->key + 1;

		/* leaf fully dumped: clear the per-leaf resume index */
		memset(&cb->args[4], 0,
		       sizeof(cb->args) - 4*sizeof(cb->args[0]));

		/* stop when key wraps past the last possible value */
		if (key < l->key)
			break;
	}

	cb->args[3] = key;
	cb->args[2] = count;

	return skb->len;
}
1933
/* Create the slab caches for fib aliases and trie leaves.
 * SLAB_PANIC: the kernel cannot operate without these caches, so
 * failure at boot panics rather than returning an error.
 */
void __init fib_trie_init(void)
{
	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
					  sizeof(struct fib_alias),
					  0, SLAB_PANIC, NULL);

	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
					   LEAF_SIZE,
					   0, SLAB_PANIC, NULL);
}
1944
1945struct fib_table *fib_trie_table(u32 id)
1946{
1947 struct fib_table *tb;
1948 struct trie *t;
1949
1950 tb = kzalloc(sizeof(*tb) + sizeof(struct trie), GFP_KERNEL);
1951 if (tb == NULL)
1952 return NULL;
1953
1954 tb->tb_id = id;
1955 tb->tb_default = -1;
1956 tb->tb_num_default = 0;
1957
1958 t = (struct trie *) tb->tb_data;
1959 t->kv[0].pos = KEYLENGTH;
1960 t->kv[0].slen = KEYLENGTH;
1961#ifdef CONFIG_IP_FIB_TRIE_STATS
1962 t->stats = alloc_percpu(struct trie_use_stats);
1963 if (!t->stats) {
1964 kfree(tb);
1965 tb = NULL;
1966 }
1967#endif
1968
1969 return tb;
1970}
1971
1972#ifdef CONFIG_PROC_FS
1973
/* Iterator state for the full-trie walk behind /proc/net/fib_trie. */
struct fib_trie_iter {
	struct seq_net_private p;	/* net-namespace seq_file state */
	struct fib_table *tb;		/* table currently being walked */
	struct key_vector *tnode;	/* parent of the last node returned */
	unsigned int index;		/* next child slot to visit in tnode */
	unsigned int depth;		/* depth of tnode within the trie */
};
1981
/* Depth-first walk: return the next node after the position recorded in
 * @iter, descending into tnodes and climbing when a node is exhausted.
 * Returns NULL once the whole trie has been visited.  Caller holds RCU.
 */
static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
{
	unsigned long cindex = iter->index;
	struct key_vector *pn = iter->tnode;
	t_key pkey;

	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
		 iter->tnode, iter->index, iter->depth);

	while (!IS_TRIE(pn)) {
		while (cindex < child_length(pn)) {
			struct key_vector *n = get_child_rcu(pn, cindex++);

			if (!n)
				continue;

			if (IS_LEAF(n)) {
				/* stay at this parent, advance the index */
				iter->tnode = pn;
				iter->index = cindex;
			} else {
				/* push down one level into the tnode */
				iter->tnode = n;
				iter->index = 0;
				++iter->depth;
			}

			return n;
		}

		/* current node exhausted: pop back up to the parent */
		pkey = pn->key;
		pn = node_parent_rcu(pn);
		cindex = get_index(pkey, pn) + 1;
		--iter->depth;
	}

	/* record the root so further calls know the walk is done */
	iter->tnode = pn;
	iter->index = 0;

	return NULL;
}
2024
2025static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2026 struct trie *t)
2027{
2028 struct key_vector *n, *pn = t->kv;
2029
2030 if (!t)
2031 return NULL;
2032
2033 n = rcu_dereference(pn->tnode[0]);
2034 if (!n)
2035 return NULL;
2036
2037 if (IS_TNODE(n)) {
2038 iter->tnode = n;
2039 iter->index = 0;
2040 iter->depth = 1;
2041 } else {
2042 iter->tnode = pn;
2043 iter->index = 0;
2044 iter->depth = 0;
2045 }
2046
2047 return n;
2048}
2049
/* Walk every node of @t under RCU and accumulate size/depth statistics
 * into @s for the /proc/net/fib_triestat output.
 */
static void trie_collect_stats(struct trie *t, struct trie_stat *s)
{
	struct key_vector *n;
	struct fib_trie_iter iter;

	memset(s, 0, sizeof(*s));

	rcu_read_lock();
	for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
		if (IS_LEAF(n)) {
			struct fib_alias *fa;

			s->leaves++;
			s->totdepth += iter.depth;
			if (iter.depth > s->maxdepth)
				s->maxdepth = iter.depth;

			/* each alias on the leaf is one prefix */
			hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
				++s->prefixes;
		} else {
			s->tnodes++;
			if (n->bits < MAX_STAT_DEPTH)
				s->nodesizes[n->bits]++;
			s->nullpointers += tn_info(n)->empty_children;
		}
	}
	rcu_read_unlock();
}
2078
2079
2080
2081
2082static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2083{
2084 unsigned int i, max, pointers, bytes, avdepth;
2085
2086 if (stat->leaves)
2087 avdepth = stat->totdepth*100 / stat->leaves;
2088 else
2089 avdepth = 0;
2090
2091 seq_printf(seq, "\tAver depth: %u.%02d\n",
2092 avdepth / 100, avdepth % 100);
2093 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2094
2095 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2096 bytes = LEAF_SIZE * stat->leaves;
2097
2098 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2099 bytes += sizeof(struct fib_alias) * stat->prefixes;
2100
2101 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2102 bytes += TNODE_SIZE(0) * stat->tnodes;
2103
2104 max = MAX_STAT_DEPTH;
2105 while (max > 0 && stat->nodesizes[max-1] == 0)
2106 max--;
2107
2108 pointers = 0;
2109 for (i = 1; i < max; i++)
2110 if (stat->nodesizes[i] != 0) {
2111 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2112 pointers += (1<<i) * stat->nodesizes[i];
2113 }
2114 seq_putc(seq, '\n');
2115 seq_printf(seq, "\tPointers: %u\n", pointers);
2116
2117 bytes += sizeof(struct key_vector *) * pointers;
2118 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2119 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2120}
2121
#ifdef CONFIG_IP_FIB_TRIE_STATS
/* Sum the per-cpu lookup counters and print the totals. */
static void trie_show_usage(struct seq_file *seq,
			    const struct trie_use_stats __percpu *stats)
{
	struct trie_use_stats total = { 0 };
	int cpu;

	/* accumulate stats from every possible CPU */
	for_each_possible_cpu(cpu) {
		const struct trie_use_stats *pc = per_cpu_ptr(stats, cpu);

		total.gets += pc->gets;
		total.backtrack += pc->backtrack;
		total.semantic_match_passed += pc->semantic_match_passed;
		total.semantic_match_miss += pc->semantic_match_miss;
		total.null_node_hit += pc->null_node_hit;
		total.resize_node_skipped += pc->resize_node_skipped;
	}

	seq_printf(seq, "\nCounters:\n---------\n");
	seq_printf(seq, "gets = %u\n", total.gets);
	seq_printf(seq, "backtracks = %u\n", total.backtrack);
	seq_printf(seq, "semantic match passed = %u\n",
		   total.semantic_match_passed);
	seq_printf(seq, "semantic match miss = %u\n", total.semantic_match_miss);
	seq_printf(seq, "null node hit= %u\n", total.null_node_hit);
	seq_printf(seq, "skipped node resize = %u\n\n", total.resize_node_skipped);
}
#endif
2151
2152static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2153{
2154 if (tb->tb_id == RT_TABLE_LOCAL)
2155 seq_puts(seq, "Local:\n");
2156 else if (tb->tb_id == RT_TABLE_MAIN)
2157 seq_puts(seq, "Main:\n");
2158 else
2159 seq_printf(seq, "Id %d:\n", tb->tb_id);
2160}
2161
2162
2163static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2164{
2165 struct net *net = (struct net *)seq->private;
2166 unsigned int h;
2167
2168 seq_printf(seq,
2169 "Basic info: size of leaf:"
2170 " %Zd bytes, size of tnode: %Zd bytes.\n",
2171 LEAF_SIZE, TNODE_SIZE(0));
2172
2173 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2174 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2175 struct fib_table *tb;
2176
2177 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2178 struct trie *t = (struct trie *) tb->tb_data;
2179 struct trie_stat stat;
2180
2181 if (!t)
2182 continue;
2183
2184 fib_table_print(seq, tb);
2185
2186 trie_collect_stats(t, &stat);
2187 trie_show_stats(seq, &stat);
2188#ifdef CONFIG_IP_FIB_TRIE_STATS
2189 trie_show_usage(seq, t->stats);
2190#endif
2191 }
2192 }
2193
2194 return 0;
2195}
2196
/* open() hook for /proc/net/fib_triestat: single-shot, per-net show. */
static int fib_triestat_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, fib_triestat_seq_show);
}
2201
/* file_operations for /proc/net/fib_triestat */
static const struct file_operations fib_triestat_fops = {
	.owner = THIS_MODULE,
	.open = fib_triestat_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2209
/* Linear scan to the pos'th node (in walk order) across every table in
 * the namespace; records the owning table in iter->tb.  Returns NULL
 * when pos is past the end.  Caller holds RCU.
 */
static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
{
	struct fib_trie_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	loff_t idx = 0;
	unsigned int h;

	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
		struct fib_table *tb;

		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
			struct key_vector *n;

			for (n = fib_trie_get_first(iter,
						    (struct trie *) tb->tb_data);
			     n; n = fib_trie_get_next(iter))
				if (pos == idx++) {
					iter->tb = tb;
					return n;
				}
		}
	}

	return NULL;
}
2236
/* seq_file start: take the RCU read lock for the whole walk (released
 * in fib_trie_seq_stop) and locate the node at *pos.
 */
static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return fib_trie_get_idx(seq, *pos);
}
2243
/* seq_file next: advance to the next node in the current table, then
 * the remaining tables on the same hash chain, then later hash buckets.
 * Caller holds RCU (taken in fib_trie_seq_start).
 */
static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct fib_trie_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct fib_table *tb = iter->tb;
	struct hlist_node *tb_node;
	unsigned int h;
	struct key_vector *n;

	++*pos;

	/* next node in same table */
	n = fib_trie_get_next(iter);
	if (n)
		return n;

	/* walk rest of this hash chain */
	h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
	while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
		tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
		n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
		if (n)
			goto found;
	}

	/* then onto the next hash buckets */
	while (++h < FIB_TABLE_HASHSZ) {
		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
			n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
			if (n)
				goto found;
		}
	}
	return NULL;

found:
	iter->tb = tb;
	return n;
}
2283
/* seq_file stop: pairs with the rcu_read_lock() in fib_trie_seq_start. */
static void fib_trie_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
2289
/* Emit @n levels of indentation into @seq. */
static void seq_indent(struct seq_file *seq, int n)
{
	while (n-- > 0)
		seq_puts(seq, " ");
}
2295
2296static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2297{
2298 switch (s) {
2299 case RT_SCOPE_UNIVERSE: return "universe";
2300 case RT_SCOPE_SITE: return "site";
2301 case RT_SCOPE_LINK: return "link";
2302 case RT_SCOPE_HOST: return "host";
2303 case RT_SCOPE_NOWHERE: return "nowhere";
2304 default:
2305 snprintf(buf, len, "scope=%d", s);
2306 return buf;
2307 }
2308}
2309
/* Printable names for the RTN_* route types, indexed by type value. */
static const char *const rtn_type_names[__RTN_MAX] = {
	[RTN_UNSPEC] = "UNSPEC",
	[RTN_UNICAST] = "UNICAST",
	[RTN_LOCAL] = "LOCAL",
	[RTN_BROADCAST] = "BROADCAST",
	[RTN_ANYCAST] = "ANYCAST",
	[RTN_MULTICAST] = "MULTICAST",
	[RTN_BLACKHOLE] = "BLACKHOLE",
	[RTN_UNREACHABLE] = "UNREACHABLE",
	[RTN_PROHIBIT] = "PROHIBIT",
	[RTN_THROW] = "THROW",
	[RTN_NAT] = "NAT",
	[RTN_XRESOLVE] = "XRESOLVE",
};
2324
2325static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2326{
2327 if (t < __RTN_MAX && rtn_type_names[t])
2328 return rtn_type_names[t];
2329 snprintf(buf, len, "type %u", t);
2330 return buf;
2331}
2332
2333
/* Render one trie node — an internal tnode, or a leaf with all of its
 * aliases — for /proc/net/fib_trie.
 */
static int fib_trie_seq_show(struct seq_file *seq, void *v)
{
	const struct fib_trie_iter *iter = seq->private;
	struct key_vector *n = v;

	/* a node directly below the root opens a new table's output */
	if (IS_TRIE(node_parent_rcu(n)))
		fib_table_print(seq, iter->tb);

	if (IS_TNODE(n)) {
		__be32 prf = htonl(n->key);

		seq_indent(seq, iter->depth-1);
		seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
			   &prf, KEYLENGTH - n->pos - n->bits, n->bits,
			   tn_info(n)->full_children,
			   tn_info(n)->empty_children);
	} else {
		__be32 val = htonl(n->key);
		struct fib_alias *fa;

		seq_indent(seq, iter->depth);
		seq_printf(seq, " |-- %pI4\n", &val);

		/* one line per alias: prefix length, scope and type */
		hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
			char buf1[32], buf2[32];

			seq_indent(seq, iter->depth + 1);
			seq_printf(seq, " /%zu %s %s",
				   KEYLENGTH - fa->fa_slen,
				   rtn_scope(buf1, sizeof(buf1),
					     fa->fa_info->fib_scope),
				   rtn_type(buf2, sizeof(buf2),
					    fa->fa_type));
			if (fa->fa_tos)
				seq_printf(seq, " tos=%d", fa->fa_tos);
			seq_putc(seq, '\n');
		}
	}

	return 0;
}
2375
/* seq_file operations for /proc/net/fib_trie */
static const struct seq_operations fib_trie_seq_ops = {
	.start = fib_trie_seq_start,
	.next = fib_trie_seq_next,
	.stop = fib_trie_seq_stop,
	.show = fib_trie_seq_show,
};
2382
/* open() hook for /proc/net/fib_trie: allocate per-net iterator state. */
static int fib_trie_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &fib_trie_seq_ops,
			    sizeof(struct fib_trie_iter));
}
2388
/* file_operations for /proc/net/fib_trie */
static const struct file_operations fib_trie_fops = {
	.owner = THIS_MODULE,
	.open = fib_trie_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
2396
/* Iterator state for /proc/net/route (leaves of the main table only). */
struct fib_route_iter {
	struct seq_net_private p;	/* net-namespace seq_file state */
	struct fib_table *main_tb;	/* RT_TABLE_MAIN table being dumped */
	struct key_vector *tnode;	/* node to resume the leaf walk from */
	loff_t pos;			/* 1-based position of cached key; 0 = no cache */
	t_key key;			/* key of the last leaf returned */
};
2404
/* Return the leaf at ordinal @pos, resuming from the cached key when
 * the caller is seeking forward; otherwise restart from key 0.
 * Returns NULL (and clears the cache) when @pos is past the end.
 */
static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
					    loff_t pos)
{
	struct key_vector *l, **tp = &iter->tnode;
	t_key key;

	/* use the cached location of the previously found key */
	if (iter->pos > 0 && pos >= iter->pos) {
		key = iter->key;
	} else {
		iter->pos = 1;
		key = 0;
	}

	pos -= iter->pos;

	while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
		key = l->key + 1;
		iter->pos++;
		l = NULL;

		/* handle the unlikely case of a key wrap */
		if (!key)
			break;
	}

	if (l)
		iter->key = l->key;	/* remember it */
	else
		iter->pos = 0;		/* forget it */

	return l;
}
2438
/* seq_file start for /proc/net/route: lock RCU, find the main table and
 * either resume at *pos or emit the header token at position 0.
 */
static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct fib_route_iter *iter = seq->private;
	struct fib_table *tb;
	struct trie *t;

	rcu_read_lock();

	/* /proc/net/route shows only the main table */
	tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
	if (!tb)
		return NULL;

	iter->main_tb = tb;
	t = (struct trie *)tb->tb_data;
	iter->tnode = t->kv;

	if (*pos != 0)
		return fib_route_get_idx(iter, *pos);

	/* position 0 produces the column-header line */
	iter->pos = 0;
	iter->key = KEY_MAX;

	return SEQ_START_TOKEN;
}
2464
/* seq_file next: walk to the leaf after the cached key; a wrapped key
 * (iter->key was KEY_MAX) means the walk is complete.
 */
static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct fib_route_iter *iter = seq->private;
	struct key_vector *l = NULL;
	t_key key = iter->key + 1;

	++*pos;

	/* only allow a key of 0 at the start of the sequence */
	if ((v == SEQ_START_TOKEN) || key)
		l = leaf_walk_rcu(&iter->tnode, key);

	if (l) {
		iter->key = l->key;
		iter->pos++;
	} else {
		/* end of walk: invalidate the cached position */
		iter->pos = 0;
	}

	return l;
}
2486
/* seq_file stop: pairs with the rcu_read_lock() in fib_route_seq_start. */
static void fib_route_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
2492
2493static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2494{
2495 unsigned int flags = 0;
2496
2497 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2498 flags = RTF_REJECT;
2499 if (fi && fi->fib_nh->nh_gw)
2500 flags |= RTF_GATEWAY;
2501 if (mask == htonl(0xFFFFFFFF))
2502 flags |= RTF_HOST;
2503 flags |= RTF_UP;
2504 return flags;
2505}
2506
2507
2508
2509
2510
2511
2512
2513static int fib_route_seq_show(struct seq_file *seq, void *v)
2514{
2515 struct fib_alias *fa;
2516 struct key_vector *l = v;
2517 __be32 prefix;
2518
2519 if (v == SEQ_START_TOKEN) {
2520 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2521 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2522 "\tWindow\tIRTT");
2523 return 0;
2524 }
2525
2526 prefix = htonl(l->key);
2527
2528 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2529 const struct fib_info *fi = fa->fa_info;
2530 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2531 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2532 int len;
2533
2534 if ((fa->fa_type == RTN_BROADCAST) ||
2535 (fa->fa_type == RTN_MULTICAST))
2536 continue;
2537
2538 if (fi)
2539 seq_printf(seq,
2540 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2541 "%d\t%08X\t%d\t%u\t%u%n",
2542 fi->fib_dev ? fi->fib_dev->name : "*",
2543 prefix,
2544 fi->fib_nh->nh_gw, flags, 0, 0,
2545 fi->fib_priority,
2546 mask,
2547 (fi->fib_advmss ?
2548 fi->fib_advmss + 40 : 0),
2549 fi->fib_window,
2550 fi->fib_rtt >> 3, &len);
2551 else
2552 seq_printf(seq,
2553 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2554 "%d\t%08X\t%d\t%u\t%u%n",
2555 prefix, 0, flags, 0, 0, 0,
2556 mask, 0, 0, 0, &len);
2557
2558 seq_printf(seq, "%*s\n", 127 - len, "");
2559 }
2560
2561 return 0;
2562}
2563
/* seq_file operations for /proc/net/route */
static const struct seq_operations fib_route_seq_ops = {
	.start = fib_route_seq_start,
	.next = fib_route_seq_next,
	.stop = fib_route_seq_stop,
	.show = fib_route_seq_show,
};
2570
/* open() hook for /proc/net/route: allocate per-net iterator state. */
static int fib_route_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &fib_route_seq_ops,
			    sizeof(struct fib_route_iter));
}
2576
/* file_operations for /proc/net/route */
static const struct file_operations fib_route_fops = {
	.owner = THIS_MODULE,
	.open = fib_route_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
2584
/* Create the per-namespace /proc/net entries: fib_trie, fib_triestat
 * and route.  On failure, unwind the entries already created and
 * return -ENOMEM.
 */
int __net_init fib_proc_init(struct net *net)
{
	if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
		goto out1;

	if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
			 &fib_triestat_fops))
		goto out2;

	if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
		goto out3;

	return 0;

out3:
	remove_proc_entry("fib_triestat", net->proc_net);
out2:
	remove_proc_entry("fib_trie", net->proc_net);
out1:
	return -ENOMEM;
}
2606
/* Remove the per-namespace /proc/net entries created by fib_proc_init(). */
void __net_exit fib_proc_exit(struct net *net)
{
	remove_proc_entry("fib_trie", net->proc_net);
	remove_proc_entry("fib_triestat", net->proc_net);
	remove_proc_entry("route", net->proc_net);
}
2613
2614#endif
2615