1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51#define VERSION "0.409"
52
53#include <asm/uaccess.h>
54#include <linux/bitops.h>
55#include <linux/types.h>
56#include <linux/kernel.h>
57#include <linux/mm.h>
58#include <linux/string.h>
59#include <linux/socket.h>
60#include <linux/sockios.h>
61#include <linux/errno.h>
62#include <linux/in.h>
63#include <linux/inet.h>
64#include <linux/inetdevice.h>
65#include <linux/netdevice.h>
66#include <linux/if_arp.h>
67#include <linux/proc_fs.h>
68#include <linux/rcupdate.h>
69#include <linux/skbuff.h>
70#include <linux/netlink.h>
71#include <linux/init.h>
72#include <linux/list.h>
73#include <linux/slab.h>
74#include <linux/export.h>
75#include <net/net_namespace.h>
76#include <net/ip.h>
77#include <net/protocol.h>
78#include <net/route.h>
79#include <net/tcp.h>
80#include <net/sock.h>
81#include <net/ip_fib.h>
82#include <net/switchdev.h>
83#include "fib_lookup.h"
84
85#define MAX_STAT_DEPTH 32
86
87#define KEYLENGTH (8*sizeof(t_key))
88#define KEY_MAX ((t_key)~0)
89
90typedef unsigned int t_key;
91
92#define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
93#define IS_TNODE(n) ((n)->bits)
94#define IS_LEAF(n) (!(n)->bits)
95
96struct key_vector {
97 t_key key;
98 unsigned char pos;
99 unsigned char bits;
100 unsigned char slen;
101 union {
102
103 struct hlist_head leaf;
104
105 struct key_vector __rcu *tnode[0];
106 };
107};
108
109struct tnode {
110 struct rcu_head rcu;
111 t_key empty_children;
112 t_key full_children;
113 struct key_vector __rcu *parent;
114 struct key_vector kv[1];
115#define tn_bits kv[0].bits
116};
117
118#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n])
119#define LEAF_SIZE TNODE_SIZE(1)
120
121#ifdef CONFIG_IP_FIB_TRIE_STATS
122struct trie_use_stats {
123 unsigned int gets;
124 unsigned int backtrack;
125 unsigned int semantic_match_passed;
126 unsigned int semantic_match_miss;
127 unsigned int null_node_hit;
128 unsigned int resize_node_skipped;
129};
130#endif
131
132struct trie_stat {
133 unsigned int totdepth;
134 unsigned int maxdepth;
135 unsigned int tnodes;
136 unsigned int leaves;
137 unsigned int nullpointers;
138 unsigned int prefixes;
139 unsigned int nodesizes[MAX_STAT_DEPTH];
140};
141
142struct trie {
143 struct key_vector kv[1];
144#ifdef CONFIG_IP_FIB_TRIE_STATS
145 struct trie_use_stats __percpu *stats;
146#endif
147};
148
149static struct key_vector *resize(struct trie *t, struct key_vector *tn);
150static size_t tnode_free_size;
151
152
153
154
155
156
157static const int sync_pages = 128;
158
159static struct kmem_cache *fn_alias_kmem __read_mostly;
160static struct kmem_cache *trie_leaf_kmem __read_mostly;
161
162static inline struct tnode *tn_info(struct key_vector *kv)
163{
164 return container_of(kv, struct tnode, kv[0]);
165}
166
167
168#define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
169#define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
170
171
172#define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
173#define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
174
175
176static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
177{
178 if (n)
179 rcu_assign_pointer(tn_info(n)->parent, tp);
180}
181
182#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
183
184
185
186
187static inline unsigned long child_length(const struct key_vector *tn)
188{
189 return (1ul << tn->bits) & ~(1ul);
190}
191
192#define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
193
194static inline unsigned long get_index(t_key key, struct key_vector *kv)
195{
196 unsigned long index = key ^ kv->key;
197
198 if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
199 return 0;
200
201 return index >> kv->pos;
202}
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263static const int halve_threshold = 25;
264static const int inflate_threshold = 50;
265static const int halve_threshold_root = 15;
266static const int inflate_threshold_root = 30;
267
268static void __alias_free_mem(struct rcu_head *head)
269{
270 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
271 kmem_cache_free(fn_alias_kmem, fa);
272}
273
274static inline void alias_free_mem_rcu(struct fib_alias *fa)
275{
276 call_rcu(&fa->rcu, __alias_free_mem);
277}
278
279#define TNODE_KMALLOC_MAX \
280 ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *))
281#define TNODE_VMALLOC_MAX \
282 ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
283
284static void __node_free_rcu(struct rcu_head *head)
285{
286 struct tnode *n = container_of(head, struct tnode, rcu);
287
288 if (!n->tn_bits)
289 kmem_cache_free(trie_leaf_kmem, n);
290 else if (n->tn_bits <= TNODE_KMALLOC_MAX)
291 kfree(n);
292 else
293 vfree(n);
294}
295
296#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
297
298static struct tnode *tnode_alloc(int bits)
299{
300 size_t size;
301
302
303 if (bits > TNODE_VMALLOC_MAX)
304 return NULL;
305
306
307 size = TNODE_SIZE(1ul << bits);
308
309 if (size <= PAGE_SIZE)
310 return kzalloc(size, GFP_KERNEL);
311 else
312 return vzalloc(size);
313}
314
315static inline void empty_child_inc(struct key_vector *n)
316{
317 ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children;
318}
319
320static inline void empty_child_dec(struct key_vector *n)
321{
322 tn_info(n)->empty_children-- ? : tn_info(n)->full_children--;
323}
324
325static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
326{
327 struct tnode *kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
328 struct key_vector *l = kv->kv;
329
330 if (!kv)
331 return NULL;
332
333
334 l->key = key;
335 l->pos = 0;
336 l->bits = 0;
337 l->slen = fa->fa_slen;
338
339
340 INIT_HLIST_HEAD(&l->leaf);
341 hlist_add_head(&fa->fa_list, &l->leaf);
342
343 return l;
344}
345
346static struct key_vector *tnode_new(t_key key, int pos, int bits)
347{
348 struct tnode *tnode = tnode_alloc(bits);
349 unsigned int shift = pos + bits;
350 struct key_vector *tn = tnode->kv;
351
352
353 BUG_ON(!bits || (shift > KEYLENGTH));
354
355 pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
356 sizeof(struct key_vector *) << bits);
357
358 if (!tnode)
359 return NULL;
360
361 if (bits == KEYLENGTH)
362 tnode->full_children = 1;
363 else
364 tnode->empty_children = 1ul << bits;
365
366 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
367 tn->pos = pos;
368 tn->bits = bits;
369 tn->slen = pos;
370
371 return tn;
372}
373
374
375
376
377static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
378{
379 return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
380}
381
382
383
384
385static void put_child(struct key_vector *tn, unsigned long i,
386 struct key_vector *n)
387{
388 struct key_vector *chi = get_child(tn, i);
389 int isfull, wasfull;
390
391 BUG_ON(i >= child_length(tn));
392
393
394 if (!n && chi)
395 empty_child_inc(tn);
396 if (n && !chi)
397 empty_child_dec(tn);
398
399
400 wasfull = tnode_full(tn, chi);
401 isfull = tnode_full(tn, n);
402
403 if (wasfull && !isfull)
404 tn_info(tn)->full_children--;
405 else if (!wasfull && isfull)
406 tn_info(tn)->full_children++;
407
408 if (n && (tn->slen < n->slen))
409 tn->slen = n->slen;
410
411 rcu_assign_pointer(tn->tnode[i], n);
412}
413
414static void update_children(struct key_vector *tn)
415{
416 unsigned long i;
417
418
419 for (i = child_length(tn); i;) {
420 struct key_vector *inode = get_child(tn, --i);
421
422 if (!inode)
423 continue;
424
425
426
427
428
429 if (node_parent(inode) == tn)
430 update_children(inode);
431 else
432 node_set_parent(inode, tn);
433 }
434}
435
436static inline void put_child_root(struct key_vector *tp, t_key key,
437 struct key_vector *n)
438{
439 if (IS_TRIE(tp))
440 rcu_assign_pointer(tp->tnode[0], n);
441 else
442 put_child(tp, get_index(key, tp), n);
443}
444
445static inline void tnode_free_init(struct key_vector *tn)
446{
447 tn_info(tn)->rcu.next = NULL;
448}
449
450static inline void tnode_free_append(struct key_vector *tn,
451 struct key_vector *n)
452{
453 tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
454 tn_info(tn)->rcu.next = &tn_info(n)->rcu;
455}
456
457static void tnode_free(struct key_vector *tn)
458{
459 struct callback_head *head = &tn_info(tn)->rcu;
460
461 while (head) {
462 head = head->next;
463 tnode_free_size += TNODE_SIZE(1ul << tn->bits);
464 node_free(tn);
465
466 tn = container_of(head, struct tnode, rcu)->kv;
467 }
468
469 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
470 tnode_free_size = 0;
471 synchronize_rcu();
472 }
473}
474
475static struct key_vector *replace(struct trie *t,
476 struct key_vector *oldtnode,
477 struct key_vector *tn)
478{
479 struct key_vector *tp = node_parent(oldtnode);
480 unsigned long i;
481
482
483 NODE_INIT_PARENT(tn, tp);
484 put_child_root(tp, tn->key, tn);
485
486
487 update_children(tn);
488
489
490 tnode_free(oldtnode);
491
492
493 for (i = child_length(tn); i;) {
494 struct key_vector *inode = get_child(tn, --i);
495
496
497 if (tnode_full(tn, inode))
498 tn = resize(t, inode);
499 }
500
501 return tp;
502}
503
504static struct key_vector *inflate(struct trie *t,
505 struct key_vector *oldtnode)
506{
507 struct key_vector *tn;
508 unsigned long i;
509 t_key m;
510
511 pr_debug("In inflate\n");
512
513 tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
514 if (!tn)
515 goto notnode;
516
517
518 tnode_free_init(oldtnode);
519
520
521
522
523
524
525 for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
526 struct key_vector *inode = get_child(oldtnode, --i);
527 struct key_vector *node0, *node1;
528 unsigned long j, k;
529
530
531 if (!inode)
532 continue;
533
534
535 if (!tnode_full(oldtnode, inode)) {
536 put_child(tn, get_index(inode->key, tn), inode);
537 continue;
538 }
539
540
541 tnode_free_append(oldtnode, inode);
542
543
544 if (inode->bits == 1) {
545 put_child(tn, 2 * i + 1, get_child(inode, 1));
546 put_child(tn, 2 * i, get_child(inode, 0));
547 continue;
548 }
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564 node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
565 if (!node1)
566 goto nomem;
567 node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);
568
569 tnode_free_append(tn, node1);
570 if (!node0)
571 goto nomem;
572 tnode_free_append(tn, node0);
573
574
575 for (k = child_length(inode), j = k / 2; j;) {
576 put_child(node1, --j, get_child(inode, --k));
577 put_child(node0, j, get_child(inode, j));
578 put_child(node1, --j, get_child(inode, --k));
579 put_child(node0, j, get_child(inode, j));
580 }
581
582
583 NODE_INIT_PARENT(node1, tn);
584 NODE_INIT_PARENT(node0, tn);
585
586
587 put_child(tn, 2 * i + 1, node1);
588 put_child(tn, 2 * i, node0);
589 }
590
591
592 return replace(t, oldtnode, tn);
593nomem:
594
595 tnode_free(tn);
596notnode:
597 return NULL;
598}
599
600static struct key_vector *halve(struct trie *t,
601 struct key_vector *oldtnode)
602{
603 struct key_vector *tn;
604 unsigned long i;
605
606 pr_debug("In halve\n");
607
608 tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
609 if (!tn)
610 goto notnode;
611
612
613 tnode_free_init(oldtnode);
614
615
616
617
618
619
620 for (i = child_length(oldtnode); i;) {
621 struct key_vector *node1 = get_child(oldtnode, --i);
622 struct key_vector *node0 = get_child(oldtnode, --i);
623 struct key_vector *inode;
624
625
626 if (!node1 || !node0) {
627 put_child(tn, i / 2, node1 ? : node0);
628 continue;
629 }
630
631
632 inode = tnode_new(node0->key, oldtnode->pos, 1);
633 if (!inode)
634 goto nomem;
635 tnode_free_append(tn, inode);
636
637
638 put_child(inode, 1, node1);
639 put_child(inode, 0, node0);
640 NODE_INIT_PARENT(inode, tn);
641
642
643 put_child(tn, i / 2, inode);
644 }
645
646
647 return replace(t, oldtnode, tn);
648nomem:
649
650 tnode_free(tn);
651notnode:
652 return NULL;
653}
654
655static struct key_vector *collapse(struct trie *t,
656 struct key_vector *oldtnode)
657{
658 struct key_vector *n, *tp;
659 unsigned long i;
660
661
662 for (n = NULL, i = child_length(oldtnode); !n && i;)
663 n = get_child(oldtnode, --i);
664
665
666 tp = node_parent(oldtnode);
667 put_child_root(tp, oldtnode->key, n);
668 node_set_parent(n, tp);
669
670
671 node_free(oldtnode);
672
673 return tp;
674}
675
676static unsigned char update_suffix(struct key_vector *tn)
677{
678 unsigned char slen = tn->pos;
679 unsigned long stride, i;
680
681
682
683
684
685
686 for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
687 struct key_vector *n = get_child(tn, i);
688
689 if (!n || (n->slen <= slen))
690 continue;
691
692
693 stride <<= (n->slen - slen);
694 slen = n->slen;
695 i &= ~(stride - 1);
696
697
698
699
700
701
702 if ((slen + 1) >= (tn->pos + tn->bits))
703 break;
704 }
705
706 tn->slen = slen;
707
708 return slen;
709}
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
769{
770 unsigned long used = child_length(tn);
771 unsigned long threshold = used;
772
773
774 threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
775 used -= tn_info(tn)->empty_children;
776 used += tn_info(tn)->full_children;
777
778
779
780 return (used > 1) && tn->pos && ((50 * used) >= threshold);
781}
782
783static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
784{
785 unsigned long used = child_length(tn);
786 unsigned long threshold = used;
787
788
789 threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
790 used -= tn_info(tn)->empty_children;
791
792
793
794 return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
795}
796
797static inline bool should_collapse(struct key_vector *tn)
798{
799 unsigned long used = child_length(tn);
800
801 used -= tn_info(tn)->empty_children;
802
803
804 if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
805 used -= KEY_MAX;
806
807
808 return used < 2;
809}
810
811#define MAX_WORK 10
812static struct key_vector *resize(struct trie *t, struct key_vector *tn)
813{
814#ifdef CONFIG_IP_FIB_TRIE_STATS
815 struct trie_use_stats __percpu *stats = t->stats;
816#endif
817 struct key_vector *tp = node_parent(tn);
818 unsigned long cindex = get_index(tn->key, tp);
819 int max_work = MAX_WORK;
820
821 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
822 tn, inflate_threshold, halve_threshold);
823
824
825
826
827
828 BUG_ON(tn != get_child(tp, cindex));
829
830
831
832
833 while (should_inflate(tp, tn) && max_work) {
834 tp = inflate(t, tn);
835 if (!tp) {
836#ifdef CONFIG_IP_FIB_TRIE_STATS
837 this_cpu_inc(stats->resize_node_skipped);
838#endif
839 break;
840 }
841
842 max_work--;
843 tn = get_child(tp, cindex);
844 }
845
846
847 tp = node_parent(tn);
848
849
850 if (max_work != MAX_WORK)
851 return tp;
852
853
854
855
856 while (should_halve(tp, tn) && max_work) {
857 tp = halve(t, tn);
858 if (!tp) {
859#ifdef CONFIG_IP_FIB_TRIE_STATS
860 this_cpu_inc(stats->resize_node_skipped);
861#endif
862 break;
863 }
864
865 max_work--;
866 tn = get_child(tp, cindex);
867 }
868
869
870 if (should_collapse(tn))
871 return collapse(t, tn);
872
873
874 tp = node_parent(tn);
875
876
877 if (max_work != MAX_WORK)
878 return tp;
879
880
881 if (tn->slen > tn->pos) {
882 unsigned char slen = update_suffix(tn);
883
884 if (slen > tp->slen)
885 tp->slen = slen;
886 }
887
888 return tp;
889}
890
891static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l)
892{
893 while ((tp->slen > tp->pos) && (tp->slen > l->slen)) {
894 if (update_suffix(tp) > l->slen)
895 break;
896 tp = node_parent(tp);
897 }
898}
899
900static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l)
901{
902
903
904
905 while (tn->slen < l->slen) {
906 tn->slen = l->slen;
907 tn = node_parent(tn);
908 }
909}
910
911
912static struct key_vector *fib_find_node(struct trie *t,
913 struct key_vector **tp, u32 key)
914{
915 struct key_vector *pn, *n = t->kv;
916 unsigned long index = 0;
917
918 do {
919 pn = n;
920 n = get_child_rcu(n, index);
921
922 if (!n)
923 break;
924
925 index = get_cindex(key, n);
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941 if (index >= (1ul << n->bits)) {
942 n = NULL;
943 break;
944 }
945
946
947 } while (IS_TNODE(n));
948
949 *tp = pn;
950
951 return n;
952}
953
954
955
956
957static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
958 u8 tos, u32 prio, u32 tb_id)
959{
960 struct fib_alias *fa;
961
962 if (!fah)
963 return NULL;
964
965 hlist_for_each_entry(fa, fah, fa_list) {
966 if (fa->fa_slen < slen)
967 continue;
968 if (fa->fa_slen != slen)
969 break;
970 if (fa->tb_id > tb_id)
971 continue;
972 if (fa->tb_id != tb_id)
973 break;
974 if (fa->fa_tos > tos)
975 continue;
976 if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
977 return fa;
978 }
979
980 return NULL;
981}
982
983static void trie_rebalance(struct trie *t, struct key_vector *tn)
984{
985 while (!IS_TRIE(tn))
986 tn = resize(t, tn);
987}
988
989static int fib_insert_node(struct trie *t, struct key_vector *tp,
990 struct fib_alias *new, t_key key)
991{
992 struct key_vector *n, *l;
993
994 l = leaf_new(key, new);
995 if (!l)
996 goto noleaf;
997
998
999 n = get_child(tp, get_index(key, tp));
1000
1001
1002
1003
1004
1005
1006
1007 if (n) {
1008 struct key_vector *tn;
1009
1010 tn = tnode_new(key, __fls(key ^ n->key), 1);
1011 if (!tn)
1012 goto notnode;
1013
1014
1015 NODE_INIT_PARENT(tn, tp);
1016 put_child(tn, get_index(key, tn) ^ 1, n);
1017
1018
1019 put_child_root(tp, key, tn);
1020 node_set_parent(n, tn);
1021
1022
1023 tp = tn;
1024 }
1025
1026
1027 NODE_INIT_PARENT(l, tp);
1028 put_child_root(tp, key, l);
1029 trie_rebalance(t, tp);
1030
1031 return 0;
1032notnode:
1033 node_free(l);
1034noleaf:
1035 return -ENOMEM;
1036}
1037
1038static int fib_insert_alias(struct trie *t, struct key_vector *tp,
1039 struct key_vector *l, struct fib_alias *new,
1040 struct fib_alias *fa, t_key key)
1041{
1042 if (!l)
1043 return fib_insert_node(t, tp, new, key);
1044
1045 if (fa) {
1046 hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
1047 } else {
1048 struct fib_alias *last;
1049
1050 hlist_for_each_entry(last, &l->leaf, fa_list) {
1051 if (new->fa_slen < last->fa_slen)
1052 break;
1053 if ((new->fa_slen == last->fa_slen) &&
1054 (new->tb_id > last->tb_id))
1055 break;
1056 fa = last;
1057 }
1058
1059 if (fa)
1060 hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
1061 else
1062 hlist_add_head_rcu(&new->fa_list, &l->leaf);
1063 }
1064
1065
1066 if (l->slen < new->fa_slen) {
1067 l->slen = new->fa_slen;
1068 leaf_push_suffix(tp, l);
1069 }
1070
1071 return 0;
1072}
1073
1074
1075int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1076{
1077 struct trie *t = (struct trie *)tb->tb_data;
1078 struct fib_alias *fa, *new_fa;
1079 struct key_vector *l, *tp;
1080 struct fib_info *fi;
1081 u8 plen = cfg->fc_dst_len;
1082 u8 slen = KEYLENGTH - plen;
1083 u8 tos = cfg->fc_tos;
1084 u32 key;
1085 int err;
1086
1087 if (plen > KEYLENGTH)
1088 return -EINVAL;
1089
1090 key = ntohl(cfg->fc_dst);
1091
1092 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
1093
1094 if ((plen < KEYLENGTH) && (key << plen))
1095 return -EINVAL;
1096
1097 fi = fib_create_info(cfg);
1098 if (IS_ERR(fi)) {
1099 err = PTR_ERR(fi);
1100 goto err;
1101 }
1102
1103 l = fib_find_node(t, &tp, key);
1104 fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
1105 tb->tb_id) : NULL;
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116 if (fa && fa->fa_tos == tos &&
1117 fa->fa_info->fib_priority == fi->fib_priority) {
1118 struct fib_alias *fa_first, *fa_match;
1119
1120 err = -EEXIST;
1121 if (cfg->fc_nlflags & NLM_F_EXCL)
1122 goto out;
1123
1124
1125
1126
1127
1128
1129 fa_match = NULL;
1130 fa_first = fa;
1131 hlist_for_each_entry_from(fa, fa_list) {
1132 if ((fa->fa_slen != slen) ||
1133 (fa->tb_id != tb->tb_id) ||
1134 (fa->fa_tos != tos))
1135 break;
1136 if (fa->fa_info->fib_priority != fi->fib_priority)
1137 break;
1138 if (fa->fa_type == cfg->fc_type &&
1139 fa->fa_info == fi) {
1140 fa_match = fa;
1141 break;
1142 }
1143 }
1144
1145 if (cfg->fc_nlflags & NLM_F_REPLACE) {
1146 struct fib_info *fi_drop;
1147 u8 state;
1148
1149 fa = fa_first;
1150 if (fa_match) {
1151 if (fa == fa_match)
1152 err = 0;
1153 goto out;
1154 }
1155 err = -ENOBUFS;
1156 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1157 if (!new_fa)
1158 goto out;
1159
1160 fi_drop = fa->fa_info;
1161 new_fa->fa_tos = fa->fa_tos;
1162 new_fa->fa_info = fi;
1163 new_fa->fa_type = cfg->fc_type;
1164 state = fa->fa_state;
1165 new_fa->fa_state = state & ~FA_S_ACCESSED;
1166 new_fa->fa_slen = fa->fa_slen;
1167 new_fa->tb_id = tb->tb_id;
1168
1169 err = netdev_switch_fib_ipv4_add(key, plen, fi,
1170 new_fa->fa_tos,
1171 cfg->fc_type,
1172 cfg->fc_nlflags,
1173 tb->tb_id);
1174 if (err) {
1175 netdev_switch_fib_ipv4_abort(fi);
1176 kmem_cache_free(fn_alias_kmem, new_fa);
1177 goto out;
1178 }
1179
1180 hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1181
1182 alias_free_mem_rcu(fa);
1183
1184 fib_release_info(fi_drop);
1185 if (state & FA_S_ACCESSED)
1186 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1187 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1188 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
1189
1190 goto succeeded;
1191 }
1192
1193
1194
1195
1196 if (fa_match)
1197 goto out;
1198
1199 if (!(cfg->fc_nlflags & NLM_F_APPEND))
1200 fa = fa_first;
1201 }
1202 err = -ENOENT;
1203 if (!(cfg->fc_nlflags & NLM_F_CREATE))
1204 goto out;
1205
1206 err = -ENOBUFS;
1207 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1208 if (!new_fa)
1209 goto out;
1210
1211 new_fa->fa_info = fi;
1212 new_fa->fa_tos = tos;
1213 new_fa->fa_type = cfg->fc_type;
1214 new_fa->fa_state = 0;
1215 new_fa->fa_slen = slen;
1216 new_fa->tb_id = tb->tb_id;
1217
1218
1219 err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
1220 cfg->fc_type,
1221 cfg->fc_nlflags,
1222 tb->tb_id);
1223 if (err) {
1224 netdev_switch_fib_ipv4_abort(fi);
1225 goto out_free_new_fa;
1226 }
1227
1228
1229 err = fib_insert_alias(t, tp, l, new_fa, fa, key);
1230 if (err)
1231 goto out_sw_fib_del;
1232
1233 if (!plen)
1234 tb->tb_num_default++;
1235
1236 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1237 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1238 &cfg->fc_nlinfo, 0);
1239succeeded:
1240 return 0;
1241
1242out_sw_fib_del:
1243 netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
1244out_free_new_fa:
1245 kmem_cache_free(fn_alias_kmem, new_fa);
1246out:
1247 fib_release_info(fi);
1248err:
1249 return err;
1250}
1251
1252static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
1253{
1254 t_key prefix = n->key;
1255
1256 return (key ^ prefix) & (prefix | -prefix);
1257}
1258
1259
1260int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1261 struct fib_result *res, int fib_flags)
1262{
1263 struct trie *t = (struct trie *) tb->tb_data;
1264#ifdef CONFIG_IP_FIB_TRIE_STATS
1265 struct trie_use_stats __percpu *stats = t->stats;
1266#endif
1267 const t_key key = ntohl(flp->daddr);
1268 struct key_vector *n, *pn;
1269 struct fib_alias *fa;
1270 unsigned long index;
1271 t_key cindex;
1272
1273 pn = t->kv;
1274 cindex = 0;
1275
1276 n = get_child_rcu(pn, cindex);
1277 if (!n)
1278 return -EAGAIN;
1279
1280#ifdef CONFIG_IP_FIB_TRIE_STATS
1281 this_cpu_inc(stats->gets);
1282#endif
1283
1284
1285 for (;;) {
1286 index = get_cindex(key, n);
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302 if (index >= (1ul << n->bits))
1303 break;
1304
1305
1306 if (IS_LEAF(n))
1307 goto found;
1308
1309
1310
1311
1312 if (n->slen > n->pos) {
1313 pn = n;
1314 cindex = index;
1315 }
1316
1317 n = get_child_rcu(n, index);
1318 if (unlikely(!n))
1319 goto backtrace;
1320 }
1321
1322
1323 for (;;) {
1324
1325 struct key_vector __rcu **cptr = n->tnode;
1326
1327
1328
1329
1330
1331 if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
1332 goto backtrace;
1333
1334
1335 if (unlikely(IS_LEAF(n)))
1336 break;
1337
1338
1339
1340
1341
1342
1343 while ((n = rcu_dereference(*cptr)) == NULL) {
1344backtrace:
1345#ifdef CONFIG_IP_FIB_TRIE_STATS
1346 if (!n)
1347 this_cpu_inc(stats->null_node_hit);
1348#endif
1349
1350
1351
1352
1353
1354 while (!cindex) {
1355 t_key pkey = pn->key;
1356
1357
1358
1359
1360
1361 if (IS_TRIE(pn))
1362 return -EAGAIN;
1363#ifdef CONFIG_IP_FIB_TRIE_STATS
1364 this_cpu_inc(stats->backtrack);
1365#endif
1366
1367 pn = node_parent_rcu(pn);
1368 cindex = get_index(pkey, pn);
1369 }
1370
1371
1372 cindex &= cindex - 1;
1373
1374
1375 cptr = &pn->tnode[cindex];
1376 }
1377 }
1378
1379found:
1380
1381 index = key ^ n->key;
1382
1383
1384 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
1385 struct fib_info *fi = fa->fa_info;
1386 int nhsel, err;
1387
1388 if ((index >= (1ul << fa->fa_slen)) &&
1389 ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
1390 continue;
1391 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1392 continue;
1393 if (fi->fib_dead)
1394 continue;
1395 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1396 continue;
1397 fib_alias_accessed(fa);
1398 err = fib_props[fa->fa_type].error;
1399 if (unlikely(err < 0)) {
1400#ifdef CONFIG_IP_FIB_TRIE_STATS
1401 this_cpu_inc(stats->semantic_match_passed);
1402#endif
1403 return err;
1404 }
1405 if (fi->fib_flags & RTNH_F_DEAD)
1406 continue;
1407 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1408 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1409
1410 if (nh->nh_flags & RTNH_F_DEAD)
1411 continue;
1412 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
1413 continue;
1414
1415 if (!(fib_flags & FIB_LOOKUP_NOREF))
1416 atomic_inc(&fi->fib_clntref);
1417
1418 res->prefixlen = KEYLENGTH - fa->fa_slen;
1419 res->nh_sel = nhsel;
1420 res->type = fa->fa_type;
1421 res->scope = fi->fib_scope;
1422 res->fi = fi;
1423 res->table = tb;
1424 res->fa_head = &n->leaf;
1425#ifdef CONFIG_IP_FIB_TRIE_STATS
1426 this_cpu_inc(stats->semantic_match_passed);
1427#endif
1428 return err;
1429 }
1430 }
1431#ifdef CONFIG_IP_FIB_TRIE_STATS
1432 this_cpu_inc(stats->semantic_match_miss);
1433#endif
1434 goto backtrace;
1435}
1436EXPORT_SYMBOL_GPL(fib_table_lookup);
1437
1438static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1439 struct key_vector *l, struct fib_alias *old)
1440{
1441
1442 struct hlist_node **pprev = old->fa_list.pprev;
1443 struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
1444
1445
1446 hlist_del_rcu(&old->fa_list);
1447
1448
1449
1450
1451 if (hlist_empty(&l->leaf)) {
1452 put_child_root(tp, l->key, NULL);
1453 node_free(l);
1454 trie_rebalance(t, tp);
1455 return;
1456 }
1457
1458
1459 if (*pprev)
1460 return;
1461
1462
1463 l->slen = fa->fa_slen;
1464 leaf_pull_suffix(tp, l);
1465}
1466
1467
1468int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1469{
1470 struct trie *t = (struct trie *) tb->tb_data;
1471 struct fib_alias *fa, *fa_to_delete;
1472 struct key_vector *l, *tp;
1473 u8 plen = cfg->fc_dst_len;
1474 u8 slen = KEYLENGTH - plen;
1475 u8 tos = cfg->fc_tos;
1476 u32 key;
1477
1478 if (plen > KEYLENGTH)
1479 return -EINVAL;
1480
1481 key = ntohl(cfg->fc_dst);
1482
1483 if ((plen < KEYLENGTH) && (key << plen))
1484 return -EINVAL;
1485
1486 l = fib_find_node(t, &tp, key);
1487 if (!l)
1488 return -ESRCH;
1489
1490 fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id);
1491 if (!fa)
1492 return -ESRCH;
1493
1494 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1495
1496 fa_to_delete = NULL;
1497 hlist_for_each_entry_from(fa, fa_list) {
1498 struct fib_info *fi = fa->fa_info;
1499
1500 if ((fa->fa_slen != slen) ||
1501 (fa->tb_id != tb->tb_id) ||
1502 (fa->fa_tos != tos))
1503 break;
1504
1505 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1506 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1507 fa->fa_info->fib_scope == cfg->fc_scope) &&
1508 (!cfg->fc_prefsrc ||
1509 fi->fib_prefsrc == cfg->fc_prefsrc) &&
1510 (!cfg->fc_protocol ||
1511 fi->fib_protocol == cfg->fc_protocol) &&
1512 fib_nh_match(cfg, fi) == 0) {
1513 fa_to_delete = fa;
1514 break;
1515 }
1516 }
1517
1518 if (!fa_to_delete)
1519 return -ESRCH;
1520
1521 netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
1522 cfg->fc_type, tb->tb_id);
1523
1524 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1525 &cfg->fc_nlinfo, 0);
1526
1527 if (!plen)
1528 tb->tb_num_default--;
1529
1530 fib_remove_alias(t, tp, l, fa_to_delete);
1531
1532 if (fa_to_delete->fa_state & FA_S_ACCESSED)
1533 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1534
1535 fib_release_info(fa_to_delete->fa_info);
1536 alias_free_mem_rcu(fa_to_delete);
1537 return 0;
1538}
1539
1540
1541static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
1542{
1543 struct key_vector *pn, *n = *tn;
1544 unsigned long cindex;
1545
1546
1547 do {
1548
1549 pn = n;
1550 cindex = key ? get_index(key, pn) : 0;
1551
1552 if (cindex >> pn->bits)
1553 break;
1554
1555
1556 n = get_child_rcu(pn, cindex++);
1557 if (!n)
1558 break;
1559
1560
1561 if (IS_LEAF(n) && (n->key >= key))
1562 goto found;
1563 } while (IS_TNODE(n));
1564
1565
1566 while (!IS_TRIE(pn)) {
1567
1568 if (cindex >= (1ul << pn->bits)) {
1569 t_key pkey = pn->key;
1570
1571 pn = node_parent_rcu(pn);
1572 cindex = get_index(pkey, pn) + 1;
1573 continue;
1574 }
1575
1576
1577 n = get_child_rcu(pn, cindex++);
1578 if (!n)
1579 continue;
1580
1581
1582 if (IS_LEAF(n))
1583 goto found;
1584
1585
1586 pn = n;
1587 cindex = 0;
1588 }
1589
1590 *tn = pn;
1591 return NULL;
1592found:
1593
1594 *tn = pn;
1595 return n;
1596}
1597
1598static void fib_trie_free(struct fib_table *tb)
1599{
1600 struct trie *t = (struct trie *)tb->tb_data;
1601 struct key_vector *pn = t->kv;
1602 unsigned long cindex = 1;
1603 struct hlist_node *tmp;
1604 struct fib_alias *fa;
1605
1606
1607 for (;;) {
1608 struct key_vector *n;
1609
1610 if (!(cindex--)) {
1611 t_key pkey = pn->key;
1612
1613 if (IS_TRIE(pn))
1614 break;
1615
1616 n = pn;
1617 pn = node_parent(pn);
1618
1619
1620 put_child_root(pn, n->key, NULL);
1621 node_free(n);
1622
1623 cindex = get_index(pkey, pn);
1624
1625 continue;
1626 }
1627
1628
1629 n = get_child(pn, cindex);
1630 if (!n)
1631 continue;
1632
1633 if (IS_TNODE(n)) {
1634
1635 pn = n;
1636 cindex = 1ul << n->bits;
1637
1638 continue;
1639 }
1640
1641 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1642 hlist_del_rcu(&fa->fa_list);
1643 alias_free_mem_rcu(fa);
1644 }
1645
1646 put_child_root(pn, n->key, NULL);
1647 node_free(n);
1648 }
1649
1650#ifdef CONFIG_IP_FIB_TRIE_STATS
1651 free_percpu(t->stats);
1652#endif
1653 kfree(tb);
1654}
1655
1656struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
1657{
1658 struct trie *ot = (struct trie *)oldtb->tb_data;
1659 struct key_vector *l, *tp = ot->kv;
1660 struct fib_table *local_tb;
1661 struct fib_alias *fa;
1662 struct trie *lt;
1663 t_key key = 0;
1664
1665 if (oldtb->tb_data == oldtb->__data)
1666 return oldtb;
1667
1668 local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
1669 if (!local_tb)
1670 return NULL;
1671
1672 lt = (struct trie *)local_tb->tb_data;
1673
1674 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1675 struct key_vector *local_l = NULL, *local_tp;
1676
1677 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1678 struct fib_alias *new_fa;
1679
1680 if (local_tb->tb_id != fa->tb_id)
1681 continue;
1682
1683
1684 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1685 if (!new_fa)
1686 goto out;
1687
1688 memcpy(new_fa, fa, sizeof(*fa));
1689
1690
1691 if (!local_l)
1692 local_l = fib_find_node(lt, &local_tp, l->key);
1693
1694 if (fib_insert_alias(lt, local_tp, local_l, new_fa,
1695 NULL, l->key))
1696 goto out;
1697 }
1698
1699
1700 key = l->key + 1;
1701 if (key < l->key)
1702 break;
1703 }
1704
1705 return local_tb;
1706out:
1707 fib_trie_free(local_tb);
1708
1709 return NULL;
1710}
1711
1712
1713void fib_table_flush_external(struct fib_table *tb)
1714{
1715 struct trie *t = (struct trie *)tb->tb_data;
1716 struct key_vector *pn = t->kv;
1717 unsigned long cindex = 1;
1718 struct hlist_node *tmp;
1719 struct fib_alias *fa;
1720
1721
1722 for (;;) {
1723 unsigned char slen = 0;
1724 struct key_vector *n;
1725
1726 if (!(cindex--)) {
1727 t_key pkey = pn->key;
1728
1729
1730 if (IS_TRIE(pn))
1731 break;
1732
1733
1734 pn = resize(t, pn);
1735 cindex = get_index(pkey, pn);
1736
1737 continue;
1738 }
1739
1740
1741 n = get_child(pn, cindex);
1742 if (!n)
1743 continue;
1744
1745 if (IS_TNODE(n)) {
1746
1747 pn = n;
1748 cindex = 1ul << n->bits;
1749
1750 continue;
1751 }
1752
1753 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1754 struct fib_info *fi = fa->fa_info;
1755
1756
1757
1758
1759 if (tb->tb_id != fa->tb_id) {
1760 hlist_del_rcu(&fa->fa_list);
1761 alias_free_mem_rcu(fa);
1762 continue;
1763 }
1764
1765
1766 slen = fa->fa_slen;
1767
1768 if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
1769 continue;
1770
1771 netdev_switch_fib_ipv4_del(n->key,
1772 KEYLENGTH - fa->fa_slen,
1773 fi, fa->fa_tos,
1774 fa->fa_type, tb->tb_id);
1775 }
1776
1777
1778 n->slen = slen;
1779
1780 if (hlist_empty(&n->leaf)) {
1781 put_child_root(pn, n->key, NULL);
1782 node_free(n);
1783 } else {
1784 leaf_pull_suffix(pn, n);
1785 }
1786 }
1787}
1788
1789
1790int fib_table_flush(struct fib_table *tb)
1791{
1792 struct trie *t = (struct trie *)tb->tb_data;
1793 struct key_vector *pn = t->kv;
1794 unsigned long cindex = 1;
1795 struct hlist_node *tmp;
1796 struct fib_alias *fa;
1797 int found = 0;
1798
1799
1800 for (;;) {
1801 unsigned char slen = 0;
1802 struct key_vector *n;
1803
1804 if (!(cindex--)) {
1805 t_key pkey = pn->key;
1806
1807
1808 if (IS_TRIE(pn))
1809 break;
1810
1811
1812 pn = resize(t, pn);
1813 cindex = get_index(pkey, pn);
1814
1815 continue;
1816 }
1817
1818
1819 n = get_child(pn, cindex);
1820 if (!n)
1821 continue;
1822
1823 if (IS_TNODE(n)) {
1824
1825 pn = n;
1826 cindex = 1ul << n->bits;
1827
1828 continue;
1829 }
1830
1831 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1832 struct fib_info *fi = fa->fa_info;
1833
1834 if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) {
1835 slen = fa->fa_slen;
1836 continue;
1837 }
1838
1839 netdev_switch_fib_ipv4_del(n->key,
1840 KEYLENGTH - fa->fa_slen,
1841 fi, fa->fa_tos,
1842 fa->fa_type, tb->tb_id);
1843 hlist_del_rcu(&fa->fa_list);
1844 fib_release_info(fa->fa_info);
1845 alias_free_mem_rcu(fa);
1846 found++;
1847 }
1848
1849
1850 n->slen = slen;
1851
1852 if (hlist_empty(&n->leaf)) {
1853 put_child_root(pn, n->key, NULL);
1854 node_free(n);
1855 } else {
1856 leaf_pull_suffix(pn, n);
1857 }
1858 }
1859
1860 pr_debug("trie_flush found=%d\n", found);
1861 return found;
1862}
1863
1864static void __trie_free_rcu(struct rcu_head *head)
1865{
1866 struct fib_table *tb = container_of(head, struct fib_table, rcu);
1867#ifdef CONFIG_IP_FIB_TRIE_STATS
1868 struct trie *t = (struct trie *)tb->tb_data;
1869
1870 if (tb->tb_data == tb->__data)
1871 free_percpu(t->stats);
1872#endif
1873 kfree(tb);
1874}
1875
1876void fib_free_table(struct fib_table *tb)
1877{
1878 call_rcu(&tb->rcu, __trie_free_rcu);
1879}
1880
1881static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
1882 struct sk_buff *skb, struct netlink_callback *cb)
1883{
1884 __be32 xkey = htonl(l->key);
1885 struct fib_alias *fa;
1886 int i, s_i;
1887
1888 s_i = cb->args[4];
1889 i = 0;
1890
1891
1892 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1893 if (i < s_i) {
1894 i++;
1895 continue;
1896 }
1897
1898 if (tb->tb_id != fa->tb_id) {
1899 i++;
1900 continue;
1901 }
1902
1903 if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
1904 cb->nlh->nlmsg_seq,
1905 RTM_NEWROUTE,
1906 tb->tb_id,
1907 fa->fa_type,
1908 xkey,
1909 KEYLENGTH - fa->fa_slen,
1910 fa->fa_tos,
1911 fa->fa_info, NLM_F_MULTI) < 0) {
1912 cb->args[4] = i;
1913 return -1;
1914 }
1915 i++;
1916 }
1917
1918 cb->args[4] = i;
1919 return skb->len;
1920}
1921
1922
1923int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1924 struct netlink_callback *cb)
1925{
1926 struct trie *t = (struct trie *)tb->tb_data;
1927 struct key_vector *l, *tp = t->kv;
1928
1929
1930
1931 int count = cb->args[2];
1932 t_key key = cb->args[3];
1933
1934 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1935 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
1936 cb->args[3] = key;
1937 cb->args[2] = count;
1938 return -1;
1939 }
1940
1941 ++count;
1942 key = l->key + 1;
1943
1944 memset(&cb->args[4], 0,
1945 sizeof(cb->args) - 4*sizeof(cb->args[0]));
1946
1947
1948 if (key < l->key)
1949 break;
1950 }
1951
1952 cb->args[3] = key;
1953 cb->args[2] = count;
1954
1955 return skb->len;
1956}
1957
1958void __init fib_trie_init(void)
1959{
1960 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1961 sizeof(struct fib_alias),
1962 0, SLAB_PANIC, NULL);
1963
1964 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
1965 LEAF_SIZE,
1966 0, SLAB_PANIC, NULL);
1967}
1968
1969struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
1970{
1971 struct fib_table *tb;
1972 struct trie *t;
1973 size_t sz = sizeof(*tb);
1974
1975 if (!alias)
1976 sz += sizeof(struct trie);
1977
1978 tb = kzalloc(sz, GFP_KERNEL);
1979 if (!tb)
1980 return NULL;
1981
1982 tb->tb_id = id;
1983 tb->tb_default = -1;
1984 tb->tb_num_default = 0;
1985 tb->tb_data = (alias ? alias->__data : tb->__data);
1986
1987 if (alias)
1988 return tb;
1989
1990 t = (struct trie *) tb->tb_data;
1991 t->kv[0].pos = KEYLENGTH;
1992 t->kv[0].slen = KEYLENGTH;
1993#ifdef CONFIG_IP_FIB_TRIE_STATS
1994 t->stats = alloc_percpu(struct trie_use_stats);
1995 if (!t->stats) {
1996 kfree(tb);
1997 tb = NULL;
1998 }
1999#endif
2000
2001 return tb;
2002}
2003
2004#ifdef CONFIG_PROC_FS
2005
2006struct fib_trie_iter {
2007 struct seq_net_private p;
2008 struct fib_table *tb;
2009 struct key_vector *tnode;
2010 unsigned int index;
2011 unsigned int depth;
2012};
2013
2014static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
2015{
2016 unsigned long cindex = iter->index;
2017 struct key_vector *pn = iter->tnode;
2018 t_key pkey;
2019
2020 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2021 iter->tnode, iter->index, iter->depth);
2022
2023 while (!IS_TRIE(pn)) {
2024 while (cindex < child_length(pn)) {
2025 struct key_vector *n = get_child_rcu(pn, cindex++);
2026
2027 if (!n)
2028 continue;
2029
2030 if (IS_LEAF(n)) {
2031 iter->tnode = pn;
2032 iter->index = cindex;
2033 } else {
2034
2035 iter->tnode = n;
2036 iter->index = 0;
2037 ++iter->depth;
2038 }
2039
2040 return n;
2041 }
2042
2043
2044 pkey = pn->key;
2045 pn = node_parent_rcu(pn);
2046 cindex = get_index(pkey, pn) + 1;
2047 --iter->depth;
2048 }
2049
2050
2051 iter->tnode = pn;
2052 iter->index = 0;
2053
2054 return NULL;
2055}
2056
2057static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2058 struct trie *t)
2059{
2060 struct key_vector *n, *pn = t->kv;
2061
2062 if (!t)
2063 return NULL;
2064
2065 n = rcu_dereference(pn->tnode[0]);
2066 if (!n)
2067 return NULL;
2068
2069 if (IS_TNODE(n)) {
2070 iter->tnode = n;
2071 iter->index = 0;
2072 iter->depth = 1;
2073 } else {
2074 iter->tnode = pn;
2075 iter->index = 0;
2076 iter->depth = 0;
2077 }
2078
2079 return n;
2080}
2081
2082static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2083{
2084 struct key_vector *n;
2085 struct fib_trie_iter iter;
2086
2087 memset(s, 0, sizeof(*s));
2088
2089 rcu_read_lock();
2090 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2091 if (IS_LEAF(n)) {
2092 struct fib_alias *fa;
2093
2094 s->leaves++;
2095 s->totdepth += iter.depth;
2096 if (iter.depth > s->maxdepth)
2097 s->maxdepth = iter.depth;
2098
2099 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
2100 ++s->prefixes;
2101 } else {
2102 s->tnodes++;
2103 if (n->bits < MAX_STAT_DEPTH)
2104 s->nodesizes[n->bits]++;
2105 s->nullpointers += tn_info(n)->empty_children;
2106 }
2107 }
2108 rcu_read_unlock();
2109}
2110
2111
2112
2113
2114static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2115{
2116 unsigned int i, max, pointers, bytes, avdepth;
2117
2118 if (stat->leaves)
2119 avdepth = stat->totdepth*100 / stat->leaves;
2120 else
2121 avdepth = 0;
2122
2123 seq_printf(seq, "\tAver depth: %u.%02d\n",
2124 avdepth / 100, avdepth % 100);
2125 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2126
2127 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2128 bytes = LEAF_SIZE * stat->leaves;
2129
2130 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2131 bytes += sizeof(struct fib_alias) * stat->prefixes;
2132
2133 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2134 bytes += TNODE_SIZE(0) * stat->tnodes;
2135
2136 max = MAX_STAT_DEPTH;
2137 while (max > 0 && stat->nodesizes[max-1] == 0)
2138 max--;
2139
2140 pointers = 0;
2141 for (i = 1; i < max; i++)
2142 if (stat->nodesizes[i] != 0) {
2143 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2144 pointers += (1<<i) * stat->nodesizes[i];
2145 }
2146 seq_putc(seq, '\n');
2147 seq_printf(seq, "\tPointers: %u\n", pointers);
2148
2149 bytes += sizeof(struct key_vector *) * pointers;
2150 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2151 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2152}
2153
2154#ifdef CONFIG_IP_FIB_TRIE_STATS
2155static void trie_show_usage(struct seq_file *seq,
2156 const struct trie_use_stats __percpu *stats)
2157{
2158 struct trie_use_stats s = { 0 };
2159 int cpu;
2160
2161
2162 for_each_possible_cpu(cpu) {
2163 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
2164
2165 s.gets += pcpu->gets;
2166 s.backtrack += pcpu->backtrack;
2167 s.semantic_match_passed += pcpu->semantic_match_passed;
2168 s.semantic_match_miss += pcpu->semantic_match_miss;
2169 s.null_node_hit += pcpu->null_node_hit;
2170 s.resize_node_skipped += pcpu->resize_node_skipped;
2171 }
2172
2173 seq_printf(seq, "\nCounters:\n---------\n");
2174 seq_printf(seq, "gets = %u\n", s.gets);
2175 seq_printf(seq, "backtracks = %u\n", s.backtrack);
2176 seq_printf(seq, "semantic match passed = %u\n",
2177 s.semantic_match_passed);
2178 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
2179 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
2180 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
2181}
2182#endif
2183
2184static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2185{
2186 if (tb->tb_id == RT_TABLE_LOCAL)
2187 seq_puts(seq, "Local:\n");
2188 else if (tb->tb_id == RT_TABLE_MAIN)
2189 seq_puts(seq, "Main:\n");
2190 else
2191 seq_printf(seq, "Id %d:\n", tb->tb_id);
2192}
2193
2194
2195static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2196{
2197 struct net *net = (struct net *)seq->private;
2198 unsigned int h;
2199
2200 seq_printf(seq,
2201 "Basic info: size of leaf:"
2202 " %Zd bytes, size of tnode: %Zd bytes.\n",
2203 LEAF_SIZE, TNODE_SIZE(0));
2204
2205 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2206 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2207 struct fib_table *tb;
2208
2209 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2210 struct trie *t = (struct trie *) tb->tb_data;
2211 struct trie_stat stat;
2212
2213 if (!t)
2214 continue;
2215
2216 fib_table_print(seq, tb);
2217
2218 trie_collect_stats(t, &stat);
2219 trie_show_stats(seq, &stat);
2220#ifdef CONFIG_IP_FIB_TRIE_STATS
2221 trie_show_usage(seq, t->stats);
2222#endif
2223 }
2224 }
2225
2226 return 0;
2227}
2228
2229static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2230{
2231 return single_open_net(inode, file, fib_triestat_seq_show);
2232}
2233
2234static const struct file_operations fib_triestat_fops = {
2235 .owner = THIS_MODULE,
2236 .open = fib_triestat_seq_open,
2237 .read = seq_read,
2238 .llseek = seq_lseek,
2239 .release = single_release_net,
2240};
2241
2242static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2243{
2244 struct fib_trie_iter *iter = seq->private;
2245 struct net *net = seq_file_net(seq);
2246 loff_t idx = 0;
2247 unsigned int h;
2248
2249 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2250 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2251 struct fib_table *tb;
2252
2253 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2254 struct key_vector *n;
2255
2256 for (n = fib_trie_get_first(iter,
2257 (struct trie *) tb->tb_data);
2258 n; n = fib_trie_get_next(iter))
2259 if (pos == idx++) {
2260 iter->tb = tb;
2261 return n;
2262 }
2263 }
2264 }
2265
2266 return NULL;
2267}
2268
2269static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2270 __acquires(RCU)
2271{
2272 rcu_read_lock();
2273 return fib_trie_get_idx(seq, *pos);
2274}
2275
2276static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2277{
2278 struct fib_trie_iter *iter = seq->private;
2279 struct net *net = seq_file_net(seq);
2280 struct fib_table *tb = iter->tb;
2281 struct hlist_node *tb_node;
2282 unsigned int h;
2283 struct key_vector *n;
2284
2285 ++*pos;
2286
2287 n = fib_trie_get_next(iter);
2288 if (n)
2289 return n;
2290
2291
2292 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2293 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2294 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2295 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2296 if (n)
2297 goto found;
2298 }
2299
2300
2301 while (++h < FIB_TABLE_HASHSZ) {
2302 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2303 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2304 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2305 if (n)
2306 goto found;
2307 }
2308 }
2309 return NULL;
2310
2311found:
2312 iter->tb = tb;
2313 return n;
2314}
2315
2316static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2317 __releases(RCU)
2318{
2319 rcu_read_unlock();
2320}
2321
2322static void seq_indent(struct seq_file *seq, int n)
2323{
2324 while (n-- > 0)
2325 seq_puts(seq, " ");
2326}
2327
2328static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2329{
2330 switch (s) {
2331 case RT_SCOPE_UNIVERSE: return "universe";
2332 case RT_SCOPE_SITE: return "site";
2333 case RT_SCOPE_LINK: return "link";
2334 case RT_SCOPE_HOST: return "host";
2335 case RT_SCOPE_NOWHERE: return "nowhere";
2336 default:
2337 snprintf(buf, len, "scope=%d", s);
2338 return buf;
2339 }
2340}
2341
2342static const char *const rtn_type_names[__RTN_MAX] = {
2343 [RTN_UNSPEC] = "UNSPEC",
2344 [RTN_UNICAST] = "UNICAST",
2345 [RTN_LOCAL] = "LOCAL",
2346 [RTN_BROADCAST] = "BROADCAST",
2347 [RTN_ANYCAST] = "ANYCAST",
2348 [RTN_MULTICAST] = "MULTICAST",
2349 [RTN_BLACKHOLE] = "BLACKHOLE",
2350 [RTN_UNREACHABLE] = "UNREACHABLE",
2351 [RTN_PROHIBIT] = "PROHIBIT",
2352 [RTN_THROW] = "THROW",
2353 [RTN_NAT] = "NAT",
2354 [RTN_XRESOLVE] = "XRESOLVE",
2355};
2356
2357static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2358{
2359 if (t < __RTN_MAX && rtn_type_names[t])
2360 return rtn_type_names[t];
2361 snprintf(buf, len, "type %u", t);
2362 return buf;
2363}
2364
2365
2366static int fib_trie_seq_show(struct seq_file *seq, void *v)
2367{
2368 const struct fib_trie_iter *iter = seq->private;
2369 struct key_vector *n = v;
2370
2371 if (IS_TRIE(node_parent_rcu(n)))
2372 fib_table_print(seq, iter->tb);
2373
2374 if (IS_TNODE(n)) {
2375 __be32 prf = htonl(n->key);
2376
2377 seq_indent(seq, iter->depth-1);
2378 seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
2379 &prf, KEYLENGTH - n->pos - n->bits, n->bits,
2380 tn_info(n)->full_children,
2381 tn_info(n)->empty_children);
2382 } else {
2383 __be32 val = htonl(n->key);
2384 struct fib_alias *fa;
2385
2386 seq_indent(seq, iter->depth);
2387 seq_printf(seq, " |-- %pI4\n", &val);
2388
2389 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
2390 char buf1[32], buf2[32];
2391
2392 seq_indent(seq, iter->depth + 1);
2393 seq_printf(seq, " /%zu %s %s",
2394 KEYLENGTH - fa->fa_slen,
2395 rtn_scope(buf1, sizeof(buf1),
2396 fa->fa_info->fib_scope),
2397 rtn_type(buf2, sizeof(buf2),
2398 fa->fa_type));
2399 if (fa->fa_tos)
2400 seq_printf(seq, " tos=%d", fa->fa_tos);
2401 seq_putc(seq, '\n');
2402 }
2403 }
2404
2405 return 0;
2406}
2407
2408static const struct seq_operations fib_trie_seq_ops = {
2409 .start = fib_trie_seq_start,
2410 .next = fib_trie_seq_next,
2411 .stop = fib_trie_seq_stop,
2412 .show = fib_trie_seq_show,
2413};
2414
2415static int fib_trie_seq_open(struct inode *inode, struct file *file)
2416{
2417 return seq_open_net(inode, file, &fib_trie_seq_ops,
2418 sizeof(struct fib_trie_iter));
2419}
2420
2421static const struct file_operations fib_trie_fops = {
2422 .owner = THIS_MODULE,
2423 .open = fib_trie_seq_open,
2424 .read = seq_read,
2425 .llseek = seq_lseek,
2426 .release = seq_release_net,
2427};
2428
2429struct fib_route_iter {
2430 struct seq_net_private p;
2431 struct fib_table *main_tb;
2432 struct key_vector *tnode;
2433 loff_t pos;
2434 t_key key;
2435};
2436
2437static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
2438 loff_t pos)
2439{
2440 struct fib_table *tb = iter->main_tb;
2441 struct key_vector *l, **tp = &iter->tnode;
2442 struct trie *t;
2443 t_key key;
2444
2445
2446 if (iter->pos > 0 && pos >= iter->pos) {
2447 pos -= iter->pos;
2448 key = iter->key;
2449 } else {
2450 t = (struct trie *)tb->tb_data;
2451 iter->tnode = t->kv;
2452 iter->pos = 0;
2453 key = 0;
2454 }
2455
2456 while ((l = leaf_walk_rcu(tp, key)) != NULL) {
2457 key = l->key + 1;
2458 iter->pos++;
2459
2460 if (pos-- <= 0)
2461 break;
2462
2463 l = NULL;
2464
2465
2466 if (!key)
2467 break;
2468 }
2469
2470 if (l)
2471 iter->key = key;
2472 else
2473 iter->pos = 0;
2474
2475 return l;
2476}
2477
2478static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2479 __acquires(RCU)
2480{
2481 struct fib_route_iter *iter = seq->private;
2482 struct fib_table *tb;
2483 struct trie *t;
2484
2485 rcu_read_lock();
2486
2487 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
2488 if (!tb)
2489 return NULL;
2490
2491 iter->main_tb = tb;
2492
2493 if (*pos != 0)
2494 return fib_route_get_idx(iter, *pos);
2495
2496 t = (struct trie *)tb->tb_data;
2497 iter->tnode = t->kv;
2498 iter->pos = 0;
2499 iter->key = 0;
2500
2501 return SEQ_START_TOKEN;
2502}
2503
2504static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2505{
2506 struct fib_route_iter *iter = seq->private;
2507 struct key_vector *l = NULL;
2508 t_key key = iter->key;
2509
2510 ++*pos;
2511
2512
2513 if ((v == SEQ_START_TOKEN) || key)
2514 l = leaf_walk_rcu(&iter->tnode, key);
2515
2516 if (l) {
2517 iter->key = l->key + 1;
2518 iter->pos++;
2519 } else {
2520 iter->pos = 0;
2521 }
2522
2523 return l;
2524}
2525
2526static void fib_route_seq_stop(struct seq_file *seq, void *v)
2527 __releases(RCU)
2528{
2529 rcu_read_unlock();
2530}
2531
2532static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2533{
2534 unsigned int flags = 0;
2535
2536 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2537 flags = RTF_REJECT;
2538 if (fi && fi->fib_nh->nh_gw)
2539 flags |= RTF_GATEWAY;
2540 if (mask == htonl(0xFFFFFFFF))
2541 flags |= RTF_HOST;
2542 flags |= RTF_UP;
2543 return flags;
2544}
2545
2546
2547
2548
2549
2550
2551
2552static int fib_route_seq_show(struct seq_file *seq, void *v)
2553{
2554 struct fib_route_iter *iter = seq->private;
2555 struct fib_table *tb = iter->main_tb;
2556 struct fib_alias *fa;
2557 struct key_vector *l = v;
2558 __be32 prefix;
2559
2560 if (v == SEQ_START_TOKEN) {
2561 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2562 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2563 "\tWindow\tIRTT");
2564 return 0;
2565 }
2566
2567 prefix = htonl(l->key);
2568
2569 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2570 const struct fib_info *fi = fa->fa_info;
2571 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2572 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2573
2574 if ((fa->fa_type == RTN_BROADCAST) ||
2575 (fa->fa_type == RTN_MULTICAST))
2576 continue;
2577
2578 if (fa->tb_id != tb->tb_id)
2579 continue;
2580
2581 seq_setwidth(seq, 127);
2582
2583 if (fi)
2584 seq_printf(seq,
2585 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2586 "%d\t%08X\t%d\t%u\t%u",
2587 fi->fib_dev ? fi->fib_dev->name : "*",
2588 prefix,
2589 fi->fib_nh->nh_gw, flags, 0, 0,
2590 fi->fib_priority,
2591 mask,
2592 (fi->fib_advmss ?
2593 fi->fib_advmss + 40 : 0),
2594 fi->fib_window,
2595 fi->fib_rtt >> 3);
2596 else
2597 seq_printf(seq,
2598 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2599 "%d\t%08X\t%d\t%u\t%u",
2600 prefix, 0, flags, 0, 0, 0,
2601 mask, 0, 0, 0);
2602
2603 seq_pad(seq, '\n');
2604 }
2605
2606 return 0;
2607}
2608
2609static const struct seq_operations fib_route_seq_ops = {
2610 .start = fib_route_seq_start,
2611 .next = fib_route_seq_next,
2612 .stop = fib_route_seq_stop,
2613 .show = fib_route_seq_show,
2614};
2615
2616static int fib_route_seq_open(struct inode *inode, struct file *file)
2617{
2618 return seq_open_net(inode, file, &fib_route_seq_ops,
2619 sizeof(struct fib_route_iter));
2620}
2621
2622static const struct file_operations fib_route_fops = {
2623 .owner = THIS_MODULE,
2624 .open = fib_route_seq_open,
2625 .read = seq_read,
2626 .llseek = seq_lseek,
2627 .release = seq_release_net,
2628};
2629
2630int __net_init fib_proc_init(struct net *net)
2631{
2632 if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
2633 goto out1;
2634
2635 if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
2636 &fib_triestat_fops))
2637 goto out2;
2638
2639 if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
2640 goto out3;
2641
2642 return 0;
2643
2644out3:
2645 remove_proc_entry("fib_triestat", net->proc_net);
2646out2:
2647 remove_proc_entry("fib_trie", net->proc_net);
2648out1:
2649 return -ENOMEM;
2650}
2651
2652void __net_exit fib_proc_exit(struct net *net)
2653{
2654 remove_proc_entry("fib_trie", net->proc_net);
2655 remove_proc_entry("fib_triestat", net->proc_net);
2656 remove_proc_entry("route", net->proc_net);
2657}
2658
2659#endif
2660