1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39#define VERSION "0.409"
40
41#include <linux/cache.h>
42#include <linux/uaccess.h>
43#include <linux/bitops.h>
44#include <linux/types.h>
45#include <linux/kernel.h>
46#include <linux/mm.h>
47#include <linux/string.h>
48#include <linux/socket.h>
49#include <linux/sockios.h>
50#include <linux/errno.h>
51#include <linux/in.h>
52#include <linux/inet.h>
53#include <linux/inetdevice.h>
54#include <linux/netdevice.h>
55#include <linux/if_arp.h>
56#include <linux/proc_fs.h>
57#include <linux/rcupdate.h>
58#include <linux/skbuff.h>
59#include <linux/netlink.h>
60#include <linux/init.h>
61#include <linux/list.h>
62#include <linux/slab.h>
63#include <linux/export.h>
64#include <linux/vmalloc.h>
65#include <linux/notifier.h>
66#include <net/net_namespace.h>
67#include <net/ip.h>
68#include <net/protocol.h>
69#include <net/route.h>
70#include <net/tcp.h>
71#include <net/sock.h>
72#include <net/ip_fib.h>
73#include <net/fib_notifier.h>
74#include <trace/events/fib.h>
75#include "fib_lookup.h"
76
77static int call_fib_entry_notifier(struct notifier_block *nb,
78 enum fib_event_type event_type, u32 dst,
79 int dst_len, struct fib_alias *fa,
80 struct netlink_ext_ack *extack)
81{
82 struct fib_entry_notifier_info info = {
83 .info.extack = extack,
84 .dst = dst,
85 .dst_len = dst_len,
86 .fi = fa->fa_info,
87 .tos = fa->fa_tos,
88 .type = fa->fa_type,
89 .tb_id = fa->tb_id,
90 };
91 return call_fib4_notifier(nb, event_type, &info.info);
92}
93
94static int call_fib_entry_notifiers(struct net *net,
95 enum fib_event_type event_type, u32 dst,
96 int dst_len, struct fib_alias *fa,
97 struct netlink_ext_ack *extack)
98{
99 struct fib_entry_notifier_info info = {
100 .info.extack = extack,
101 .dst = dst,
102 .dst_len = dst_len,
103 .fi = fa->fa_info,
104 .tos = fa->fa_tos,
105 .type = fa->fa_type,
106 .tb_id = fa->tb_id,
107 };
108 return call_fib4_notifiers(net, event_type, &info.info);
109}
110
111#define MAX_STAT_DEPTH 32
112
113#define KEYLENGTH (8*sizeof(t_key))
114#define KEY_MAX ((t_key)~0)
115
116typedef unsigned int t_key;
117
118#define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
119#define IS_TNODE(n) ((n)->bits)
120#define IS_LEAF(n) (!(n)->bits)
121
122struct key_vector {
123 t_key key;
124 unsigned char pos;
125 unsigned char bits;
126 unsigned char slen;
127 union {
128
129 struct hlist_head leaf;
130
131 struct key_vector __rcu *tnode[0];
132 };
133};
134
135struct tnode {
136 struct rcu_head rcu;
137 t_key empty_children;
138 t_key full_children;
139 struct key_vector __rcu *parent;
140 struct key_vector kv[1];
141#define tn_bits kv[0].bits
142};
143
144#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n])
145#define LEAF_SIZE TNODE_SIZE(1)
146
147#ifdef CONFIG_IP_FIB_TRIE_STATS
148struct trie_use_stats {
149 unsigned int gets;
150 unsigned int backtrack;
151 unsigned int semantic_match_passed;
152 unsigned int semantic_match_miss;
153 unsigned int null_node_hit;
154 unsigned int resize_node_skipped;
155};
156#endif
157
158struct trie_stat {
159 unsigned int totdepth;
160 unsigned int maxdepth;
161 unsigned int tnodes;
162 unsigned int leaves;
163 unsigned int nullpointers;
164 unsigned int prefixes;
165 unsigned int nodesizes[MAX_STAT_DEPTH];
166};
167
168struct trie {
169 struct key_vector kv[1];
170#ifdef CONFIG_IP_FIB_TRIE_STATS
171 struct trie_use_stats __percpu *stats;
172#endif
173};
174
175static struct key_vector *resize(struct trie *t, struct key_vector *tn);
176static unsigned int tnode_free_size;
177
178
179
180
181
182
183unsigned int sysctl_fib_sync_mem = 512 * 1024;
184unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
185unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
186
187static struct kmem_cache *fn_alias_kmem __ro_after_init;
188static struct kmem_cache *trie_leaf_kmem __ro_after_init;
189
190static inline struct tnode *tn_info(struct key_vector *kv)
191{
192 return container_of(kv, struct tnode, kv[0]);
193}
194
195
196#define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
197#define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
198
199
200#define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
201#define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
202
203
204static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
205{
206 if (n)
207 rcu_assign_pointer(tn_info(n)->parent, tp);
208}
209
210#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
211
212
213
214
215static inline unsigned long child_length(const struct key_vector *tn)
216{
217 return (1ul << tn->bits) & ~(1ul);
218}
219
220#define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
221
222static inline unsigned long get_index(t_key key, struct key_vector *kv)
223{
224 unsigned long index = key ^ kv->key;
225
226 if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
227 return 0;
228
229 return index >> kv->pos;
230}
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291static const int halve_threshold = 25;
292static const int inflate_threshold = 50;
293static const int halve_threshold_root = 15;
294static const int inflate_threshold_root = 30;
295
296static void __alias_free_mem(struct rcu_head *head)
297{
298 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
299 kmem_cache_free(fn_alias_kmem, fa);
300}
301
302static inline void alias_free_mem_rcu(struct fib_alias *fa)
303{
304 call_rcu(&fa->rcu, __alias_free_mem);
305}
306
307#define TNODE_KMALLOC_MAX \
308 ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *))
309#define TNODE_VMALLOC_MAX \
310 ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
311
312static void __node_free_rcu(struct rcu_head *head)
313{
314 struct tnode *n = container_of(head, struct tnode, rcu);
315
316 if (!n->tn_bits)
317 kmem_cache_free(trie_leaf_kmem, n);
318 else
319 kvfree(n);
320}
321
322#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
323
324static struct tnode *tnode_alloc(int bits)
325{
326 size_t size;
327
328
329 if (bits > TNODE_VMALLOC_MAX)
330 return NULL;
331
332
333 size = TNODE_SIZE(1ul << bits);
334
335 if (size <= PAGE_SIZE)
336 return kzalloc(size, GFP_KERNEL);
337 else
338 return vzalloc(size);
339}
340
341static inline void empty_child_inc(struct key_vector *n)
342{
343 tn_info(n)->empty_children++;
344
345 if (!tn_info(n)->empty_children)
346 tn_info(n)->full_children++;
347}
348
349static inline void empty_child_dec(struct key_vector *n)
350{
351 if (!tn_info(n)->empty_children)
352 tn_info(n)->full_children--;
353
354 tn_info(n)->empty_children--;
355}
356
357static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
358{
359 struct key_vector *l;
360 struct tnode *kv;
361
362 kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
363 if (!kv)
364 return NULL;
365
366
367 l = kv->kv;
368 l->key = key;
369 l->pos = 0;
370 l->bits = 0;
371 l->slen = fa->fa_slen;
372
373
374 INIT_HLIST_HEAD(&l->leaf);
375 hlist_add_head(&fa->fa_list, &l->leaf);
376
377 return l;
378}
379
380static struct key_vector *tnode_new(t_key key, int pos, int bits)
381{
382 unsigned int shift = pos + bits;
383 struct key_vector *tn;
384 struct tnode *tnode;
385
386
387 BUG_ON(!bits || (shift > KEYLENGTH));
388
389 tnode = tnode_alloc(bits);
390 if (!tnode)
391 return NULL;
392
393 pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
394 sizeof(struct key_vector *) << bits);
395
396 if (bits == KEYLENGTH)
397 tnode->full_children = 1;
398 else
399 tnode->empty_children = 1ul << bits;
400
401 tn = tnode->kv;
402 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
403 tn->pos = pos;
404 tn->bits = bits;
405 tn->slen = pos;
406
407 return tn;
408}
409
410
411
412
413static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
414{
415 return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
416}
417
418
419
420
421static void put_child(struct key_vector *tn, unsigned long i,
422 struct key_vector *n)
423{
424 struct key_vector *chi = get_child(tn, i);
425 int isfull, wasfull;
426
427 BUG_ON(i >= child_length(tn));
428
429
430 if (!n && chi)
431 empty_child_inc(tn);
432 if (n && !chi)
433 empty_child_dec(tn);
434
435
436 wasfull = tnode_full(tn, chi);
437 isfull = tnode_full(tn, n);
438
439 if (wasfull && !isfull)
440 tn_info(tn)->full_children--;
441 else if (!wasfull && isfull)
442 tn_info(tn)->full_children++;
443
444 if (n && (tn->slen < n->slen))
445 tn->slen = n->slen;
446
447 rcu_assign_pointer(tn->tnode[i], n);
448}
449
450static void update_children(struct key_vector *tn)
451{
452 unsigned long i;
453
454
455 for (i = child_length(tn); i;) {
456 struct key_vector *inode = get_child(tn, --i);
457
458 if (!inode)
459 continue;
460
461
462
463
464
465 if (node_parent(inode) == tn)
466 update_children(inode);
467 else
468 node_set_parent(inode, tn);
469 }
470}
471
472static inline void put_child_root(struct key_vector *tp, t_key key,
473 struct key_vector *n)
474{
475 if (IS_TRIE(tp))
476 rcu_assign_pointer(tp->tnode[0], n);
477 else
478 put_child(tp, get_index(key, tp), n);
479}
480
481static inline void tnode_free_init(struct key_vector *tn)
482{
483 tn_info(tn)->rcu.next = NULL;
484}
485
486static inline void tnode_free_append(struct key_vector *tn,
487 struct key_vector *n)
488{
489 tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
490 tn_info(tn)->rcu.next = &tn_info(n)->rcu;
491}
492
493static void tnode_free(struct key_vector *tn)
494{
495 struct callback_head *head = &tn_info(tn)->rcu;
496
497 while (head) {
498 head = head->next;
499 tnode_free_size += TNODE_SIZE(1ul << tn->bits);
500 node_free(tn);
501
502 tn = container_of(head, struct tnode, rcu)->kv;
503 }
504
505 if (tnode_free_size >= sysctl_fib_sync_mem) {
506 tnode_free_size = 0;
507 synchronize_rcu();
508 }
509}
510
511static struct key_vector *replace(struct trie *t,
512 struct key_vector *oldtnode,
513 struct key_vector *tn)
514{
515 struct key_vector *tp = node_parent(oldtnode);
516 unsigned long i;
517
518
519 NODE_INIT_PARENT(tn, tp);
520 put_child_root(tp, tn->key, tn);
521
522
523 update_children(tn);
524
525
526 tnode_free(oldtnode);
527
528
529 for (i = child_length(tn); i;) {
530 struct key_vector *inode = get_child(tn, --i);
531
532
533 if (tnode_full(tn, inode))
534 tn = resize(t, inode);
535 }
536
537 return tp;
538}
539
540static struct key_vector *inflate(struct trie *t,
541 struct key_vector *oldtnode)
542{
543 struct key_vector *tn;
544 unsigned long i;
545 t_key m;
546
547 pr_debug("In inflate\n");
548
549 tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
550 if (!tn)
551 goto notnode;
552
553
554 tnode_free_init(oldtnode);
555
556
557
558
559
560
561 for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
562 struct key_vector *inode = get_child(oldtnode, --i);
563 struct key_vector *node0, *node1;
564 unsigned long j, k;
565
566
567 if (!inode)
568 continue;
569
570
571 if (!tnode_full(oldtnode, inode)) {
572 put_child(tn, get_index(inode->key, tn), inode);
573 continue;
574 }
575
576
577 tnode_free_append(oldtnode, inode);
578
579
580 if (inode->bits == 1) {
581 put_child(tn, 2 * i + 1, get_child(inode, 1));
582 put_child(tn, 2 * i, get_child(inode, 0));
583 continue;
584 }
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600 node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
601 if (!node1)
602 goto nomem;
603 node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);
604
605 tnode_free_append(tn, node1);
606 if (!node0)
607 goto nomem;
608 tnode_free_append(tn, node0);
609
610
611 for (k = child_length(inode), j = k / 2; j;) {
612 put_child(node1, --j, get_child(inode, --k));
613 put_child(node0, j, get_child(inode, j));
614 put_child(node1, --j, get_child(inode, --k));
615 put_child(node0, j, get_child(inode, j));
616 }
617
618
619 NODE_INIT_PARENT(node1, tn);
620 NODE_INIT_PARENT(node0, tn);
621
622
623 put_child(tn, 2 * i + 1, node1);
624 put_child(tn, 2 * i, node0);
625 }
626
627
628 return replace(t, oldtnode, tn);
629nomem:
630
631 tnode_free(tn);
632notnode:
633 return NULL;
634}
635
636static struct key_vector *halve(struct trie *t,
637 struct key_vector *oldtnode)
638{
639 struct key_vector *tn;
640 unsigned long i;
641
642 pr_debug("In halve\n");
643
644 tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
645 if (!tn)
646 goto notnode;
647
648
649 tnode_free_init(oldtnode);
650
651
652
653
654
655
656 for (i = child_length(oldtnode); i;) {
657 struct key_vector *node1 = get_child(oldtnode, --i);
658 struct key_vector *node0 = get_child(oldtnode, --i);
659 struct key_vector *inode;
660
661
662 if (!node1 || !node0) {
663 put_child(tn, i / 2, node1 ? : node0);
664 continue;
665 }
666
667
668 inode = tnode_new(node0->key, oldtnode->pos, 1);
669 if (!inode)
670 goto nomem;
671 tnode_free_append(tn, inode);
672
673
674 put_child(inode, 1, node1);
675 put_child(inode, 0, node0);
676 NODE_INIT_PARENT(inode, tn);
677
678
679 put_child(tn, i / 2, inode);
680 }
681
682
683 return replace(t, oldtnode, tn);
684nomem:
685
686 tnode_free(tn);
687notnode:
688 return NULL;
689}
690
691static struct key_vector *collapse(struct trie *t,
692 struct key_vector *oldtnode)
693{
694 struct key_vector *n, *tp;
695 unsigned long i;
696
697
698 for (n = NULL, i = child_length(oldtnode); !n && i;)
699 n = get_child(oldtnode, --i);
700
701
702 tp = node_parent(oldtnode);
703 put_child_root(tp, oldtnode->key, n);
704 node_set_parent(n, tp);
705
706
707 node_free(oldtnode);
708
709 return tp;
710}
711
712static unsigned char update_suffix(struct key_vector *tn)
713{
714 unsigned char slen = tn->pos;
715 unsigned long stride, i;
716 unsigned char slen_max;
717
718
719
720
721
722 slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
723
724
725
726
727
728
729 for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
730 struct key_vector *n = get_child(tn, i);
731
732 if (!n || (n->slen <= slen))
733 continue;
734
735
736 stride <<= (n->slen - slen);
737 slen = n->slen;
738 i &= ~(stride - 1);
739
740
741 if (slen >= slen_max)
742 break;
743 }
744
745 tn->slen = slen;
746
747 return slen;
748}
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
808{
809 unsigned long used = child_length(tn);
810 unsigned long threshold = used;
811
812
813 threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
814 used -= tn_info(tn)->empty_children;
815 used += tn_info(tn)->full_children;
816
817
818
819 return (used > 1) && tn->pos && ((50 * used) >= threshold);
820}
821
822static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
823{
824 unsigned long used = child_length(tn);
825 unsigned long threshold = used;
826
827
828 threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
829 used -= tn_info(tn)->empty_children;
830
831
832
833 return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
834}
835
836static inline bool should_collapse(struct key_vector *tn)
837{
838 unsigned long used = child_length(tn);
839
840 used -= tn_info(tn)->empty_children;
841
842
843 if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
844 used -= KEY_MAX;
845
846
847 return used < 2;
848}
849
850#define MAX_WORK 10
851static struct key_vector *resize(struct trie *t, struct key_vector *tn)
852{
853#ifdef CONFIG_IP_FIB_TRIE_STATS
854 struct trie_use_stats __percpu *stats = t->stats;
855#endif
856 struct key_vector *tp = node_parent(tn);
857 unsigned long cindex = get_index(tn->key, tp);
858 int max_work = MAX_WORK;
859
860 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
861 tn, inflate_threshold, halve_threshold);
862
863
864
865
866
867 BUG_ON(tn != get_child(tp, cindex));
868
869
870
871
872 while (should_inflate(tp, tn) && max_work) {
873 tp = inflate(t, tn);
874 if (!tp) {
875#ifdef CONFIG_IP_FIB_TRIE_STATS
876 this_cpu_inc(stats->resize_node_skipped);
877#endif
878 break;
879 }
880
881 max_work--;
882 tn = get_child(tp, cindex);
883 }
884
885
886 tp = node_parent(tn);
887
888
889 if (max_work != MAX_WORK)
890 return tp;
891
892
893
894
895 while (should_halve(tp, tn) && max_work) {
896 tp = halve(t, tn);
897 if (!tp) {
898#ifdef CONFIG_IP_FIB_TRIE_STATS
899 this_cpu_inc(stats->resize_node_skipped);
900#endif
901 break;
902 }
903
904 max_work--;
905 tn = get_child(tp, cindex);
906 }
907
908
909 if (should_collapse(tn))
910 return collapse(t, tn);
911
912
913 return node_parent(tn);
914}
915
916static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
917{
918 unsigned char node_slen = tn->slen;
919
920 while ((node_slen > tn->pos) && (node_slen > slen)) {
921 slen = update_suffix(tn);
922 if (node_slen == slen)
923 break;
924
925 tn = node_parent(tn);
926 node_slen = tn->slen;
927 }
928}
929
930static void node_push_suffix(struct key_vector *tn, unsigned char slen)
931{
932 while (tn->slen < slen) {
933 tn->slen = slen;
934 tn = node_parent(tn);
935 }
936}
937
938
939static struct key_vector *fib_find_node(struct trie *t,
940 struct key_vector **tp, u32 key)
941{
942 struct key_vector *pn, *n = t->kv;
943 unsigned long index = 0;
944
945 do {
946 pn = n;
947 n = get_child_rcu(n, index);
948
949 if (!n)
950 break;
951
952 index = get_cindex(key, n);
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968 if (index >= (1ul << n->bits)) {
969 n = NULL;
970 break;
971 }
972
973
974 } while (IS_TNODE(n));
975
976 *tp = pn;
977
978 return n;
979}
980
981
982
983
984static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
985 u8 tos, u32 prio, u32 tb_id)
986{
987 struct fib_alias *fa;
988
989 if (!fah)
990 return NULL;
991
992 hlist_for_each_entry(fa, fah, fa_list) {
993 if (fa->fa_slen < slen)
994 continue;
995 if (fa->fa_slen != slen)
996 break;
997 if (fa->tb_id > tb_id)
998 continue;
999 if (fa->tb_id != tb_id)
1000 break;
1001 if (fa->fa_tos > tos)
1002 continue;
1003 if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
1004 return fa;
1005 }
1006
1007 return NULL;
1008}
1009
1010static void trie_rebalance(struct trie *t, struct key_vector *tn)
1011{
1012 while (!IS_TRIE(tn))
1013 tn = resize(t, tn);
1014}
1015
1016static int fib_insert_node(struct trie *t, struct key_vector *tp,
1017 struct fib_alias *new, t_key key)
1018{
1019 struct key_vector *n, *l;
1020
1021 l = leaf_new(key, new);
1022 if (!l)
1023 goto noleaf;
1024
1025
1026 n = get_child(tp, get_index(key, tp));
1027
1028
1029
1030
1031
1032
1033
1034 if (n) {
1035 struct key_vector *tn;
1036
1037 tn = tnode_new(key, __fls(key ^ n->key), 1);
1038 if (!tn)
1039 goto notnode;
1040
1041
1042 NODE_INIT_PARENT(tn, tp);
1043 put_child(tn, get_index(key, tn) ^ 1, n);
1044
1045
1046 put_child_root(tp, key, tn);
1047 node_set_parent(n, tn);
1048
1049
1050 tp = tn;
1051 }
1052
1053
1054 node_push_suffix(tp, new->fa_slen);
1055 NODE_INIT_PARENT(l, tp);
1056 put_child_root(tp, key, l);
1057 trie_rebalance(t, tp);
1058
1059 return 0;
1060notnode:
1061 node_free(l);
1062noleaf:
1063 return -ENOMEM;
1064}
1065
1066
1067
1068
1069static int fib_insert_alias(struct trie *t, struct key_vector *tp,
1070 struct key_vector *l, struct fib_alias *new,
1071 struct fib_alias *fa, t_key key)
1072{
1073 if (!l)
1074 return fib_insert_node(t, tp, new, key);
1075
1076 if (fa) {
1077 hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
1078 } else {
1079 struct fib_alias *last;
1080
1081 hlist_for_each_entry(last, &l->leaf, fa_list) {
1082 if (new->fa_slen < last->fa_slen)
1083 break;
1084 if ((new->fa_slen == last->fa_slen) &&
1085 (new->tb_id > last->tb_id))
1086 break;
1087 fa = last;
1088 }
1089
1090 if (fa)
1091 hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
1092 else
1093 hlist_add_head_rcu(&new->fa_list, &l->leaf);
1094 }
1095
1096
1097 if (l->slen < new->fa_slen) {
1098 l->slen = new->fa_slen;
1099 node_push_suffix(tp, new->fa_slen);
1100 }
1101
1102 return 0;
1103}
1104
1105static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
1106{
1107 if (plen > KEYLENGTH) {
1108 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1109 return false;
1110 }
1111
1112 if ((plen < KEYLENGTH) && (key << plen)) {
1113 NL_SET_ERR_MSG(extack,
1114 "Invalid prefix for given prefix length");
1115 return false;
1116 }
1117
1118 return true;
1119}
1120
1121
1122int fib_table_insert(struct net *net, struct fib_table *tb,
1123 struct fib_config *cfg, struct netlink_ext_ack *extack)
1124{
1125 enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
1126 struct trie *t = (struct trie *)tb->tb_data;
1127 struct fib_alias *fa, *new_fa;
1128 struct key_vector *l, *tp;
1129 u16 nlflags = NLM_F_EXCL;
1130 struct fib_info *fi;
1131 u8 plen = cfg->fc_dst_len;
1132 u8 slen = KEYLENGTH - plen;
1133 u8 tos = cfg->fc_tos;
1134 u32 key;
1135 int err;
1136
1137 key = ntohl(cfg->fc_dst);
1138
1139 if (!fib_valid_key_len(key, plen, extack))
1140 return -EINVAL;
1141
1142 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
1143
1144 fi = fib_create_info(cfg, extack);
1145 if (IS_ERR(fi)) {
1146 err = PTR_ERR(fi);
1147 goto err;
1148 }
1149
1150 l = fib_find_node(t, &tp, key);
1151 fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
1152 tb->tb_id) : NULL;
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163 if (fa && fa->fa_tos == tos &&
1164 fa->fa_info->fib_priority == fi->fib_priority) {
1165 struct fib_alias *fa_first, *fa_match;
1166
1167 err = -EEXIST;
1168 if (cfg->fc_nlflags & NLM_F_EXCL)
1169 goto out;
1170
1171 nlflags &= ~NLM_F_EXCL;
1172
1173
1174
1175
1176
1177
1178 fa_match = NULL;
1179 fa_first = fa;
1180 hlist_for_each_entry_from(fa, fa_list) {
1181 if ((fa->fa_slen != slen) ||
1182 (fa->tb_id != tb->tb_id) ||
1183 (fa->fa_tos != tos))
1184 break;
1185 if (fa->fa_info->fib_priority != fi->fib_priority)
1186 break;
1187 if (fa->fa_type == cfg->fc_type &&
1188 fa->fa_info == fi) {
1189 fa_match = fa;
1190 break;
1191 }
1192 }
1193
1194 if (cfg->fc_nlflags & NLM_F_REPLACE) {
1195 struct fib_info *fi_drop;
1196 u8 state;
1197
1198 nlflags |= NLM_F_REPLACE;
1199 fa = fa_first;
1200 if (fa_match) {
1201 if (fa == fa_match)
1202 err = 0;
1203 goto out;
1204 }
1205 err = -ENOBUFS;
1206 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1207 if (!new_fa)
1208 goto out;
1209
1210 fi_drop = fa->fa_info;
1211 new_fa->fa_tos = fa->fa_tos;
1212 new_fa->fa_info = fi;
1213 new_fa->fa_type = cfg->fc_type;
1214 state = fa->fa_state;
1215 new_fa->fa_state = state & ~FA_S_ACCESSED;
1216 new_fa->fa_slen = fa->fa_slen;
1217 new_fa->tb_id = tb->tb_id;
1218 new_fa->fa_default = -1;
1219
1220 err = call_fib_entry_notifiers(net,
1221 FIB_EVENT_ENTRY_REPLACE,
1222 key, plen, new_fa,
1223 extack);
1224 if (err)
1225 goto out_free_new_fa;
1226
1227 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1228 tb->tb_id, &cfg->fc_nlinfo, nlflags);
1229
1230 hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1231
1232 alias_free_mem_rcu(fa);
1233
1234 fib_release_info(fi_drop);
1235 if (state & FA_S_ACCESSED)
1236 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1237
1238 goto succeeded;
1239 }
1240
1241
1242
1243
1244 if (fa_match)
1245 goto out;
1246
1247 if (cfg->fc_nlflags & NLM_F_APPEND) {
1248 event = FIB_EVENT_ENTRY_APPEND;
1249 nlflags |= NLM_F_APPEND;
1250 } else {
1251 fa = fa_first;
1252 }
1253 }
1254 err = -ENOENT;
1255 if (!(cfg->fc_nlflags & NLM_F_CREATE))
1256 goto out;
1257
1258 nlflags |= NLM_F_CREATE;
1259 err = -ENOBUFS;
1260 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1261 if (!new_fa)
1262 goto out;
1263
1264 new_fa->fa_info = fi;
1265 new_fa->fa_tos = tos;
1266 new_fa->fa_type = cfg->fc_type;
1267 new_fa->fa_state = 0;
1268 new_fa->fa_slen = slen;
1269 new_fa->tb_id = tb->tb_id;
1270 new_fa->fa_default = -1;
1271
1272 err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
1273 if (err)
1274 goto out_free_new_fa;
1275
1276
1277 err = fib_insert_alias(t, tp, l, new_fa, fa, key);
1278 if (err)
1279 goto out_fib_notif;
1280
1281 if (!plen)
1282 tb->tb_num_default++;
1283
1284 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1285 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1286 &cfg->fc_nlinfo, nlflags);
1287succeeded:
1288 return 0;
1289
1290out_fib_notif:
1291
1292
1293
1294
1295 NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
1296 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
1297 plen, new_fa, NULL);
1298out_free_new_fa:
1299 kmem_cache_free(fn_alias_kmem, new_fa);
1300out:
1301 fib_release_info(fi);
1302err:
1303 return err;
1304}
1305
1306static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
1307{
1308 t_key prefix = n->key;
1309
1310 return (key ^ prefix) & (prefix | -prefix);
1311}
1312
1313
1314int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1315 struct fib_result *res, int fib_flags)
1316{
1317 struct trie *t = (struct trie *) tb->tb_data;
1318#ifdef CONFIG_IP_FIB_TRIE_STATS
1319 struct trie_use_stats __percpu *stats = t->stats;
1320#endif
1321 const t_key key = ntohl(flp->daddr);
1322 struct key_vector *n, *pn;
1323 struct fib_alias *fa;
1324 unsigned long index;
1325 t_key cindex;
1326
1327 pn = t->kv;
1328 cindex = 0;
1329
1330 n = get_child_rcu(pn, cindex);
1331 if (!n) {
1332 trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
1333 return -EAGAIN;
1334 }
1335
1336#ifdef CONFIG_IP_FIB_TRIE_STATS
1337 this_cpu_inc(stats->gets);
1338#endif
1339
1340
1341 for (;;) {
1342 index = get_cindex(key, n);
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358 if (index >= (1ul << n->bits))
1359 break;
1360
1361
1362 if (IS_LEAF(n))
1363 goto found;
1364
1365
1366
1367
1368 if (n->slen > n->pos) {
1369 pn = n;
1370 cindex = index;
1371 }
1372
1373 n = get_child_rcu(n, index);
1374 if (unlikely(!n))
1375 goto backtrace;
1376 }
1377
1378
1379 for (;;) {
1380
1381 struct key_vector __rcu **cptr = n->tnode;
1382
1383
1384
1385
1386
1387 if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
1388 goto backtrace;
1389
1390
1391 if (unlikely(IS_LEAF(n)))
1392 break;
1393
1394
1395
1396
1397
1398
1399 while ((n = rcu_dereference(*cptr)) == NULL) {
1400backtrace:
1401#ifdef CONFIG_IP_FIB_TRIE_STATS
1402 if (!n)
1403 this_cpu_inc(stats->null_node_hit);
1404#endif
1405
1406
1407
1408
1409
1410 while (!cindex) {
1411 t_key pkey = pn->key;
1412
1413
1414
1415
1416
1417 if (IS_TRIE(pn)) {
1418 trace_fib_table_lookup(tb->tb_id, flp,
1419 NULL, -EAGAIN);
1420 return -EAGAIN;
1421 }
1422#ifdef CONFIG_IP_FIB_TRIE_STATS
1423 this_cpu_inc(stats->backtrack);
1424#endif
1425
1426 pn = node_parent_rcu(pn);
1427 cindex = get_index(pkey, pn);
1428 }
1429
1430
1431 cindex &= cindex - 1;
1432
1433
1434 cptr = &pn->tnode[cindex];
1435 }
1436 }
1437
1438found:
1439
1440 index = key ^ n->key;
1441
1442
1443 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
1444 struct fib_info *fi = fa->fa_info;
1445 int nhsel, err;
1446
1447 if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
1448 if (index >= (1ul << fa->fa_slen))
1449 continue;
1450 }
1451 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1452 continue;
1453 if (fi->fib_dead)
1454 continue;
1455 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1456 continue;
1457 fib_alias_accessed(fa);
1458 err = fib_props[fa->fa_type].error;
1459 if (unlikely(err < 0)) {
1460out_reject:
1461#ifdef CONFIG_IP_FIB_TRIE_STATS
1462 this_cpu_inc(stats->semantic_match_passed);
1463#endif
1464 trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
1465 return err;
1466 }
1467 if (fi->fib_flags & RTNH_F_DEAD)
1468 continue;
1469
1470 if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
1471 err = fib_props[RTN_BLACKHOLE].error;
1472 goto out_reject;
1473 }
1474
1475 for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
1476 struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
1477
1478 if (nhc->nhc_flags & RTNH_F_DEAD)
1479 continue;
1480 if (ip_ignore_linkdown(nhc->nhc_dev) &&
1481 nhc->nhc_flags & RTNH_F_LINKDOWN &&
1482 !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1483 continue;
1484 if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1485 if (flp->flowi4_oif &&
1486 flp->flowi4_oif != nhc->nhc_oif)
1487 continue;
1488 }
1489
1490 if (!(fib_flags & FIB_LOOKUP_NOREF))
1491 refcount_inc(&fi->fib_clntref);
1492
1493 res->prefix = htonl(n->key);
1494 res->prefixlen = KEYLENGTH - fa->fa_slen;
1495 res->nh_sel = nhsel;
1496 res->nhc = nhc;
1497 res->type = fa->fa_type;
1498 res->scope = fi->fib_scope;
1499 res->fi = fi;
1500 res->table = tb;
1501 res->fa_head = &n->leaf;
1502#ifdef CONFIG_IP_FIB_TRIE_STATS
1503 this_cpu_inc(stats->semantic_match_passed);
1504#endif
1505 trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
1506
1507 return err;
1508 }
1509 }
1510#ifdef CONFIG_IP_FIB_TRIE_STATS
1511 this_cpu_inc(stats->semantic_match_miss);
1512#endif
1513 goto backtrace;
1514}
1515EXPORT_SYMBOL_GPL(fib_table_lookup);
1516
1517static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1518 struct key_vector *l, struct fib_alias *old)
1519{
1520
1521 struct hlist_node **pprev = old->fa_list.pprev;
1522 struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
1523
1524
1525 hlist_del_rcu(&old->fa_list);
1526
1527
1528
1529
1530 if (hlist_empty(&l->leaf)) {
1531 if (tp->slen == l->slen)
1532 node_pull_suffix(tp, tp->pos);
1533 put_child_root(tp, l->key, NULL);
1534 node_free(l);
1535 trie_rebalance(t, tp);
1536 return;
1537 }
1538
1539
1540 if (*pprev)
1541 return;
1542
1543
1544 l->slen = fa->fa_slen;
1545 node_pull_suffix(tp, fa->fa_slen);
1546}
1547
1548
1549int fib_table_delete(struct net *net, struct fib_table *tb,
1550 struct fib_config *cfg, struct netlink_ext_ack *extack)
1551{
1552 struct trie *t = (struct trie *) tb->tb_data;
1553 struct fib_alias *fa, *fa_to_delete;
1554 struct key_vector *l, *tp;
1555 u8 plen = cfg->fc_dst_len;
1556 u8 slen = KEYLENGTH - plen;
1557 u8 tos = cfg->fc_tos;
1558 u32 key;
1559
1560 key = ntohl(cfg->fc_dst);
1561
1562 if (!fib_valid_key_len(key, plen, extack))
1563 return -EINVAL;
1564
1565 l = fib_find_node(t, &tp, key);
1566 if (!l)
1567 return -ESRCH;
1568
1569 fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id);
1570 if (!fa)
1571 return -ESRCH;
1572
1573 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1574
1575 fa_to_delete = NULL;
1576 hlist_for_each_entry_from(fa, fa_list) {
1577 struct fib_info *fi = fa->fa_info;
1578
1579 if ((fa->fa_slen != slen) ||
1580 (fa->tb_id != tb->tb_id) ||
1581 (fa->fa_tos != tos))
1582 break;
1583
1584 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1585 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1586 fa->fa_info->fib_scope == cfg->fc_scope) &&
1587 (!cfg->fc_prefsrc ||
1588 fi->fib_prefsrc == cfg->fc_prefsrc) &&
1589 (!cfg->fc_protocol ||
1590 fi->fib_protocol == cfg->fc_protocol) &&
1591 fib_nh_match(cfg, fi, extack) == 0 &&
1592 fib_metrics_match(cfg, fi)) {
1593 fa_to_delete = fa;
1594 break;
1595 }
1596 }
1597
1598 if (!fa_to_delete)
1599 return -ESRCH;
1600
1601 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
1602 fa_to_delete, extack);
1603 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1604 &cfg->fc_nlinfo, 0);
1605
1606 if (!plen)
1607 tb->tb_num_default--;
1608
1609 fib_remove_alias(t, tp, l, fa_to_delete);
1610
1611 if (fa_to_delete->fa_state & FA_S_ACCESSED)
1612 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1613
1614 fib_release_info(fa_to_delete->fa_info);
1615 alias_free_mem_rcu(fa_to_delete);
1616 return 0;
1617}
1618
1619
1620static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
1621{
1622 struct key_vector *pn, *n = *tn;
1623 unsigned long cindex;
1624
1625
1626 do {
1627
1628 pn = n;
1629 cindex = (key > pn->key) ? get_index(key, pn) : 0;
1630
1631 if (cindex >> pn->bits)
1632 break;
1633
1634
1635 n = get_child_rcu(pn, cindex++);
1636 if (!n)
1637 break;
1638
1639
1640 if (IS_LEAF(n) && (n->key >= key))
1641 goto found;
1642 } while (IS_TNODE(n));
1643
1644
1645 while (!IS_TRIE(pn)) {
1646
1647 if (cindex >= (1ul << pn->bits)) {
1648 t_key pkey = pn->key;
1649
1650 pn = node_parent_rcu(pn);
1651 cindex = get_index(pkey, pn) + 1;
1652 continue;
1653 }
1654
1655
1656 n = get_child_rcu(pn, cindex++);
1657 if (!n)
1658 continue;
1659
1660
1661 if (IS_LEAF(n))
1662 goto found;
1663
1664
1665 pn = n;
1666 cindex = 0;
1667 }
1668
1669 *tn = pn;
1670 return NULL;
1671found:
1672
1673 *tn = pn;
1674 return n;
1675}
1676
1677static void fib_trie_free(struct fib_table *tb)
1678{
1679 struct trie *t = (struct trie *)tb->tb_data;
1680 struct key_vector *pn = t->kv;
1681 unsigned long cindex = 1;
1682 struct hlist_node *tmp;
1683 struct fib_alias *fa;
1684
1685
1686 for (;;) {
1687 struct key_vector *n;
1688
1689 if (!(cindex--)) {
1690 t_key pkey = pn->key;
1691
1692 if (IS_TRIE(pn))
1693 break;
1694
1695 n = pn;
1696 pn = node_parent(pn);
1697
1698
1699 put_child_root(pn, n->key, NULL);
1700 node_free(n);
1701
1702 cindex = get_index(pkey, pn);
1703
1704 continue;
1705 }
1706
1707
1708 n = get_child(pn, cindex);
1709 if (!n)
1710 continue;
1711
1712 if (IS_TNODE(n)) {
1713
1714 pn = n;
1715 cindex = 1ul << n->bits;
1716
1717 continue;
1718 }
1719
1720 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1721 hlist_del_rcu(&fa->fa_list);
1722 alias_free_mem_rcu(fa);
1723 }
1724
1725 put_child_root(pn, n->key, NULL);
1726 node_free(n);
1727 }
1728
1729#ifdef CONFIG_IP_FIB_TRIE_STATS
1730 free_percpu(t->stats);
1731#endif
1732 kfree(tb);
1733}
1734
1735struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
1736{
1737 struct trie *ot = (struct trie *)oldtb->tb_data;
1738 struct key_vector *l, *tp = ot->kv;
1739 struct fib_table *local_tb;
1740 struct fib_alias *fa;
1741 struct trie *lt;
1742 t_key key = 0;
1743
1744 if (oldtb->tb_data == oldtb->__data)
1745 return oldtb;
1746
1747 local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
1748 if (!local_tb)
1749 return NULL;
1750
1751 lt = (struct trie *)local_tb->tb_data;
1752
1753 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1754 struct key_vector *local_l = NULL, *local_tp;
1755
1756 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1757 struct fib_alias *new_fa;
1758
1759 if (local_tb->tb_id != fa->tb_id)
1760 continue;
1761
1762
1763 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1764 if (!new_fa)
1765 goto out;
1766
1767 memcpy(new_fa, fa, sizeof(*fa));
1768
1769
1770 if (!local_l)
1771 local_l = fib_find_node(lt, &local_tp, l->key);
1772
1773 if (fib_insert_alias(lt, local_tp, local_l, new_fa,
1774 NULL, l->key)) {
1775 kmem_cache_free(fn_alias_kmem, new_fa);
1776 goto out;
1777 }
1778 }
1779
1780
1781 key = l->key + 1;
1782 if (key < l->key)
1783 break;
1784 }
1785
1786 return local_tb;
1787out:
1788 fib_trie_free(local_tb);
1789
1790 return NULL;
1791}
1792
1793
1794void fib_table_flush_external(struct fib_table *tb)
1795{
1796 struct trie *t = (struct trie *)tb->tb_data;
1797 struct key_vector *pn = t->kv;
1798 unsigned long cindex = 1;
1799 struct hlist_node *tmp;
1800 struct fib_alias *fa;
1801
1802
1803 for (;;) {
1804 unsigned char slen = 0;
1805 struct key_vector *n;
1806
1807 if (!(cindex--)) {
1808 t_key pkey = pn->key;
1809
1810
1811 if (IS_TRIE(pn))
1812 break;
1813
1814
1815 if (pn->slen > pn->pos)
1816 update_suffix(pn);
1817
1818
1819 pn = resize(t, pn);
1820 cindex = get_index(pkey, pn);
1821
1822 continue;
1823 }
1824
1825
1826 n = get_child(pn, cindex);
1827 if (!n)
1828 continue;
1829
1830 if (IS_TNODE(n)) {
1831
1832 pn = n;
1833 cindex = 1ul << n->bits;
1834
1835 continue;
1836 }
1837
1838 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1839
1840
1841
1842 if (tb->tb_id != fa->tb_id) {
1843 hlist_del_rcu(&fa->fa_list);
1844 alias_free_mem_rcu(fa);
1845 continue;
1846 }
1847
1848
1849 slen = fa->fa_slen;
1850 }
1851
1852
1853 n->slen = slen;
1854
1855 if (hlist_empty(&n->leaf)) {
1856 put_child_root(pn, n->key, NULL);
1857 node_free(n);
1858 }
1859 }
1860}
1861
1862
1863int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
1864{
1865 struct trie *t = (struct trie *)tb->tb_data;
1866 struct key_vector *pn = t->kv;
1867 unsigned long cindex = 1;
1868 struct hlist_node *tmp;
1869 struct fib_alias *fa;
1870 int found = 0;
1871
1872
1873 for (;;) {
1874 unsigned char slen = 0;
1875 struct key_vector *n;
1876
1877 if (!(cindex--)) {
1878 t_key pkey = pn->key;
1879
1880
1881 if (IS_TRIE(pn))
1882 break;
1883
1884
1885 if (pn->slen > pn->pos)
1886 update_suffix(pn);
1887
1888
1889 pn = resize(t, pn);
1890 cindex = get_index(pkey, pn);
1891
1892 continue;
1893 }
1894
1895
1896 n = get_child(pn, cindex);
1897 if (!n)
1898 continue;
1899
1900 if (IS_TNODE(n)) {
1901
1902 pn = n;
1903 cindex = 1ul << n->bits;
1904
1905 continue;
1906 }
1907
1908 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1909 struct fib_info *fi = fa->fa_info;
1910
1911 if (!fi || tb->tb_id != fa->tb_id ||
1912 (!(fi->fib_flags & RTNH_F_DEAD) &&
1913 !fib_props[fa->fa_type].error)) {
1914 slen = fa->fa_slen;
1915 continue;
1916 }
1917
1918
1919
1920
1921 if (!flush_all && fib_props[fa->fa_type].error) {
1922 slen = fa->fa_slen;
1923 continue;
1924 }
1925
1926 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
1927 n->key,
1928 KEYLENGTH - fa->fa_slen, fa,
1929 NULL);
1930 hlist_del_rcu(&fa->fa_list);
1931 fib_release_info(fa->fa_info);
1932 alias_free_mem_rcu(fa);
1933 found++;
1934 }
1935
1936
1937 n->slen = slen;
1938
1939 if (hlist_empty(&n->leaf)) {
1940 put_child_root(pn, n->key, NULL);
1941 node_free(n);
1942 }
1943 }
1944
1945 pr_debug("trie_flush found=%d\n", found);
1946 return found;
1947}
1948
1949
1950static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
1951 struct nl_info *info)
1952{
1953 struct trie *t = (struct trie *)tb->tb_data;
1954 struct key_vector *pn = t->kv;
1955 unsigned long cindex = 1;
1956 struct fib_alias *fa;
1957
1958 for (;;) {
1959 struct key_vector *n;
1960
1961 if (!(cindex--)) {
1962 t_key pkey = pn->key;
1963
1964 if (IS_TRIE(pn))
1965 break;
1966
1967 pn = node_parent(pn);
1968 cindex = get_index(pkey, pn);
1969 continue;
1970 }
1971
1972
1973 n = get_child(pn, cindex);
1974 if (!n)
1975 continue;
1976
1977 if (IS_TNODE(n)) {
1978
1979 pn = n;
1980 cindex = 1ul << n->bits;
1981
1982 continue;
1983 }
1984
1985 hlist_for_each_entry(fa, &n->leaf, fa_list) {
1986 struct fib_info *fi = fa->fa_info;
1987
1988 if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
1989 continue;
1990
1991 rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
1992 KEYLENGTH - fa->fa_slen, tb->tb_id,
1993 info, NLM_F_REPLACE);
1994
1995
1996
1997
1998
1999 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
2000 n->key,
2001 KEYLENGTH - fa->fa_slen, fa,
2002 NULL);
2003 }
2004 }
2005}
2006
2007void fib_info_notify_update(struct net *net, struct nl_info *info)
2008{
2009 unsigned int h;
2010
2011 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2012 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2013 struct fib_table *tb;
2014
2015 hlist_for_each_entry_rcu(tb, head, tb_hlist)
2016 __fib_info_notify_update(net, tb, info);
2017 }
2018}
2019
2020static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
2021 struct notifier_block *nb,
2022 struct netlink_ext_ack *extack)
2023{
2024 struct fib_alias *fa;
2025 int err;
2026
2027 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2028 struct fib_info *fi = fa->fa_info;
2029
2030 if (!fi)
2031 continue;
2032
2033
2034
2035
2036 if (tb->tb_id != fa->tb_id)
2037 continue;
2038
2039 err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key,
2040 KEYLENGTH - fa->fa_slen,
2041 fa, extack);
2042 if (err)
2043 return err;
2044 }
2045 return 0;
2046}
2047
2048static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
2049 struct netlink_ext_ack *extack)
2050{
2051 struct trie *t = (struct trie *)tb->tb_data;
2052 struct key_vector *l, *tp = t->kv;
2053 t_key key = 0;
2054 int err;
2055
2056 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2057 err = fib_leaf_notify(l, tb, nb, extack);
2058 if (err)
2059 return err;
2060
2061 key = l->key + 1;
2062
2063 if (key < l->key)
2064 break;
2065 }
2066 return 0;
2067}
2068
2069int fib_notify(struct net *net, struct notifier_block *nb,
2070 struct netlink_ext_ack *extack)
2071{
2072 unsigned int h;
2073 int err;
2074
2075 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2076 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2077 struct fib_table *tb;
2078
2079 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2080 err = fib_table_notify(tb, nb, extack);
2081 if (err)
2082 return err;
2083 }
2084 }
2085 return 0;
2086}
2087
2088static void __trie_free_rcu(struct rcu_head *head)
2089{
2090 struct fib_table *tb = container_of(head, struct fib_table, rcu);
2091#ifdef CONFIG_IP_FIB_TRIE_STATS
2092 struct trie *t = (struct trie *)tb->tb_data;
2093
2094 if (tb->tb_data == tb->__data)
2095 free_percpu(t->stats);
2096#endif
2097 kfree(tb);
2098}
2099
2100void fib_free_table(struct fib_table *tb)
2101{
2102 call_rcu(&tb->rcu, __trie_free_rcu);
2103}
2104
2105static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
2106 struct sk_buff *skb, struct netlink_callback *cb,
2107 struct fib_dump_filter *filter)
2108{
2109 unsigned int flags = NLM_F_MULTI;
2110 __be32 xkey = htonl(l->key);
2111 int i, s_i, i_fa, s_fa, err;
2112 struct fib_alias *fa;
2113
2114 if (filter->filter_set ||
2115 !filter->dump_exceptions || !filter->dump_routes)
2116 flags |= NLM_F_DUMP_FILTERED;
2117
2118 s_i = cb->args[4];
2119 s_fa = cb->args[5];
2120 i = 0;
2121
2122
2123 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2124 struct fib_info *fi = fa->fa_info;
2125
2126 if (i < s_i)
2127 goto next;
2128
2129 i_fa = 0;
2130
2131 if (tb->tb_id != fa->tb_id)
2132 goto next;
2133
2134 if (filter->filter_set) {
2135 if (filter->rt_type && fa->fa_type != filter->rt_type)
2136 goto next;
2137
2138 if ((filter->protocol &&
2139 fi->fib_protocol != filter->protocol))
2140 goto next;
2141
2142 if (filter->dev &&
2143 !fib_info_nh_uses_dev(fi, filter->dev))
2144 goto next;
2145 }
2146
2147 if (filter->dump_routes) {
2148 if (!s_fa) {
2149 err = fib_dump_info(skb,
2150 NETLINK_CB(cb->skb).portid,
2151 cb->nlh->nlmsg_seq,
2152 RTM_NEWROUTE,
2153 tb->tb_id, fa->fa_type,
2154 xkey,
2155 KEYLENGTH - fa->fa_slen,
2156 fa->fa_tos, fi, flags);
2157 if (err < 0)
2158 goto stop;
2159 }
2160
2161 i_fa++;
2162 }
2163
2164 if (filter->dump_exceptions) {
2165 err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2166 &i_fa, s_fa, flags);
2167 if (err < 0)
2168 goto stop;
2169 }
2170
2171next:
2172 i++;
2173 }
2174
2175 cb->args[4] = i;
2176 return skb->len;
2177
2178stop:
2179 cb->args[4] = i;
2180 cb->args[5] = i_fa;
2181 return err;
2182}
2183
2184
2185int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
2186 struct netlink_callback *cb, struct fib_dump_filter *filter)
2187{
2188 struct trie *t = (struct trie *)tb->tb_data;
2189 struct key_vector *l, *tp = t->kv;
2190
2191
2192
2193 int count = cb->args[2];
2194 t_key key = cb->args[3];
2195
2196
2197
2198
2199 if (count && !key)
2200 return skb->len;
2201
2202 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2203 int err;
2204
2205 err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
2206 if (err < 0) {
2207 cb->args[3] = key;
2208 cb->args[2] = count;
2209 return err;
2210 }
2211
2212 ++count;
2213 key = l->key + 1;
2214
2215 memset(&cb->args[4], 0,
2216 sizeof(cb->args) - 4*sizeof(cb->args[0]));
2217
2218
2219 if (key < l->key)
2220 break;
2221 }
2222
2223 cb->args[3] = key;
2224 cb->args[2] = count;
2225
2226 return skb->len;
2227}
2228
2229void __init fib_trie_init(void)
2230{
2231 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2232 sizeof(struct fib_alias),
2233 0, SLAB_PANIC, NULL);
2234
2235 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2236 LEAF_SIZE,
2237 0, SLAB_PANIC, NULL);
2238}
2239
2240struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
2241{
2242 struct fib_table *tb;
2243 struct trie *t;
2244 size_t sz = sizeof(*tb);
2245
2246 if (!alias)
2247 sz += sizeof(struct trie);
2248
2249 tb = kzalloc(sz, GFP_KERNEL);
2250 if (!tb)
2251 return NULL;
2252
2253 tb->tb_id = id;
2254 tb->tb_num_default = 0;
2255 tb->tb_data = (alias ? alias->__data : tb->__data);
2256
2257 if (alias)
2258 return tb;
2259
2260 t = (struct trie *) tb->tb_data;
2261 t->kv[0].pos = KEYLENGTH;
2262 t->kv[0].slen = KEYLENGTH;
2263#ifdef CONFIG_IP_FIB_TRIE_STATS
2264 t->stats = alloc_percpu(struct trie_use_stats);
2265 if (!t->stats) {
2266 kfree(tb);
2267 tb = NULL;
2268 }
2269#endif
2270
2271 return tb;
2272}
2273
2274#ifdef CONFIG_PROC_FS
2275
2276struct fib_trie_iter {
2277 struct seq_net_private p;
2278 struct fib_table *tb;
2279 struct key_vector *tnode;
2280 unsigned int index;
2281 unsigned int depth;
2282};
2283
2284static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
2285{
2286 unsigned long cindex = iter->index;
2287 struct key_vector *pn = iter->tnode;
2288 t_key pkey;
2289
2290 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2291 iter->tnode, iter->index, iter->depth);
2292
2293 while (!IS_TRIE(pn)) {
2294 while (cindex < child_length(pn)) {
2295 struct key_vector *n = get_child_rcu(pn, cindex++);
2296
2297 if (!n)
2298 continue;
2299
2300 if (IS_LEAF(n)) {
2301 iter->tnode = pn;
2302 iter->index = cindex;
2303 } else {
2304
2305 iter->tnode = n;
2306 iter->index = 0;
2307 ++iter->depth;
2308 }
2309
2310 return n;
2311 }
2312
2313
2314 pkey = pn->key;
2315 pn = node_parent_rcu(pn);
2316 cindex = get_index(pkey, pn) + 1;
2317 --iter->depth;
2318 }
2319
2320
2321 iter->tnode = pn;
2322 iter->index = 0;
2323
2324 return NULL;
2325}
2326
2327static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2328 struct trie *t)
2329{
2330 struct key_vector *n, *pn;
2331
2332 if (!t)
2333 return NULL;
2334
2335 pn = t->kv;
2336 n = rcu_dereference(pn->tnode[0]);
2337 if (!n)
2338 return NULL;
2339
2340 if (IS_TNODE(n)) {
2341 iter->tnode = n;
2342 iter->index = 0;
2343 iter->depth = 1;
2344 } else {
2345 iter->tnode = pn;
2346 iter->index = 0;
2347 iter->depth = 0;
2348 }
2349
2350 return n;
2351}
2352
2353static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2354{
2355 struct key_vector *n;
2356 struct fib_trie_iter iter;
2357
2358 memset(s, 0, sizeof(*s));
2359
2360 rcu_read_lock();
2361 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2362 if (IS_LEAF(n)) {
2363 struct fib_alias *fa;
2364
2365 s->leaves++;
2366 s->totdepth += iter.depth;
2367 if (iter.depth > s->maxdepth)
2368 s->maxdepth = iter.depth;
2369
2370 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
2371 ++s->prefixes;
2372 } else {
2373 s->tnodes++;
2374 if (n->bits < MAX_STAT_DEPTH)
2375 s->nodesizes[n->bits]++;
2376 s->nullpointers += tn_info(n)->empty_children;
2377 }
2378 }
2379 rcu_read_unlock();
2380}
2381
2382
2383
2384
2385static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2386{
2387 unsigned int i, max, pointers, bytes, avdepth;
2388
2389 if (stat->leaves)
2390 avdepth = stat->totdepth*100 / stat->leaves;
2391 else
2392 avdepth = 0;
2393
2394 seq_printf(seq, "\tAver depth: %u.%02d\n",
2395 avdepth / 100, avdepth % 100);
2396 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2397
2398 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2399 bytes = LEAF_SIZE * stat->leaves;
2400
2401 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2402 bytes += sizeof(struct fib_alias) * stat->prefixes;
2403
2404 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2405 bytes += TNODE_SIZE(0) * stat->tnodes;
2406
2407 max = MAX_STAT_DEPTH;
2408 while (max > 0 && stat->nodesizes[max-1] == 0)
2409 max--;
2410
2411 pointers = 0;
2412 for (i = 1; i < max; i++)
2413 if (stat->nodesizes[i] != 0) {
2414 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2415 pointers += (1<<i) * stat->nodesizes[i];
2416 }
2417 seq_putc(seq, '\n');
2418 seq_printf(seq, "\tPointers: %u\n", pointers);
2419
2420 bytes += sizeof(struct key_vector *) * pointers;
2421 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2422 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2423}
2424
2425#ifdef CONFIG_IP_FIB_TRIE_STATS
2426static void trie_show_usage(struct seq_file *seq,
2427 const struct trie_use_stats __percpu *stats)
2428{
2429 struct trie_use_stats s = { 0 };
2430 int cpu;
2431
2432
2433 for_each_possible_cpu(cpu) {
2434 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
2435
2436 s.gets += pcpu->gets;
2437 s.backtrack += pcpu->backtrack;
2438 s.semantic_match_passed += pcpu->semantic_match_passed;
2439 s.semantic_match_miss += pcpu->semantic_match_miss;
2440 s.null_node_hit += pcpu->null_node_hit;
2441 s.resize_node_skipped += pcpu->resize_node_skipped;
2442 }
2443
2444 seq_printf(seq, "\nCounters:\n---------\n");
2445 seq_printf(seq, "gets = %u\n", s.gets);
2446 seq_printf(seq, "backtracks = %u\n", s.backtrack);
2447 seq_printf(seq, "semantic match passed = %u\n",
2448 s.semantic_match_passed);
2449 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
2450 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
2451 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
2452}
2453#endif
2454
2455static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2456{
2457 if (tb->tb_id == RT_TABLE_LOCAL)
2458 seq_puts(seq, "Local:\n");
2459 else if (tb->tb_id == RT_TABLE_MAIN)
2460 seq_puts(seq, "Main:\n");
2461 else
2462 seq_printf(seq, "Id %d:\n", tb->tb_id);
2463}
2464
2465
2466static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2467{
2468 struct net *net = (struct net *)seq->private;
2469 unsigned int h;
2470
2471 seq_printf(seq,
2472 "Basic info: size of leaf:"
2473 " %zd bytes, size of tnode: %zd bytes.\n",
2474 LEAF_SIZE, TNODE_SIZE(0));
2475
2476 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2477 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2478 struct fib_table *tb;
2479
2480 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2481 struct trie *t = (struct trie *) tb->tb_data;
2482 struct trie_stat stat;
2483
2484 if (!t)
2485 continue;
2486
2487 fib_table_print(seq, tb);
2488
2489 trie_collect_stats(t, &stat);
2490 trie_show_stats(seq, &stat);
2491#ifdef CONFIG_IP_FIB_TRIE_STATS
2492 trie_show_usage(seq, t->stats);
2493#endif
2494 }
2495 }
2496
2497 return 0;
2498}
2499
2500static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2501{
2502 struct fib_trie_iter *iter = seq->private;
2503 struct net *net = seq_file_net(seq);
2504 loff_t idx = 0;
2505 unsigned int h;
2506
2507 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2508 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2509 struct fib_table *tb;
2510
2511 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2512 struct key_vector *n;
2513
2514 for (n = fib_trie_get_first(iter,
2515 (struct trie *) tb->tb_data);
2516 n; n = fib_trie_get_next(iter))
2517 if (pos == idx++) {
2518 iter->tb = tb;
2519 return n;
2520 }
2521 }
2522 }
2523
2524 return NULL;
2525}
2526
2527static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2528 __acquires(RCU)
2529{
2530 rcu_read_lock();
2531 return fib_trie_get_idx(seq, *pos);
2532}
2533
2534static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2535{
2536 struct fib_trie_iter *iter = seq->private;
2537 struct net *net = seq_file_net(seq);
2538 struct fib_table *tb = iter->tb;
2539 struct hlist_node *tb_node;
2540 unsigned int h;
2541 struct key_vector *n;
2542
2543 ++*pos;
2544
2545 n = fib_trie_get_next(iter);
2546 if (n)
2547 return n;
2548
2549
2550 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2551 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2552 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2553 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2554 if (n)
2555 goto found;
2556 }
2557
2558
2559 while (++h < FIB_TABLE_HASHSZ) {
2560 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2561 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2562 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2563 if (n)
2564 goto found;
2565 }
2566 }
2567 return NULL;
2568
2569found:
2570 iter->tb = tb;
2571 return n;
2572}
2573
2574static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2575 __releases(RCU)
2576{
2577 rcu_read_unlock();
2578}
2579
2580static void seq_indent(struct seq_file *seq, int n)
2581{
2582 while (n-- > 0)
2583 seq_puts(seq, " ");
2584}
2585
2586static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2587{
2588 switch (s) {
2589 case RT_SCOPE_UNIVERSE: return "universe";
2590 case RT_SCOPE_SITE: return "site";
2591 case RT_SCOPE_LINK: return "link";
2592 case RT_SCOPE_HOST: return "host";
2593 case RT_SCOPE_NOWHERE: return "nowhere";
2594 default:
2595 snprintf(buf, len, "scope=%d", s);
2596 return buf;
2597 }
2598}
2599
2600static const char *const rtn_type_names[__RTN_MAX] = {
2601 [RTN_UNSPEC] = "UNSPEC",
2602 [RTN_UNICAST] = "UNICAST",
2603 [RTN_LOCAL] = "LOCAL",
2604 [RTN_BROADCAST] = "BROADCAST",
2605 [RTN_ANYCAST] = "ANYCAST",
2606 [RTN_MULTICAST] = "MULTICAST",
2607 [RTN_BLACKHOLE] = "BLACKHOLE",
2608 [RTN_UNREACHABLE] = "UNREACHABLE",
2609 [RTN_PROHIBIT] = "PROHIBIT",
2610 [RTN_THROW] = "THROW",
2611 [RTN_NAT] = "NAT",
2612 [RTN_XRESOLVE] = "XRESOLVE",
2613};
2614
2615static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2616{
2617 if (t < __RTN_MAX && rtn_type_names[t])
2618 return rtn_type_names[t];
2619 snprintf(buf, len, "type %u", t);
2620 return buf;
2621}
2622
2623
2624static int fib_trie_seq_show(struct seq_file *seq, void *v)
2625{
2626 const struct fib_trie_iter *iter = seq->private;
2627 struct key_vector *n = v;
2628
2629 if (IS_TRIE(node_parent_rcu(n)))
2630 fib_table_print(seq, iter->tb);
2631
2632 if (IS_TNODE(n)) {
2633 __be32 prf = htonl(n->key);
2634
2635 seq_indent(seq, iter->depth-1);
2636 seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
2637 &prf, KEYLENGTH - n->pos - n->bits, n->bits,
2638 tn_info(n)->full_children,
2639 tn_info(n)->empty_children);
2640 } else {
2641 __be32 val = htonl(n->key);
2642 struct fib_alias *fa;
2643
2644 seq_indent(seq, iter->depth);
2645 seq_printf(seq, " |-- %pI4\n", &val);
2646
2647 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
2648 char buf1[32], buf2[32];
2649
2650 seq_indent(seq, iter->depth + 1);
2651 seq_printf(seq, " /%zu %s %s",
2652 KEYLENGTH - fa->fa_slen,
2653 rtn_scope(buf1, sizeof(buf1),
2654 fa->fa_info->fib_scope),
2655 rtn_type(buf2, sizeof(buf2),
2656 fa->fa_type));
2657 if (fa->fa_tos)
2658 seq_printf(seq, " tos=%d", fa->fa_tos);
2659 seq_putc(seq, '\n');
2660 }
2661 }
2662
2663 return 0;
2664}
2665
2666static const struct seq_operations fib_trie_seq_ops = {
2667 .start = fib_trie_seq_start,
2668 .next = fib_trie_seq_next,
2669 .stop = fib_trie_seq_stop,
2670 .show = fib_trie_seq_show,
2671};
2672
2673struct fib_route_iter {
2674 struct seq_net_private p;
2675 struct fib_table *main_tb;
2676 struct key_vector *tnode;
2677 loff_t pos;
2678 t_key key;
2679};
2680
2681static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
2682 loff_t pos)
2683{
2684 struct key_vector *l, **tp = &iter->tnode;
2685 t_key key;
2686
2687
2688 if (iter->pos > 0 && pos >= iter->pos) {
2689 key = iter->key;
2690 } else {
2691 iter->pos = 1;
2692 key = 0;
2693 }
2694
2695 pos -= iter->pos;
2696
2697 while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
2698 key = l->key + 1;
2699 iter->pos++;
2700 l = NULL;
2701
2702
2703 if (!key)
2704 break;
2705 }
2706
2707 if (l)
2708 iter->key = l->key;
2709 else
2710 iter->pos = 0;
2711
2712 return l;
2713}
2714
2715static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2716 __acquires(RCU)
2717{
2718 struct fib_route_iter *iter = seq->private;
2719 struct fib_table *tb;
2720 struct trie *t;
2721
2722 rcu_read_lock();
2723
2724 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
2725 if (!tb)
2726 return NULL;
2727
2728 iter->main_tb = tb;
2729 t = (struct trie *)tb->tb_data;
2730 iter->tnode = t->kv;
2731
2732 if (*pos != 0)
2733 return fib_route_get_idx(iter, *pos);
2734
2735 iter->pos = 0;
2736 iter->key = KEY_MAX;
2737
2738 return SEQ_START_TOKEN;
2739}
2740
2741static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2742{
2743 struct fib_route_iter *iter = seq->private;
2744 struct key_vector *l = NULL;
2745 t_key key = iter->key + 1;
2746
2747 ++*pos;
2748
2749
2750 if ((v == SEQ_START_TOKEN) || key)
2751 l = leaf_walk_rcu(&iter->tnode, key);
2752
2753 if (l) {
2754 iter->key = l->key;
2755 iter->pos++;
2756 } else {
2757 iter->pos = 0;
2758 }
2759
2760 return l;
2761}
2762
2763static void fib_route_seq_stop(struct seq_file *seq, void *v)
2764 __releases(RCU)
2765{
2766 rcu_read_unlock();
2767}
2768
2769static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
2770{
2771 unsigned int flags = 0;
2772
2773 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2774 flags = RTF_REJECT;
2775 if (fi) {
2776 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2777
2778 if (nhc->nhc_gw.ipv4)
2779 flags |= RTF_GATEWAY;
2780 }
2781 if (mask == htonl(0xFFFFFFFF))
2782 flags |= RTF_HOST;
2783 flags |= RTF_UP;
2784 return flags;
2785}
2786
2787
2788
2789
2790
2791
2792
2793static int fib_route_seq_show(struct seq_file *seq, void *v)
2794{
2795 struct fib_route_iter *iter = seq->private;
2796 struct fib_table *tb = iter->main_tb;
2797 struct fib_alias *fa;
2798 struct key_vector *l = v;
2799 __be32 prefix;
2800
2801 if (v == SEQ_START_TOKEN) {
2802 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2803 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2804 "\tWindow\tIRTT");
2805 return 0;
2806 }
2807
2808 prefix = htonl(l->key);
2809
2810 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2811 struct fib_info *fi = fa->fa_info;
2812 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2813 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2814
2815 if ((fa->fa_type == RTN_BROADCAST) ||
2816 (fa->fa_type == RTN_MULTICAST))
2817 continue;
2818
2819 if (fa->tb_id != tb->tb_id)
2820 continue;
2821
2822 seq_setwidth(seq, 127);
2823
2824 if (fi) {
2825 struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2826 __be32 gw = 0;
2827
2828 if (nhc->nhc_gw_family == AF_INET)
2829 gw = nhc->nhc_gw.ipv4;
2830
2831 seq_printf(seq,
2832 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2833 "%d\t%08X\t%d\t%u\t%u",
2834 nhc->nhc_dev ? nhc->nhc_dev->name : "*",
2835 prefix, gw, flags, 0, 0,
2836 fi->fib_priority,
2837 mask,
2838 (fi->fib_advmss ?
2839 fi->fib_advmss + 40 : 0),
2840 fi->fib_window,
2841 fi->fib_rtt >> 3);
2842 } else {
2843 seq_printf(seq,
2844 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2845 "%d\t%08X\t%d\t%u\t%u",
2846 prefix, 0, flags, 0, 0, 0,
2847 mask, 0, 0, 0);
2848 }
2849 seq_pad(seq, '\n');
2850 }
2851
2852 return 0;
2853}
2854
2855static const struct seq_operations fib_route_seq_ops = {
2856 .start = fib_route_seq_start,
2857 .next = fib_route_seq_next,
2858 .stop = fib_route_seq_stop,
2859 .show = fib_route_seq_show,
2860};
2861
2862int __net_init fib_proc_init(struct net *net)
2863{
2864 if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
2865 sizeof(struct fib_trie_iter)))
2866 goto out1;
2867
2868 if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
2869 fib_triestat_seq_show, NULL))
2870 goto out2;
2871
2872 if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
2873 sizeof(struct fib_route_iter)))
2874 goto out3;
2875
2876 return 0;
2877
2878out3:
2879 remove_proc_entry("fib_triestat", net->proc_net);
2880out2:
2881 remove_proc_entry("fib_trie", net->proc_net);
2882out1:
2883 return -ENOMEM;
2884}
2885
2886void __net_exit fib_proc_exit(struct net *net)
2887{
2888 remove_proc_entry("fib_trie", net->proc_net);
2889 remove_proc_entry("fib_triestat", net->proc_net);
2890 remove_proc_entry("route", net->proc_net);
2891}
2892
2893#endif
2894