1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39#define VERSION "0.409"
40
41#include <linux/cache.h>
42#include <linux/uaccess.h>
43#include <linux/bitops.h>
44#include <linux/types.h>
45#include <linux/kernel.h>
46#include <linux/mm.h>
47#include <linux/string.h>
48#include <linux/socket.h>
49#include <linux/sockios.h>
50#include <linux/errno.h>
51#include <linux/in.h>
52#include <linux/inet.h>
53#include <linux/inetdevice.h>
54#include <linux/netdevice.h>
55#include <linux/if_arp.h>
56#include <linux/proc_fs.h>
57#include <linux/rcupdate.h>
58#include <linux/skbuff.h>
59#include <linux/netlink.h>
60#include <linux/init.h>
61#include <linux/list.h>
62#include <linux/slab.h>
63#include <linux/export.h>
64#include <linux/vmalloc.h>
65#include <linux/notifier.h>
66#include <net/net_namespace.h>
67#include <net/ip.h>
68#include <net/protocol.h>
69#include <net/route.h>
70#include <net/tcp.h>
71#include <net/sock.h>
72#include <net/ip_fib.h>
73#include <net/fib_notifier.h>
74#include <trace/events/fib.h>
75#include "fib_lookup.h"
76
77static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
78 enum fib_event_type event_type, u32 dst,
79 int dst_len, struct fib_alias *fa)
80{
81 struct fib_entry_notifier_info info = {
82 .dst = dst,
83 .dst_len = dst_len,
84 .fi = fa->fa_info,
85 .tos = fa->fa_tos,
86 .type = fa->fa_type,
87 .tb_id = fa->tb_id,
88 };
89 return call_fib4_notifier(nb, net, event_type, &info.info);
90}
91
92static int call_fib_entry_notifiers(struct net *net,
93 enum fib_event_type event_type, u32 dst,
94 int dst_len, struct fib_alias *fa,
95 struct netlink_ext_ack *extack)
96{
97 struct fib_entry_notifier_info info = {
98 .info.extack = extack,
99 .dst = dst,
100 .dst_len = dst_len,
101 .fi = fa->fa_info,
102 .tos = fa->fa_tos,
103 .type = fa->fa_type,
104 .tb_id = fa->tb_id,
105 };
106 return call_fib4_notifiers(net, event_type, &info.info);
107}
108
109#define MAX_STAT_DEPTH 32
110
111#define KEYLENGTH (8*sizeof(t_key))
112#define KEY_MAX ((t_key)~0)
113
114typedef unsigned int t_key;
115
116#define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
117#define IS_TNODE(n) ((n)->bits)
118#define IS_LEAF(n) (!(n)->bits)
119
120struct key_vector {
121 t_key key;
122 unsigned char pos;
123 unsigned char bits;
124 unsigned char slen;
125 union {
126
127 struct hlist_head leaf;
128
129 struct key_vector __rcu *tnode[0];
130 };
131};
132
133struct tnode {
134 struct rcu_head rcu;
135 t_key empty_children;
136 t_key full_children;
137 struct key_vector __rcu *parent;
138 struct key_vector kv[1];
139#define tn_bits kv[0].bits
140};
141
142#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n])
143#define LEAF_SIZE TNODE_SIZE(1)
144
145#ifdef CONFIG_IP_FIB_TRIE_STATS
146struct trie_use_stats {
147 unsigned int gets;
148 unsigned int backtrack;
149 unsigned int semantic_match_passed;
150 unsigned int semantic_match_miss;
151 unsigned int null_node_hit;
152 unsigned int resize_node_skipped;
153};
154#endif
155
156struct trie_stat {
157 unsigned int totdepth;
158 unsigned int maxdepth;
159 unsigned int tnodes;
160 unsigned int leaves;
161 unsigned int nullpointers;
162 unsigned int prefixes;
163 unsigned int nodesizes[MAX_STAT_DEPTH];
164};
165
166struct trie {
167 struct key_vector kv[1];
168#ifdef CONFIG_IP_FIB_TRIE_STATS
169 struct trie_use_stats __percpu *stats;
170#endif
171};
172
173static struct key_vector *resize(struct trie *t, struct key_vector *tn);
174static unsigned int tnode_free_size;
175
176
177
178
179
180
181unsigned int sysctl_fib_sync_mem = 512 * 1024;
182unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
183unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
184
185static struct kmem_cache *fn_alias_kmem __ro_after_init;
186static struct kmem_cache *trie_leaf_kmem __ro_after_init;
187
188static inline struct tnode *tn_info(struct key_vector *kv)
189{
190 return container_of(kv, struct tnode, kv[0]);
191}
192
193
194#define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
195#define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
196
197
198#define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
199#define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
200
201
202static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
203{
204 if (n)
205 rcu_assign_pointer(tn_info(n)->parent, tp);
206}
207
208#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
209
210
211
212
213static inline unsigned long child_length(const struct key_vector *tn)
214{
215 return (1ul << tn->bits) & ~(1ul);
216}
217
218#define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
219
220static inline unsigned long get_index(t_key key, struct key_vector *kv)
221{
222 unsigned long index = key ^ kv->key;
223
224 if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
225 return 0;
226
227 return index >> kv->pos;
228}
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289static const int halve_threshold = 25;
290static const int inflate_threshold = 50;
291static const int halve_threshold_root = 15;
292static const int inflate_threshold_root = 30;
293
294static void __alias_free_mem(struct rcu_head *head)
295{
296 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
297 kmem_cache_free(fn_alias_kmem, fa);
298}
299
300static inline void alias_free_mem_rcu(struct fib_alias *fa)
301{
302 call_rcu(&fa->rcu, __alias_free_mem);
303}
304
305#define TNODE_KMALLOC_MAX \
306 ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *))
307#define TNODE_VMALLOC_MAX \
308 ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
309
310static void __node_free_rcu(struct rcu_head *head)
311{
312 struct tnode *n = container_of(head, struct tnode, rcu);
313
314 if (!n->tn_bits)
315 kmem_cache_free(trie_leaf_kmem, n);
316 else
317 kvfree(n);
318}
319
320#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
321
322static struct tnode *tnode_alloc(int bits)
323{
324 size_t size;
325
326
327 if (bits > TNODE_VMALLOC_MAX)
328 return NULL;
329
330
331 size = TNODE_SIZE(1ul << bits);
332
333 if (size <= PAGE_SIZE)
334 return kzalloc(size, GFP_KERNEL);
335 else
336 return vzalloc(size);
337}
338
339static inline void empty_child_inc(struct key_vector *n)
340{
341 tn_info(n)->empty_children++;
342
343 if (!tn_info(n)->empty_children)
344 tn_info(n)->full_children++;
345}
346
347static inline void empty_child_dec(struct key_vector *n)
348{
349 if (!tn_info(n)->empty_children)
350 tn_info(n)->full_children--;
351
352 tn_info(n)->empty_children--;
353}
354
355static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
356{
357 struct key_vector *l;
358 struct tnode *kv;
359
360 kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
361 if (!kv)
362 return NULL;
363
364
365 l = kv->kv;
366 l->key = key;
367 l->pos = 0;
368 l->bits = 0;
369 l->slen = fa->fa_slen;
370
371
372 INIT_HLIST_HEAD(&l->leaf);
373 hlist_add_head(&fa->fa_list, &l->leaf);
374
375 return l;
376}
377
378static struct key_vector *tnode_new(t_key key, int pos, int bits)
379{
380 unsigned int shift = pos + bits;
381 struct key_vector *tn;
382 struct tnode *tnode;
383
384
385 BUG_ON(!bits || (shift > KEYLENGTH));
386
387 tnode = tnode_alloc(bits);
388 if (!tnode)
389 return NULL;
390
391 pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
392 sizeof(struct key_vector *) << bits);
393
394 if (bits == KEYLENGTH)
395 tnode->full_children = 1;
396 else
397 tnode->empty_children = 1ul << bits;
398
399 tn = tnode->kv;
400 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
401 tn->pos = pos;
402 tn->bits = bits;
403 tn->slen = pos;
404
405 return tn;
406}
407
408
409
410
411static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
412{
413 return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
414}
415
416
417
418
419static void put_child(struct key_vector *tn, unsigned long i,
420 struct key_vector *n)
421{
422 struct key_vector *chi = get_child(tn, i);
423 int isfull, wasfull;
424
425 BUG_ON(i >= child_length(tn));
426
427
428 if (!n && chi)
429 empty_child_inc(tn);
430 if (n && !chi)
431 empty_child_dec(tn);
432
433
434 wasfull = tnode_full(tn, chi);
435 isfull = tnode_full(tn, n);
436
437 if (wasfull && !isfull)
438 tn_info(tn)->full_children--;
439 else if (!wasfull && isfull)
440 tn_info(tn)->full_children++;
441
442 if (n && (tn->slen < n->slen))
443 tn->slen = n->slen;
444
445 rcu_assign_pointer(tn->tnode[i], n);
446}
447
448static void update_children(struct key_vector *tn)
449{
450 unsigned long i;
451
452
453 for (i = child_length(tn); i;) {
454 struct key_vector *inode = get_child(tn, --i);
455
456 if (!inode)
457 continue;
458
459
460
461
462
463 if (node_parent(inode) == tn)
464 update_children(inode);
465 else
466 node_set_parent(inode, tn);
467 }
468}
469
470static inline void put_child_root(struct key_vector *tp, t_key key,
471 struct key_vector *n)
472{
473 if (IS_TRIE(tp))
474 rcu_assign_pointer(tp->tnode[0], n);
475 else
476 put_child(tp, get_index(key, tp), n);
477}
478
479static inline void tnode_free_init(struct key_vector *tn)
480{
481 tn_info(tn)->rcu.next = NULL;
482}
483
484static inline void tnode_free_append(struct key_vector *tn,
485 struct key_vector *n)
486{
487 tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
488 tn_info(tn)->rcu.next = &tn_info(n)->rcu;
489}
490
491static void tnode_free(struct key_vector *tn)
492{
493 struct callback_head *head = &tn_info(tn)->rcu;
494
495 while (head) {
496 head = head->next;
497 tnode_free_size += TNODE_SIZE(1ul << tn->bits);
498 node_free(tn);
499
500 tn = container_of(head, struct tnode, rcu)->kv;
501 }
502
503 if (tnode_free_size >= sysctl_fib_sync_mem) {
504 tnode_free_size = 0;
505 synchronize_rcu();
506 }
507}
508
509static struct key_vector *replace(struct trie *t,
510 struct key_vector *oldtnode,
511 struct key_vector *tn)
512{
513 struct key_vector *tp = node_parent(oldtnode);
514 unsigned long i;
515
516
517 NODE_INIT_PARENT(tn, tp);
518 put_child_root(tp, tn->key, tn);
519
520
521 update_children(tn);
522
523
524 tnode_free(oldtnode);
525
526
527 for (i = child_length(tn); i;) {
528 struct key_vector *inode = get_child(tn, --i);
529
530
531 if (tnode_full(tn, inode))
532 tn = resize(t, inode);
533 }
534
535 return tp;
536}
537
538static struct key_vector *inflate(struct trie *t,
539 struct key_vector *oldtnode)
540{
541 struct key_vector *tn;
542 unsigned long i;
543 t_key m;
544
545 pr_debug("In inflate\n");
546
547 tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
548 if (!tn)
549 goto notnode;
550
551
552 tnode_free_init(oldtnode);
553
554
555
556
557
558
559 for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
560 struct key_vector *inode = get_child(oldtnode, --i);
561 struct key_vector *node0, *node1;
562 unsigned long j, k;
563
564
565 if (!inode)
566 continue;
567
568
569 if (!tnode_full(oldtnode, inode)) {
570 put_child(tn, get_index(inode->key, tn), inode);
571 continue;
572 }
573
574
575 tnode_free_append(oldtnode, inode);
576
577
578 if (inode->bits == 1) {
579 put_child(tn, 2 * i + 1, get_child(inode, 1));
580 put_child(tn, 2 * i, get_child(inode, 0));
581 continue;
582 }
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598 node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
599 if (!node1)
600 goto nomem;
601 node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);
602
603 tnode_free_append(tn, node1);
604 if (!node0)
605 goto nomem;
606 tnode_free_append(tn, node0);
607
608
609 for (k = child_length(inode), j = k / 2; j;) {
610 put_child(node1, --j, get_child(inode, --k));
611 put_child(node0, j, get_child(inode, j));
612 put_child(node1, --j, get_child(inode, --k));
613 put_child(node0, j, get_child(inode, j));
614 }
615
616
617 NODE_INIT_PARENT(node1, tn);
618 NODE_INIT_PARENT(node0, tn);
619
620
621 put_child(tn, 2 * i + 1, node1);
622 put_child(tn, 2 * i, node0);
623 }
624
625
626 return replace(t, oldtnode, tn);
627nomem:
628
629 tnode_free(tn);
630notnode:
631 return NULL;
632}
633
634static struct key_vector *halve(struct trie *t,
635 struct key_vector *oldtnode)
636{
637 struct key_vector *tn;
638 unsigned long i;
639
640 pr_debug("In halve\n");
641
642 tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
643 if (!tn)
644 goto notnode;
645
646
647 tnode_free_init(oldtnode);
648
649
650
651
652
653
654 for (i = child_length(oldtnode); i;) {
655 struct key_vector *node1 = get_child(oldtnode, --i);
656 struct key_vector *node0 = get_child(oldtnode, --i);
657 struct key_vector *inode;
658
659
660 if (!node1 || !node0) {
661 put_child(tn, i / 2, node1 ? : node0);
662 continue;
663 }
664
665
666 inode = tnode_new(node0->key, oldtnode->pos, 1);
667 if (!inode)
668 goto nomem;
669 tnode_free_append(tn, inode);
670
671
672 put_child(inode, 1, node1);
673 put_child(inode, 0, node0);
674 NODE_INIT_PARENT(inode, tn);
675
676
677 put_child(tn, i / 2, inode);
678 }
679
680
681 return replace(t, oldtnode, tn);
682nomem:
683
684 tnode_free(tn);
685notnode:
686 return NULL;
687}
688
689static struct key_vector *collapse(struct trie *t,
690 struct key_vector *oldtnode)
691{
692 struct key_vector *n, *tp;
693 unsigned long i;
694
695
696 for (n = NULL, i = child_length(oldtnode); !n && i;)
697 n = get_child(oldtnode, --i);
698
699
700 tp = node_parent(oldtnode);
701 put_child_root(tp, oldtnode->key, n);
702 node_set_parent(n, tp);
703
704
705 node_free(oldtnode);
706
707 return tp;
708}
709
710static unsigned char update_suffix(struct key_vector *tn)
711{
712 unsigned char slen = tn->pos;
713 unsigned long stride, i;
714 unsigned char slen_max;
715
716
717
718
719
720 slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
721
722
723
724
725
726
727 for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
728 struct key_vector *n = get_child(tn, i);
729
730 if (!n || (n->slen <= slen))
731 continue;
732
733
734 stride <<= (n->slen - slen);
735 slen = n->slen;
736 i &= ~(stride - 1);
737
738
739 if (slen >= slen_max)
740 break;
741 }
742
743 tn->slen = slen;
744
745 return slen;
746}
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
806{
807 unsigned long used = child_length(tn);
808 unsigned long threshold = used;
809
810
811 threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
812 used -= tn_info(tn)->empty_children;
813 used += tn_info(tn)->full_children;
814
815
816
817 return (used > 1) && tn->pos && ((50 * used) >= threshold);
818}
819
820static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
821{
822 unsigned long used = child_length(tn);
823 unsigned long threshold = used;
824
825
826 threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
827 used -= tn_info(tn)->empty_children;
828
829
830
831 return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
832}
833
834static inline bool should_collapse(struct key_vector *tn)
835{
836 unsigned long used = child_length(tn);
837
838 used -= tn_info(tn)->empty_children;
839
840
841 if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
842 used -= KEY_MAX;
843
844
845 return used < 2;
846}
847
848#define MAX_WORK 10
849static struct key_vector *resize(struct trie *t, struct key_vector *tn)
850{
851#ifdef CONFIG_IP_FIB_TRIE_STATS
852 struct trie_use_stats __percpu *stats = t->stats;
853#endif
854 struct key_vector *tp = node_parent(tn);
855 unsigned long cindex = get_index(tn->key, tp);
856 int max_work = MAX_WORK;
857
858 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
859 tn, inflate_threshold, halve_threshold);
860
861
862
863
864
865 BUG_ON(tn != get_child(tp, cindex));
866
867
868
869
870 while (should_inflate(tp, tn) && max_work) {
871 tp = inflate(t, tn);
872 if (!tp) {
873#ifdef CONFIG_IP_FIB_TRIE_STATS
874 this_cpu_inc(stats->resize_node_skipped);
875#endif
876 break;
877 }
878
879 max_work--;
880 tn = get_child(tp, cindex);
881 }
882
883
884 tp = node_parent(tn);
885
886
887 if (max_work != MAX_WORK)
888 return tp;
889
890
891
892
893 while (should_halve(tp, tn) && max_work) {
894 tp = halve(t, tn);
895 if (!tp) {
896#ifdef CONFIG_IP_FIB_TRIE_STATS
897 this_cpu_inc(stats->resize_node_skipped);
898#endif
899 break;
900 }
901
902 max_work--;
903 tn = get_child(tp, cindex);
904 }
905
906
907 if (should_collapse(tn))
908 return collapse(t, tn);
909
910
911 return node_parent(tn);
912}
913
914static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
915{
916 unsigned char node_slen = tn->slen;
917
918 while ((node_slen > tn->pos) && (node_slen > slen)) {
919 slen = update_suffix(tn);
920 if (node_slen == slen)
921 break;
922
923 tn = node_parent(tn);
924 node_slen = tn->slen;
925 }
926}
927
928static void node_push_suffix(struct key_vector *tn, unsigned char slen)
929{
930 while (tn->slen < slen) {
931 tn->slen = slen;
932 tn = node_parent(tn);
933 }
934}
935
936
937static struct key_vector *fib_find_node(struct trie *t,
938 struct key_vector **tp, u32 key)
939{
940 struct key_vector *pn, *n = t->kv;
941 unsigned long index = 0;
942
943 do {
944 pn = n;
945 n = get_child_rcu(n, index);
946
947 if (!n)
948 break;
949
950 index = get_cindex(key, n);
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966 if (index >= (1ul << n->bits)) {
967 n = NULL;
968 break;
969 }
970
971
972 } while (IS_TNODE(n));
973
974 *tp = pn;
975
976 return n;
977}
978
979
980
981
982static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
983 u8 tos, u32 prio, u32 tb_id)
984{
985 struct fib_alias *fa;
986
987 if (!fah)
988 return NULL;
989
990 hlist_for_each_entry(fa, fah, fa_list) {
991 if (fa->fa_slen < slen)
992 continue;
993 if (fa->fa_slen != slen)
994 break;
995 if (fa->tb_id > tb_id)
996 continue;
997 if (fa->tb_id != tb_id)
998 break;
999 if (fa->fa_tos > tos)
1000 continue;
1001 if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
1002 return fa;
1003 }
1004
1005 return NULL;
1006}
1007
1008static void trie_rebalance(struct trie *t, struct key_vector *tn)
1009{
1010 while (!IS_TRIE(tn))
1011 tn = resize(t, tn);
1012}
1013
1014static int fib_insert_node(struct trie *t, struct key_vector *tp,
1015 struct fib_alias *new, t_key key)
1016{
1017 struct key_vector *n, *l;
1018
1019 l = leaf_new(key, new);
1020 if (!l)
1021 goto noleaf;
1022
1023
1024 n = get_child(tp, get_index(key, tp));
1025
1026
1027
1028
1029
1030
1031
1032 if (n) {
1033 struct key_vector *tn;
1034
1035 tn = tnode_new(key, __fls(key ^ n->key), 1);
1036 if (!tn)
1037 goto notnode;
1038
1039
1040 NODE_INIT_PARENT(tn, tp);
1041 put_child(tn, get_index(key, tn) ^ 1, n);
1042
1043
1044 put_child_root(tp, key, tn);
1045 node_set_parent(n, tn);
1046
1047
1048 tp = tn;
1049 }
1050
1051
1052 node_push_suffix(tp, new->fa_slen);
1053 NODE_INIT_PARENT(l, tp);
1054 put_child_root(tp, key, l);
1055 trie_rebalance(t, tp);
1056
1057 return 0;
1058notnode:
1059 node_free(l);
1060noleaf:
1061 return -ENOMEM;
1062}
1063
1064
1065
1066
1067static int fib_insert_alias(struct trie *t, struct key_vector *tp,
1068 struct key_vector *l, struct fib_alias *new,
1069 struct fib_alias *fa, t_key key)
1070{
1071 if (!l)
1072 return fib_insert_node(t, tp, new, key);
1073
1074 if (fa) {
1075 hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
1076 } else {
1077 struct fib_alias *last;
1078
1079 hlist_for_each_entry(last, &l->leaf, fa_list) {
1080 if (new->fa_slen < last->fa_slen)
1081 break;
1082 if ((new->fa_slen == last->fa_slen) &&
1083 (new->tb_id > last->tb_id))
1084 break;
1085 fa = last;
1086 }
1087
1088 if (fa)
1089 hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
1090 else
1091 hlist_add_head_rcu(&new->fa_list, &l->leaf);
1092 }
1093
1094
1095 if (l->slen < new->fa_slen) {
1096 l->slen = new->fa_slen;
1097 node_push_suffix(tp, new->fa_slen);
1098 }
1099
1100 return 0;
1101}
1102
1103static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
1104{
1105 if (plen > KEYLENGTH) {
1106 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1107 return false;
1108 }
1109
1110 if ((plen < KEYLENGTH) && (key << plen)) {
1111 NL_SET_ERR_MSG(extack,
1112 "Invalid prefix for given prefix length");
1113 return false;
1114 }
1115
1116 return true;
1117}
1118
1119
1120int fib_table_insert(struct net *net, struct fib_table *tb,
1121 struct fib_config *cfg, struct netlink_ext_ack *extack)
1122{
1123 enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
1124 struct trie *t = (struct trie *)tb->tb_data;
1125 struct fib_alias *fa, *new_fa;
1126 struct key_vector *l, *tp;
1127 u16 nlflags = NLM_F_EXCL;
1128 struct fib_info *fi;
1129 u8 plen = cfg->fc_dst_len;
1130 u8 slen = KEYLENGTH - plen;
1131 u8 tos = cfg->fc_tos;
1132 u32 key;
1133 int err;
1134
1135 key = ntohl(cfg->fc_dst);
1136
1137 if (!fib_valid_key_len(key, plen, extack))
1138 return -EINVAL;
1139
1140 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
1141
1142 fi = fib_create_info(cfg, extack);
1143 if (IS_ERR(fi)) {
1144 err = PTR_ERR(fi);
1145 goto err;
1146 }
1147
1148 l = fib_find_node(t, &tp, key);
1149 fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
1150 tb->tb_id) : NULL;
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161 if (fa && fa->fa_tos == tos &&
1162 fa->fa_info->fib_priority == fi->fib_priority) {
1163 struct fib_alias *fa_first, *fa_match;
1164
1165 err = -EEXIST;
1166 if (cfg->fc_nlflags & NLM_F_EXCL)
1167 goto out;
1168
1169 nlflags &= ~NLM_F_EXCL;
1170
1171
1172
1173
1174
1175
1176 fa_match = NULL;
1177 fa_first = fa;
1178 hlist_for_each_entry_from(fa, fa_list) {
1179 if ((fa->fa_slen != slen) ||
1180 (fa->tb_id != tb->tb_id) ||
1181 (fa->fa_tos != tos))
1182 break;
1183 if (fa->fa_info->fib_priority != fi->fib_priority)
1184 break;
1185 if (fa->fa_type == cfg->fc_type &&
1186 fa->fa_info == fi) {
1187 fa_match = fa;
1188 break;
1189 }
1190 }
1191
1192 if (cfg->fc_nlflags & NLM_F_REPLACE) {
1193 struct fib_info *fi_drop;
1194 u8 state;
1195
1196 nlflags |= NLM_F_REPLACE;
1197 fa = fa_first;
1198 if (fa_match) {
1199 if (fa == fa_match)
1200 err = 0;
1201 goto out;
1202 }
1203 err = -ENOBUFS;
1204 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1205 if (!new_fa)
1206 goto out;
1207
1208 fi_drop = fa->fa_info;
1209 new_fa->fa_tos = fa->fa_tos;
1210 new_fa->fa_info = fi;
1211 new_fa->fa_type = cfg->fc_type;
1212 state = fa->fa_state;
1213 new_fa->fa_state = state & ~FA_S_ACCESSED;
1214 new_fa->fa_slen = fa->fa_slen;
1215 new_fa->tb_id = tb->tb_id;
1216 new_fa->fa_default = -1;
1217
1218 err = call_fib_entry_notifiers(net,
1219 FIB_EVENT_ENTRY_REPLACE,
1220 key, plen, new_fa,
1221 extack);
1222 if (err)
1223 goto out_free_new_fa;
1224
1225 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1226 tb->tb_id, &cfg->fc_nlinfo, nlflags);
1227
1228 hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1229
1230 alias_free_mem_rcu(fa);
1231
1232 fib_release_info(fi_drop);
1233 if (state & FA_S_ACCESSED)
1234 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1235
1236 goto succeeded;
1237 }
1238
1239
1240
1241
1242 if (fa_match)
1243 goto out;
1244
1245 if (cfg->fc_nlflags & NLM_F_APPEND) {
1246 event = FIB_EVENT_ENTRY_APPEND;
1247 nlflags |= NLM_F_APPEND;
1248 } else {
1249 fa = fa_first;
1250 }
1251 }
1252 err = -ENOENT;
1253 if (!(cfg->fc_nlflags & NLM_F_CREATE))
1254 goto out;
1255
1256 nlflags |= NLM_F_CREATE;
1257 err = -ENOBUFS;
1258 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1259 if (!new_fa)
1260 goto out;
1261
1262 new_fa->fa_info = fi;
1263 new_fa->fa_tos = tos;
1264 new_fa->fa_type = cfg->fc_type;
1265 new_fa->fa_state = 0;
1266 new_fa->fa_slen = slen;
1267 new_fa->tb_id = tb->tb_id;
1268 new_fa->fa_default = -1;
1269
1270 err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
1271 if (err)
1272 goto out_free_new_fa;
1273
1274
1275 err = fib_insert_alias(t, tp, l, new_fa, fa, key);
1276 if (err)
1277 goto out_fib_notif;
1278
1279 if (!plen)
1280 tb->tb_num_default++;
1281
1282 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1283 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1284 &cfg->fc_nlinfo, nlflags);
1285succeeded:
1286 return 0;
1287
1288out_fib_notif:
1289
1290
1291
1292
1293 NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
1294 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
1295 plen, new_fa, NULL);
1296out_free_new_fa:
1297 kmem_cache_free(fn_alias_kmem, new_fa);
1298out:
1299 fib_release_info(fi);
1300err:
1301 return err;
1302}
1303
1304static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
1305{
1306 t_key prefix = n->key;
1307
1308 return (key ^ prefix) & (prefix | -prefix);
1309}
1310
1311
1312int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1313 struct fib_result *res, int fib_flags)
1314{
1315 struct trie *t = (struct trie *) tb->tb_data;
1316#ifdef CONFIG_IP_FIB_TRIE_STATS
1317 struct trie_use_stats __percpu *stats = t->stats;
1318#endif
1319 const t_key key = ntohl(flp->daddr);
1320 struct key_vector *n, *pn;
1321 struct fib_alias *fa;
1322 unsigned long index;
1323 t_key cindex;
1324
1325 pn = t->kv;
1326 cindex = 0;
1327
1328 n = get_child_rcu(pn, cindex);
1329 if (!n) {
1330 trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
1331 return -EAGAIN;
1332 }
1333
1334#ifdef CONFIG_IP_FIB_TRIE_STATS
1335 this_cpu_inc(stats->gets);
1336#endif
1337
1338
1339 for (;;) {
1340 index = get_cindex(key, n);
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356 if (index >= (1ul << n->bits))
1357 break;
1358
1359
1360 if (IS_LEAF(n))
1361 goto found;
1362
1363
1364
1365
1366 if (n->slen > n->pos) {
1367 pn = n;
1368 cindex = index;
1369 }
1370
1371 n = get_child_rcu(n, index);
1372 if (unlikely(!n))
1373 goto backtrace;
1374 }
1375
1376
1377 for (;;) {
1378
1379 struct key_vector __rcu **cptr = n->tnode;
1380
1381
1382
1383
1384
1385 if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
1386 goto backtrace;
1387
1388
1389 if (unlikely(IS_LEAF(n)))
1390 break;
1391
1392
1393
1394
1395
1396
1397 while ((n = rcu_dereference(*cptr)) == NULL) {
1398backtrace:
1399#ifdef CONFIG_IP_FIB_TRIE_STATS
1400 if (!n)
1401 this_cpu_inc(stats->null_node_hit);
1402#endif
1403
1404
1405
1406
1407
1408 while (!cindex) {
1409 t_key pkey = pn->key;
1410
1411
1412
1413
1414
1415 if (IS_TRIE(pn)) {
1416 trace_fib_table_lookup(tb->tb_id, flp,
1417 NULL, -EAGAIN);
1418 return -EAGAIN;
1419 }
1420#ifdef CONFIG_IP_FIB_TRIE_STATS
1421 this_cpu_inc(stats->backtrack);
1422#endif
1423
1424 pn = node_parent_rcu(pn);
1425 cindex = get_index(pkey, pn);
1426 }
1427
1428
1429 cindex &= cindex - 1;
1430
1431
1432 cptr = &pn->tnode[cindex];
1433 }
1434 }
1435
1436found:
1437
1438 index = key ^ n->key;
1439
1440
1441 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
1442 struct fib_info *fi = fa->fa_info;
1443 int nhsel, err;
1444
1445 if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
1446 if (index >= (1ul << fa->fa_slen))
1447 continue;
1448 }
1449 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1450 continue;
1451 if (fi->fib_dead)
1452 continue;
1453 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1454 continue;
1455 fib_alias_accessed(fa);
1456 err = fib_props[fa->fa_type].error;
1457 if (unlikely(err < 0)) {
1458out_reject:
1459#ifdef CONFIG_IP_FIB_TRIE_STATS
1460 this_cpu_inc(stats->semantic_match_passed);
1461#endif
1462 trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
1463 return err;
1464 }
1465 if (fi->fib_flags & RTNH_F_DEAD)
1466 continue;
1467
1468 if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
1469 err = fib_props[RTN_BLACKHOLE].error;
1470 goto out_reject;
1471 }
1472
1473 for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
1474 struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
1475
1476 if (nhc->nhc_flags & RTNH_F_DEAD)
1477 continue;
1478 if (ip_ignore_linkdown(nhc->nhc_dev) &&
1479 nhc->nhc_flags & RTNH_F_LINKDOWN &&
1480 !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1481 continue;
1482 if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1483 if (flp->flowi4_oif &&
1484 flp->flowi4_oif != nhc->nhc_oif)
1485 continue;
1486 }
1487
1488 if (!(fib_flags & FIB_LOOKUP_NOREF))
1489 refcount_inc(&fi->fib_clntref);
1490
1491 res->prefix = htonl(n->key);
1492 res->prefixlen = KEYLENGTH - fa->fa_slen;
1493 res->nh_sel = nhsel;
1494 res->nhc = nhc;
1495 res->type = fa->fa_type;
1496 res->scope = fi->fib_scope;
1497 res->fi = fi;
1498 res->table = tb;
1499 res->fa_head = &n->leaf;
1500#ifdef CONFIG_IP_FIB_TRIE_STATS
1501 this_cpu_inc(stats->semantic_match_passed);
1502#endif
1503 trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
1504
1505 return err;
1506 }
1507 }
1508#ifdef CONFIG_IP_FIB_TRIE_STATS
1509 this_cpu_inc(stats->semantic_match_miss);
1510#endif
1511 goto backtrace;
1512}
1513EXPORT_SYMBOL_GPL(fib_table_lookup);
1514
1515static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1516 struct key_vector *l, struct fib_alias *old)
1517{
1518
1519 struct hlist_node **pprev = old->fa_list.pprev;
1520 struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
1521
1522
1523 hlist_del_rcu(&old->fa_list);
1524
1525
1526
1527
1528 if (hlist_empty(&l->leaf)) {
1529 if (tp->slen == l->slen)
1530 node_pull_suffix(tp, tp->pos);
1531 put_child_root(tp, l->key, NULL);
1532 node_free(l);
1533 trie_rebalance(t, tp);
1534 return;
1535 }
1536
1537
1538 if (*pprev)
1539 return;
1540
1541
1542 l->slen = fa->fa_slen;
1543 node_pull_suffix(tp, fa->fa_slen);
1544}
1545
1546
1547int fib_table_delete(struct net *net, struct fib_table *tb,
1548 struct fib_config *cfg, struct netlink_ext_ack *extack)
1549{
1550 struct trie *t = (struct trie *) tb->tb_data;
1551 struct fib_alias *fa, *fa_to_delete;
1552 struct key_vector *l, *tp;
1553 u8 plen = cfg->fc_dst_len;
1554 u8 slen = KEYLENGTH - plen;
1555 u8 tos = cfg->fc_tos;
1556 u32 key;
1557
1558 key = ntohl(cfg->fc_dst);
1559
1560 if (!fib_valid_key_len(key, plen, extack))
1561 return -EINVAL;
1562
1563 l = fib_find_node(t, &tp, key);
1564 if (!l)
1565 return -ESRCH;
1566
1567 fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id);
1568 if (!fa)
1569 return -ESRCH;
1570
1571 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1572
1573 fa_to_delete = NULL;
1574 hlist_for_each_entry_from(fa, fa_list) {
1575 struct fib_info *fi = fa->fa_info;
1576
1577 if ((fa->fa_slen != slen) ||
1578 (fa->tb_id != tb->tb_id) ||
1579 (fa->fa_tos != tos))
1580 break;
1581
1582 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1583 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1584 fa->fa_info->fib_scope == cfg->fc_scope) &&
1585 (!cfg->fc_prefsrc ||
1586 fi->fib_prefsrc == cfg->fc_prefsrc) &&
1587 (!cfg->fc_protocol ||
1588 fi->fib_protocol == cfg->fc_protocol) &&
1589 fib_nh_match(cfg, fi, extack) == 0 &&
1590 fib_metrics_match(cfg, fi)) {
1591 fa_to_delete = fa;
1592 break;
1593 }
1594 }
1595
1596 if (!fa_to_delete)
1597 return -ESRCH;
1598
1599 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
1600 fa_to_delete, extack);
1601 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1602 &cfg->fc_nlinfo, 0);
1603
1604 if (!plen)
1605 tb->tb_num_default--;
1606
1607 fib_remove_alias(t, tp, l, fa_to_delete);
1608
1609 if (fa_to_delete->fa_state & FA_S_ACCESSED)
1610 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1611
1612 fib_release_info(fa_to_delete->fa_info);
1613 alias_free_mem_rcu(fa_to_delete);
1614 return 0;
1615}
1616
1617
1618static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
1619{
1620 struct key_vector *pn, *n = *tn;
1621 unsigned long cindex;
1622
1623
1624 do {
1625
1626 pn = n;
1627 cindex = (key > pn->key) ? get_index(key, pn) : 0;
1628
1629 if (cindex >> pn->bits)
1630 break;
1631
1632
1633 n = get_child_rcu(pn, cindex++);
1634 if (!n)
1635 break;
1636
1637
1638 if (IS_LEAF(n) && (n->key >= key))
1639 goto found;
1640 } while (IS_TNODE(n));
1641
1642
1643 while (!IS_TRIE(pn)) {
1644
1645 if (cindex >= (1ul << pn->bits)) {
1646 t_key pkey = pn->key;
1647
1648 pn = node_parent_rcu(pn);
1649 cindex = get_index(pkey, pn) + 1;
1650 continue;
1651 }
1652
1653
1654 n = get_child_rcu(pn, cindex++);
1655 if (!n)
1656 continue;
1657
1658
1659 if (IS_LEAF(n))
1660 goto found;
1661
1662
1663 pn = n;
1664 cindex = 0;
1665 }
1666
1667 *tn = pn;
1668 return NULL;
1669found:
1670
1671 *tn = pn;
1672 return n;
1673}
1674
1675static void fib_trie_free(struct fib_table *tb)
1676{
1677 struct trie *t = (struct trie *)tb->tb_data;
1678 struct key_vector *pn = t->kv;
1679 unsigned long cindex = 1;
1680 struct hlist_node *tmp;
1681 struct fib_alias *fa;
1682
1683
1684 for (;;) {
1685 struct key_vector *n;
1686
1687 if (!(cindex--)) {
1688 t_key pkey = pn->key;
1689
1690 if (IS_TRIE(pn))
1691 break;
1692
1693 n = pn;
1694 pn = node_parent(pn);
1695
1696
1697 put_child_root(pn, n->key, NULL);
1698 node_free(n);
1699
1700 cindex = get_index(pkey, pn);
1701
1702 continue;
1703 }
1704
1705
1706 n = get_child(pn, cindex);
1707 if (!n)
1708 continue;
1709
1710 if (IS_TNODE(n)) {
1711
1712 pn = n;
1713 cindex = 1ul << n->bits;
1714
1715 continue;
1716 }
1717
1718 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1719 hlist_del_rcu(&fa->fa_list);
1720 alias_free_mem_rcu(fa);
1721 }
1722
1723 put_child_root(pn, n->key, NULL);
1724 node_free(n);
1725 }
1726
1727#ifdef CONFIG_IP_FIB_TRIE_STATS
1728 free_percpu(t->stats);
1729#endif
1730 kfree(tb);
1731}
1732
1733struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
1734{
1735 struct trie *ot = (struct trie *)oldtb->tb_data;
1736 struct key_vector *l, *tp = ot->kv;
1737 struct fib_table *local_tb;
1738 struct fib_alias *fa;
1739 struct trie *lt;
1740 t_key key = 0;
1741
1742 if (oldtb->tb_data == oldtb->__data)
1743 return oldtb;
1744
1745 local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
1746 if (!local_tb)
1747 return NULL;
1748
1749 lt = (struct trie *)local_tb->tb_data;
1750
1751 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1752 struct key_vector *local_l = NULL, *local_tp;
1753
1754 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1755 struct fib_alias *new_fa;
1756
1757 if (local_tb->tb_id != fa->tb_id)
1758 continue;
1759
1760
1761 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1762 if (!new_fa)
1763 goto out;
1764
1765 memcpy(new_fa, fa, sizeof(*fa));
1766
1767
1768 if (!local_l)
1769 local_l = fib_find_node(lt, &local_tp, l->key);
1770
1771 if (fib_insert_alias(lt, local_tp, local_l, new_fa,
1772 NULL, l->key)) {
1773 kmem_cache_free(fn_alias_kmem, new_fa);
1774 goto out;
1775 }
1776 }
1777
1778
1779 key = l->key + 1;
1780 if (key < l->key)
1781 break;
1782 }
1783
1784 return local_tb;
1785out:
1786 fib_trie_free(local_tb);
1787
1788 return NULL;
1789}
1790
1791
1792void fib_table_flush_external(struct fib_table *tb)
1793{
1794 struct trie *t = (struct trie *)tb->tb_data;
1795 struct key_vector *pn = t->kv;
1796 unsigned long cindex = 1;
1797 struct hlist_node *tmp;
1798 struct fib_alias *fa;
1799
1800
1801 for (;;) {
1802 unsigned char slen = 0;
1803 struct key_vector *n;
1804
1805 if (!(cindex--)) {
1806 t_key pkey = pn->key;
1807
1808
1809 if (IS_TRIE(pn))
1810 break;
1811
1812
1813 if (pn->slen > pn->pos)
1814 update_suffix(pn);
1815
1816
1817 pn = resize(t, pn);
1818 cindex = get_index(pkey, pn);
1819
1820 continue;
1821 }
1822
1823
1824 n = get_child(pn, cindex);
1825 if (!n)
1826 continue;
1827
1828 if (IS_TNODE(n)) {
1829
1830 pn = n;
1831 cindex = 1ul << n->bits;
1832
1833 continue;
1834 }
1835
1836 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1837
1838
1839
1840 if (tb->tb_id != fa->tb_id) {
1841 hlist_del_rcu(&fa->fa_list);
1842 alias_free_mem_rcu(fa);
1843 continue;
1844 }
1845
1846
1847 slen = fa->fa_slen;
1848 }
1849
1850
1851 n->slen = slen;
1852
1853 if (hlist_empty(&n->leaf)) {
1854 put_child_root(pn, n->key, NULL);
1855 node_free(n);
1856 }
1857 }
1858}
1859
1860
1861int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
1862{
1863 struct trie *t = (struct trie *)tb->tb_data;
1864 struct key_vector *pn = t->kv;
1865 unsigned long cindex = 1;
1866 struct hlist_node *tmp;
1867 struct fib_alias *fa;
1868 int found = 0;
1869
1870
1871 for (;;) {
1872 unsigned char slen = 0;
1873 struct key_vector *n;
1874
1875 if (!(cindex--)) {
1876 t_key pkey = pn->key;
1877
1878
1879 if (IS_TRIE(pn))
1880 break;
1881
1882
1883 if (pn->slen > pn->pos)
1884 update_suffix(pn);
1885
1886
1887 pn = resize(t, pn);
1888 cindex = get_index(pkey, pn);
1889
1890 continue;
1891 }
1892
1893
1894 n = get_child(pn, cindex);
1895 if (!n)
1896 continue;
1897
1898 if (IS_TNODE(n)) {
1899
1900 pn = n;
1901 cindex = 1ul << n->bits;
1902
1903 continue;
1904 }
1905
1906 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1907 struct fib_info *fi = fa->fa_info;
1908
1909 if (!fi || tb->tb_id != fa->tb_id ||
1910 (!(fi->fib_flags & RTNH_F_DEAD) &&
1911 !fib_props[fa->fa_type].error)) {
1912 slen = fa->fa_slen;
1913 continue;
1914 }
1915
1916
1917
1918
1919 if (!flush_all && fib_props[fa->fa_type].error) {
1920 slen = fa->fa_slen;
1921 continue;
1922 }
1923
1924 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
1925 n->key,
1926 KEYLENGTH - fa->fa_slen, fa,
1927 NULL);
1928 hlist_del_rcu(&fa->fa_list);
1929 fib_release_info(fa->fa_info);
1930 alias_free_mem_rcu(fa);
1931 found++;
1932 }
1933
1934
1935 n->slen = slen;
1936
1937 if (hlist_empty(&n->leaf)) {
1938 put_child_root(pn, n->key, NULL);
1939 node_free(n);
1940 }
1941 }
1942
1943 pr_debug("trie_flush found=%d\n", found);
1944 return found;
1945}
1946
1947
1948static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
1949 struct nl_info *info)
1950{
1951 struct trie *t = (struct trie *)tb->tb_data;
1952 struct key_vector *pn = t->kv;
1953 unsigned long cindex = 1;
1954 struct fib_alias *fa;
1955
1956 for (;;) {
1957 struct key_vector *n;
1958
1959 if (!(cindex--)) {
1960 t_key pkey = pn->key;
1961
1962 if (IS_TRIE(pn))
1963 break;
1964
1965 pn = node_parent(pn);
1966 cindex = get_index(pkey, pn);
1967 continue;
1968 }
1969
1970
1971 n = get_child(pn, cindex);
1972 if (!n)
1973 continue;
1974
1975 if (IS_TNODE(n)) {
1976
1977 pn = n;
1978 cindex = 1ul << n->bits;
1979
1980 continue;
1981 }
1982
1983 hlist_for_each_entry(fa, &n->leaf, fa_list) {
1984 struct fib_info *fi = fa->fa_info;
1985
1986 if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
1987 continue;
1988
1989 rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
1990 KEYLENGTH - fa->fa_slen, tb->tb_id,
1991 info, NLM_F_REPLACE);
1992
1993
1994
1995
1996
1997 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1998 n->key,
1999 KEYLENGTH - fa->fa_slen, fa,
2000 NULL);
2001 }
2002 }
2003}
2004
2005void fib_info_notify_update(struct net *net, struct nl_info *info)
2006{
2007 unsigned int h;
2008
2009 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2010 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2011 struct fib_table *tb;
2012
2013 hlist_for_each_entry_rcu(tb, head, tb_hlist)
2014 __fib_info_notify_update(net, tb, info);
2015 }
2016}
2017
2018static void fib_leaf_notify(struct net *net, struct key_vector *l,
2019 struct fib_table *tb, struct notifier_block *nb)
2020{
2021 struct fib_alias *fa;
2022
2023 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2024 struct fib_info *fi = fa->fa_info;
2025
2026 if (!fi)
2027 continue;
2028
2029
2030
2031
2032 if (tb->tb_id != fa->tb_id)
2033 continue;
2034
2035 call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
2036 KEYLENGTH - fa->fa_slen, fa);
2037 }
2038}
2039
2040static void fib_table_notify(struct net *net, struct fib_table *tb,
2041 struct notifier_block *nb)
2042{
2043 struct trie *t = (struct trie *)tb->tb_data;
2044 struct key_vector *l, *tp = t->kv;
2045 t_key key = 0;
2046
2047 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2048 fib_leaf_notify(net, l, tb, nb);
2049
2050 key = l->key + 1;
2051
2052 if (key < l->key)
2053 break;
2054 }
2055}
2056
2057void fib_notify(struct net *net, struct notifier_block *nb)
2058{
2059 unsigned int h;
2060
2061 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2062 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2063 struct fib_table *tb;
2064
2065 hlist_for_each_entry_rcu(tb, head, tb_hlist)
2066 fib_table_notify(net, tb, nb);
2067 }
2068}
2069
2070static void __trie_free_rcu(struct rcu_head *head)
2071{
2072 struct fib_table *tb = container_of(head, struct fib_table, rcu);
2073#ifdef CONFIG_IP_FIB_TRIE_STATS
2074 struct trie *t = (struct trie *)tb->tb_data;
2075
2076 if (tb->tb_data == tb->__data)
2077 free_percpu(t->stats);
2078#endif
2079 kfree(tb);
2080}
2081
2082void fib_free_table(struct fib_table *tb)
2083{
2084 call_rcu(&tb->rcu, __trie_free_rcu);
2085}
2086
2087static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
2088 struct sk_buff *skb, struct netlink_callback *cb,
2089 struct fib_dump_filter *filter)
2090{
2091 unsigned int flags = NLM_F_MULTI;
2092 __be32 xkey = htonl(l->key);
2093 int i, s_i, i_fa, s_fa, err;
2094 struct fib_alias *fa;
2095
2096 if (filter->filter_set ||
2097 !filter->dump_exceptions || !filter->dump_routes)
2098 flags |= NLM_F_DUMP_FILTERED;
2099
2100 s_i = cb->args[4];
2101 s_fa = cb->args[5];
2102 i = 0;
2103
2104
2105 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2106 struct fib_info *fi = fa->fa_info;
2107
2108 if (i < s_i)
2109 goto next;
2110
2111 i_fa = 0;
2112
2113 if (tb->tb_id != fa->tb_id)
2114 goto next;
2115
2116 if (filter->filter_set) {
2117 if (filter->rt_type && fa->fa_type != filter->rt_type)
2118 goto next;
2119
2120 if ((filter->protocol &&
2121 fi->fib_protocol != filter->protocol))
2122 goto next;
2123
2124 if (filter->dev &&
2125 !fib_info_nh_uses_dev(fi, filter->dev))
2126 goto next;
2127 }
2128
2129 if (filter->dump_routes) {
2130 if (!s_fa) {
2131 err = fib_dump_info(skb,
2132 NETLINK_CB(cb->skb).portid,
2133 cb->nlh->nlmsg_seq,
2134 RTM_NEWROUTE,
2135 tb->tb_id, fa->fa_type,
2136 xkey,
2137 KEYLENGTH - fa->fa_slen,
2138 fa->fa_tos, fi, flags);
2139 if (err < 0)
2140 goto stop;
2141 }
2142
2143 i_fa++;
2144 }
2145
2146 if (filter->dump_exceptions) {
2147 err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2148 &i_fa, s_fa, flags);
2149 if (err < 0)
2150 goto stop;
2151 }
2152
2153next:
2154 i++;
2155 }
2156
2157 cb->args[4] = i;
2158 return skb->len;
2159
2160stop:
2161 cb->args[4] = i;
2162 cb->args[5] = i_fa;
2163 return err;
2164}
2165
2166
2167int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
2168 struct netlink_callback *cb, struct fib_dump_filter *filter)
2169{
2170 struct trie *t = (struct trie *)tb->tb_data;
2171 struct key_vector *l, *tp = t->kv;
2172
2173
2174
2175 int count = cb->args[2];
2176 t_key key = cb->args[3];
2177
2178 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2179 int err;
2180
2181 err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
2182 if (err < 0) {
2183 cb->args[3] = key;
2184 cb->args[2] = count;
2185 return err;
2186 }
2187
2188 ++count;
2189 key = l->key + 1;
2190
2191 memset(&cb->args[4], 0,
2192 sizeof(cb->args) - 4*sizeof(cb->args[0]));
2193
2194
2195 if (key < l->key)
2196 break;
2197 }
2198
2199 cb->args[3] = key;
2200 cb->args[2] = count;
2201
2202 return skb->len;
2203}
2204
2205void __init fib_trie_init(void)
2206{
2207 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2208 sizeof(struct fib_alias),
2209 0, SLAB_PANIC, NULL);
2210
2211 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2212 LEAF_SIZE,
2213 0, SLAB_PANIC, NULL);
2214}
2215
2216struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
2217{
2218 struct fib_table *tb;
2219 struct trie *t;
2220 size_t sz = sizeof(*tb);
2221
2222 if (!alias)
2223 sz += sizeof(struct trie);
2224
2225 tb = kzalloc(sz, GFP_KERNEL);
2226 if (!tb)
2227 return NULL;
2228
2229 tb->tb_id = id;
2230 tb->tb_num_default = 0;
2231 tb->tb_data = (alias ? alias->__data : tb->__data);
2232
2233 if (alias)
2234 return tb;
2235
2236 t = (struct trie *) tb->tb_data;
2237 t->kv[0].pos = KEYLENGTH;
2238 t->kv[0].slen = KEYLENGTH;
2239#ifdef CONFIG_IP_FIB_TRIE_STATS
2240 t->stats = alloc_percpu(struct trie_use_stats);
2241 if (!t->stats) {
2242 kfree(tb);
2243 tb = NULL;
2244 }
2245#endif
2246
2247 return tb;
2248}
2249
2250#ifdef CONFIG_PROC_FS
2251
2252struct fib_trie_iter {
2253 struct seq_net_private p;
2254 struct fib_table *tb;
2255 struct key_vector *tnode;
2256 unsigned int index;
2257 unsigned int depth;
2258};
2259
2260static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
2261{
2262 unsigned long cindex = iter->index;
2263 struct key_vector *pn = iter->tnode;
2264 t_key pkey;
2265
2266 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2267 iter->tnode, iter->index, iter->depth);
2268
2269 while (!IS_TRIE(pn)) {
2270 while (cindex < child_length(pn)) {
2271 struct key_vector *n = get_child_rcu(pn, cindex++);
2272
2273 if (!n)
2274 continue;
2275
2276 if (IS_LEAF(n)) {
2277 iter->tnode = pn;
2278 iter->index = cindex;
2279 } else {
2280
2281 iter->tnode = n;
2282 iter->index = 0;
2283 ++iter->depth;
2284 }
2285
2286 return n;
2287 }
2288
2289
2290 pkey = pn->key;
2291 pn = node_parent_rcu(pn);
2292 cindex = get_index(pkey, pn) + 1;
2293 --iter->depth;
2294 }
2295
2296
2297 iter->tnode = pn;
2298 iter->index = 0;
2299
2300 return NULL;
2301}
2302
2303static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2304 struct trie *t)
2305{
2306 struct key_vector *n, *pn;
2307
2308 if (!t)
2309 return NULL;
2310
2311 pn = t->kv;
2312 n = rcu_dereference(pn->tnode[0]);
2313 if (!n)
2314 return NULL;
2315
2316 if (IS_TNODE(n)) {
2317 iter->tnode = n;
2318 iter->index = 0;
2319 iter->depth = 1;
2320 } else {
2321 iter->tnode = pn;
2322 iter->index = 0;
2323 iter->depth = 0;
2324 }
2325
2326 return n;
2327}
2328
2329static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2330{
2331 struct key_vector *n;
2332 struct fib_trie_iter iter;
2333
2334 memset(s, 0, sizeof(*s));
2335
2336 rcu_read_lock();
2337 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2338 if (IS_LEAF(n)) {
2339 struct fib_alias *fa;
2340
2341 s->leaves++;
2342 s->totdepth += iter.depth;
2343 if (iter.depth > s->maxdepth)
2344 s->maxdepth = iter.depth;
2345
2346 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
2347 ++s->prefixes;
2348 } else {
2349 s->tnodes++;
2350 if (n->bits < MAX_STAT_DEPTH)
2351 s->nodesizes[n->bits]++;
2352 s->nullpointers += tn_info(n)->empty_children;
2353 }
2354 }
2355 rcu_read_unlock();
2356}
2357
2358
2359
2360
2361static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2362{
2363 unsigned int i, max, pointers, bytes, avdepth;
2364
2365 if (stat->leaves)
2366 avdepth = stat->totdepth*100 / stat->leaves;
2367 else
2368 avdepth = 0;
2369
2370 seq_printf(seq, "\tAver depth: %u.%02d\n",
2371 avdepth / 100, avdepth % 100);
2372 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2373
2374 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2375 bytes = LEAF_SIZE * stat->leaves;
2376
2377 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2378 bytes += sizeof(struct fib_alias) * stat->prefixes;
2379
2380 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2381 bytes += TNODE_SIZE(0) * stat->tnodes;
2382
2383 max = MAX_STAT_DEPTH;
2384 while (max > 0 && stat->nodesizes[max-1] == 0)
2385 max--;
2386
2387 pointers = 0;
2388 for (i = 1; i < max; i++)
2389 if (stat->nodesizes[i] != 0) {
2390 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2391 pointers += (1<<i) * stat->nodesizes[i];
2392 }
2393 seq_putc(seq, '\n');
2394 seq_printf(seq, "\tPointers: %u\n", pointers);
2395
2396 bytes += sizeof(struct key_vector *) * pointers;
2397 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2398 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2399}
2400
2401#ifdef CONFIG_IP_FIB_TRIE_STATS
2402static void trie_show_usage(struct seq_file *seq,
2403 const struct trie_use_stats __percpu *stats)
2404{
2405 struct trie_use_stats s = { 0 };
2406 int cpu;
2407
2408
2409 for_each_possible_cpu(cpu) {
2410 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
2411
2412 s.gets += pcpu->gets;
2413 s.backtrack += pcpu->backtrack;
2414 s.semantic_match_passed += pcpu->semantic_match_passed;
2415 s.semantic_match_miss += pcpu->semantic_match_miss;
2416 s.null_node_hit += pcpu->null_node_hit;
2417 s.resize_node_skipped += pcpu->resize_node_skipped;
2418 }
2419
2420 seq_printf(seq, "\nCounters:\n---------\n");
2421 seq_printf(seq, "gets = %u\n", s.gets);
2422 seq_printf(seq, "backtracks = %u\n", s.backtrack);
2423 seq_printf(seq, "semantic match passed = %u\n",
2424 s.semantic_match_passed);
2425 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
2426 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
2427 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
2428}
2429#endif
2430
2431static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2432{
2433 if (tb->tb_id == RT_TABLE_LOCAL)
2434 seq_puts(seq, "Local:\n");
2435 else if (tb->tb_id == RT_TABLE_MAIN)
2436 seq_puts(seq, "Main:\n");
2437 else
2438 seq_printf(seq, "Id %d:\n", tb->tb_id);
2439}
2440
2441
2442static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2443{
2444 struct net *net = (struct net *)seq->private;
2445 unsigned int h;
2446
2447 seq_printf(seq,
2448 "Basic info: size of leaf:"
2449 " %zd bytes, size of tnode: %zd bytes.\n",
2450 LEAF_SIZE, TNODE_SIZE(0));
2451
2452 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2453 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2454 struct fib_table *tb;
2455
2456 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2457 struct trie *t = (struct trie *) tb->tb_data;
2458 struct trie_stat stat;
2459
2460 if (!t)
2461 continue;
2462
2463 fib_table_print(seq, tb);
2464
2465 trie_collect_stats(t, &stat);
2466 trie_show_stats(seq, &stat);
2467#ifdef CONFIG_IP_FIB_TRIE_STATS
2468 trie_show_usage(seq, t->stats);
2469#endif
2470 }
2471 }
2472
2473 return 0;
2474}
2475
2476static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2477{
2478 struct fib_trie_iter *iter = seq->private;
2479 struct net *net = seq_file_net(seq);
2480 loff_t idx = 0;
2481 unsigned int h;
2482
2483 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2484 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2485 struct fib_table *tb;
2486
2487 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2488 struct key_vector *n;
2489
2490 for (n = fib_trie_get_first(iter,
2491 (struct trie *) tb->tb_data);
2492 n; n = fib_trie_get_next(iter))
2493 if (pos == idx++) {
2494 iter->tb = tb;
2495 return n;
2496 }
2497 }
2498 }
2499
2500 return NULL;
2501}
2502
2503static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2504 __acquires(RCU)
2505{
2506 rcu_read_lock();
2507 return fib_trie_get_idx(seq, *pos);
2508}
2509
2510static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2511{
2512 struct fib_trie_iter *iter = seq->private;
2513 struct net *net = seq_file_net(seq);
2514 struct fib_table *tb = iter->tb;
2515 struct hlist_node *tb_node;
2516 unsigned int h;
2517 struct key_vector *n;
2518
2519 ++*pos;
2520
2521 n = fib_trie_get_next(iter);
2522 if (n)
2523 return n;
2524
2525
2526 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2527 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2528 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2529 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2530 if (n)
2531 goto found;
2532 }
2533
2534
2535 while (++h < FIB_TABLE_HASHSZ) {
2536 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2537 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2538 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2539 if (n)
2540 goto found;
2541 }
2542 }
2543 return NULL;
2544
2545found:
2546 iter->tb = tb;
2547 return n;
2548}
2549
2550static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2551 __releases(RCU)
2552{
2553 rcu_read_unlock();
2554}
2555
2556static void seq_indent(struct seq_file *seq, int n)
2557{
2558 while (n-- > 0)
2559 seq_puts(seq, " ");
2560}
2561
2562static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2563{
2564 switch (s) {
2565 case RT_SCOPE_UNIVERSE: return "universe";
2566 case RT_SCOPE_SITE: return "site";
2567 case RT_SCOPE_LINK: return "link";
2568 case RT_SCOPE_HOST: return "host";
2569 case RT_SCOPE_NOWHERE: return "nowhere";
2570 default:
2571 snprintf(buf, len, "scope=%d", s);
2572 return buf;
2573 }
2574}
2575
2576static const char *const rtn_type_names[__RTN_MAX] = {
2577 [RTN_UNSPEC] = "UNSPEC",
2578 [RTN_UNICAST] = "UNICAST",
2579 [RTN_LOCAL] = "LOCAL",
2580 [RTN_BROADCAST] = "BROADCAST",
2581 [RTN_ANYCAST] = "ANYCAST",
2582 [RTN_MULTICAST] = "MULTICAST",
2583 [RTN_BLACKHOLE] = "BLACKHOLE",
2584 [RTN_UNREACHABLE] = "UNREACHABLE",
2585 [RTN_PROHIBIT] = "PROHIBIT",
2586 [RTN_THROW] = "THROW",
2587 [RTN_NAT] = "NAT",
2588 [RTN_XRESOLVE] = "XRESOLVE",
2589};
2590
2591static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2592{
2593 if (t < __RTN_MAX && rtn_type_names[t])
2594 return rtn_type_names[t];
2595 snprintf(buf, len, "type %u", t);
2596 return buf;
2597}
2598
2599
2600static int fib_trie_seq_show(struct seq_file *seq, void *v)
2601{
2602 const struct fib_trie_iter *iter = seq->private;
2603 struct key_vector *n = v;
2604
2605 if (IS_TRIE(node_parent_rcu(n)))
2606 fib_table_print(seq, iter->tb);
2607
2608 if (IS_TNODE(n)) {
2609 __be32 prf = htonl(n->key);
2610
2611 seq_indent(seq, iter->depth-1);
2612 seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
2613 &prf, KEYLENGTH - n->pos - n->bits, n->bits,
2614 tn_info(n)->full_children,
2615 tn_info(n)->empty_children);
2616 } else {
2617 __be32 val = htonl(n->key);
2618 struct fib_alias *fa;
2619
2620 seq_indent(seq, iter->depth);
2621 seq_printf(seq, " |-- %pI4\n", &val);
2622
2623 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
2624 char buf1[32], buf2[32];
2625
2626 seq_indent(seq, iter->depth + 1);
2627 seq_printf(seq, " /%zu %s %s",
2628 KEYLENGTH - fa->fa_slen,
2629 rtn_scope(buf1, sizeof(buf1),
2630 fa->fa_info->fib_scope),
2631 rtn_type(buf2, sizeof(buf2),
2632 fa->fa_type));
2633 if (fa->fa_tos)
2634 seq_printf(seq, " tos=%d", fa->fa_tos);
2635 seq_putc(seq, '\n');
2636 }
2637 }
2638
2639 return 0;
2640}
2641
2642static const struct seq_operations fib_trie_seq_ops = {
2643 .start = fib_trie_seq_start,
2644 .next = fib_trie_seq_next,
2645 .stop = fib_trie_seq_stop,
2646 .show = fib_trie_seq_show,
2647};
2648
2649struct fib_route_iter {
2650 struct seq_net_private p;
2651 struct fib_table *main_tb;
2652 struct key_vector *tnode;
2653 loff_t pos;
2654 t_key key;
2655};
2656
2657static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
2658 loff_t pos)
2659{
2660 struct key_vector *l, **tp = &iter->tnode;
2661 t_key key;
2662
2663
2664 if (iter->pos > 0 && pos >= iter->pos) {
2665 key = iter->key;
2666 } else {
2667 iter->pos = 1;
2668 key = 0;
2669 }
2670
2671 pos -= iter->pos;
2672
2673 while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
2674 key = l->key + 1;
2675 iter->pos++;
2676 l = NULL;
2677
2678
2679 if (!key)
2680 break;
2681 }
2682
2683 if (l)
2684 iter->key = l->key;
2685 else
2686 iter->pos = 0;
2687
2688 return l;
2689}
2690
2691static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2692 __acquires(RCU)
2693{
2694 struct fib_route_iter *iter = seq->private;
2695 struct fib_table *tb;
2696 struct trie *t;
2697
2698 rcu_read_lock();
2699
2700 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
2701 if (!tb)
2702 return NULL;
2703
2704 iter->main_tb = tb;
2705 t = (struct trie *)tb->tb_data;
2706 iter->tnode = t->kv;
2707
2708 if (*pos != 0)
2709 return fib_route_get_idx(iter, *pos);
2710
2711 iter->pos = 0;
2712 iter->key = KEY_MAX;
2713
2714 return SEQ_START_TOKEN;
2715}
2716
2717static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2718{
2719 struct fib_route_iter *iter = seq->private;
2720 struct key_vector *l = NULL;
2721 t_key key = iter->key + 1;
2722
2723 ++*pos;
2724
2725
2726 if ((v == SEQ_START_TOKEN) || key)
2727 l = leaf_walk_rcu(&iter->tnode, key);
2728
2729 if (l) {
2730 iter->key = l->key;
2731 iter->pos++;
2732 } else {
2733 iter->pos = 0;
2734 }
2735
2736 return l;
2737}
2738
2739static void fib_route_seq_stop(struct seq_file *seq, void *v)
2740 __releases(RCU)
2741{
2742 rcu_read_unlock();
2743}
2744
2745static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
2746{
2747 unsigned int flags = 0;
2748
2749 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2750 flags = RTF_REJECT;
2751 if (fi) {
2752 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2753
2754 if (nhc->nhc_gw.ipv4)
2755 flags |= RTF_GATEWAY;
2756 }
2757 if (mask == htonl(0xFFFFFFFF))
2758 flags |= RTF_HOST;
2759 flags |= RTF_UP;
2760 return flags;
2761}
2762
2763
2764
2765
2766
2767
2768
2769static int fib_route_seq_show(struct seq_file *seq, void *v)
2770{
2771 struct fib_route_iter *iter = seq->private;
2772 struct fib_table *tb = iter->main_tb;
2773 struct fib_alias *fa;
2774 struct key_vector *l = v;
2775 __be32 prefix;
2776
2777 if (v == SEQ_START_TOKEN) {
2778 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2779 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2780 "\tWindow\tIRTT");
2781 return 0;
2782 }
2783
2784 prefix = htonl(l->key);
2785
2786 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2787 struct fib_info *fi = fa->fa_info;
2788 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2789 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2790
2791 if ((fa->fa_type == RTN_BROADCAST) ||
2792 (fa->fa_type == RTN_MULTICAST))
2793 continue;
2794
2795 if (fa->tb_id != tb->tb_id)
2796 continue;
2797
2798 seq_setwidth(seq, 127);
2799
2800 if (fi) {
2801 struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2802 __be32 gw = 0;
2803
2804 if (nhc->nhc_gw_family == AF_INET)
2805 gw = nhc->nhc_gw.ipv4;
2806
2807 seq_printf(seq,
2808 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2809 "%d\t%08X\t%d\t%u\t%u",
2810 nhc->nhc_dev ? nhc->nhc_dev->name : "*",
2811 prefix, gw, flags, 0, 0,
2812 fi->fib_priority,
2813 mask,
2814 (fi->fib_advmss ?
2815 fi->fib_advmss + 40 : 0),
2816 fi->fib_window,
2817 fi->fib_rtt >> 3);
2818 } else {
2819 seq_printf(seq,
2820 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2821 "%d\t%08X\t%d\t%u\t%u",
2822 prefix, 0, flags, 0, 0, 0,
2823 mask, 0, 0, 0);
2824 }
2825 seq_pad(seq, '\n');
2826 }
2827
2828 return 0;
2829}
2830
2831static const struct seq_operations fib_route_seq_ops = {
2832 .start = fib_route_seq_start,
2833 .next = fib_route_seq_next,
2834 .stop = fib_route_seq_stop,
2835 .show = fib_route_seq_show,
2836};
2837
2838int __net_init fib_proc_init(struct net *net)
2839{
2840 if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
2841 sizeof(struct fib_trie_iter)))
2842 goto out1;
2843
2844 if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
2845 fib_triestat_seq_show, NULL))
2846 goto out2;
2847
2848 if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
2849 sizeof(struct fib_route_iter)))
2850 goto out3;
2851
2852 return 0;
2853
2854out3:
2855 remove_proc_entry("fib_triestat", net->proc_net);
2856out2:
2857 remove_proc_entry("fib_trie", net->proc_net);
2858out1:
2859 return -ENOMEM;
2860}
2861
2862void __net_exit fib_proc_exit(struct net *net)
2863{
2864 remove_proc_entry("fib_trie", net->proc_net);
2865 remove_proc_entry("fib_triestat", net->proc_net);
2866 remove_proc_entry("route", net->proc_net);
2867}
2868
2869#endif
2870