1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38#include <linux/cache.h>
39#include <linux/uaccess.h>
40#include <linux/bitops.h>
41#include <linux/types.h>
42#include <linux/kernel.h>
43#include <linux/mm.h>
44#include <linux/string.h>
45#include <linux/socket.h>
46#include <linux/sockios.h>
47#include <linux/errno.h>
48#include <linux/in.h>
49#include <linux/inet.h>
50#include <linux/inetdevice.h>
51#include <linux/netdevice.h>
52#include <linux/if_arp.h>
53#include <linux/proc_fs.h>
54#include <linux/rcupdate.h>
55#include <linux/skbuff.h>
56#include <linux/netlink.h>
57#include <linux/init.h>
58#include <linux/list.h>
59#include <linux/slab.h>
60#include <linux/export.h>
61#include <linux/vmalloc.h>
62#include <linux/notifier.h>
63#include <net/net_namespace.h>
64#include <net/ip.h>
65#include <net/protocol.h>
66#include <net/route.h>
67#include <net/tcp.h>
68#include <net/sock.h>
69#include <net/ip_fib.h>
70#include <net/fib_notifier.h>
71#include <trace/events/fib.h>
72#include "fib_lookup.h"
73
74static int call_fib_entry_notifier(struct notifier_block *nb,
75 enum fib_event_type event_type, u32 dst,
76 int dst_len, struct fib_alias *fa,
77 struct netlink_ext_ack *extack)
78{
79 struct fib_entry_notifier_info info = {
80 .info.extack = extack,
81 .dst = dst,
82 .dst_len = dst_len,
83 .fi = fa->fa_info,
84 .tos = fa->fa_tos,
85 .type = fa->fa_type,
86 .tb_id = fa->tb_id,
87 };
88 return call_fib4_notifier(nb, event_type, &info.info);
89}
90
91static int call_fib_entry_notifiers(struct net *net,
92 enum fib_event_type event_type, u32 dst,
93 int dst_len, struct fib_alias *fa,
94 struct netlink_ext_ack *extack)
95{
96 struct fib_entry_notifier_info info = {
97 .info.extack = extack,
98 .dst = dst,
99 .dst_len = dst_len,
100 .fi = fa->fa_info,
101 .tos = fa->fa_tos,
102 .type = fa->fa_type,
103 .tb_id = fa->tb_id,
104 };
105 return call_fib4_notifiers(net, event_type, &info.info);
106}
107
108#define MAX_STAT_DEPTH 32
109
110#define KEYLENGTH (8*sizeof(t_key))
111#define KEY_MAX ((t_key)~0)
112
113typedef unsigned int t_key;
114
115#define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
116#define IS_TNODE(n) ((n)->bits)
117#define IS_LEAF(n) (!(n)->bits)
118
119struct key_vector {
120 t_key key;
121 unsigned char pos;
122 unsigned char bits;
123 unsigned char slen;
124 union {
125
126 struct hlist_head leaf;
127
128 struct key_vector __rcu *tnode[0];
129 };
130};
131
132struct tnode {
133 struct rcu_head rcu;
134 t_key empty_children;
135 t_key full_children;
136 struct key_vector __rcu *parent;
137 struct key_vector kv[1];
138#define tn_bits kv[0].bits
139};
140
141#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n])
142#define LEAF_SIZE TNODE_SIZE(1)
143
144#ifdef CONFIG_IP_FIB_TRIE_STATS
145struct trie_use_stats {
146 unsigned int gets;
147 unsigned int backtrack;
148 unsigned int semantic_match_passed;
149 unsigned int semantic_match_miss;
150 unsigned int null_node_hit;
151 unsigned int resize_node_skipped;
152};
153#endif
154
155struct trie_stat {
156 unsigned int totdepth;
157 unsigned int maxdepth;
158 unsigned int tnodes;
159 unsigned int leaves;
160 unsigned int nullpointers;
161 unsigned int prefixes;
162 unsigned int nodesizes[MAX_STAT_DEPTH];
163};
164
165struct trie {
166 struct key_vector kv[1];
167#ifdef CONFIG_IP_FIB_TRIE_STATS
168 struct trie_use_stats __percpu *stats;
169#endif
170};
171
172static struct key_vector *resize(struct trie *t, struct key_vector *tn);
173static unsigned int tnode_free_size;
174
175
176
177
178
179
180unsigned int sysctl_fib_sync_mem = 512 * 1024;
181unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
182unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
183
184static struct kmem_cache *fn_alias_kmem __ro_after_init;
185static struct kmem_cache *trie_leaf_kmem __ro_after_init;
186
187static inline struct tnode *tn_info(struct key_vector *kv)
188{
189 return container_of(kv, struct tnode, kv[0]);
190}
191
192
193#define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
194#define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
195
196
197#define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
198#define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
199
200
201static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
202{
203 if (n)
204 rcu_assign_pointer(tn_info(n)->parent, tp);
205}
206
207#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
208
209
210
211
212static inline unsigned long child_length(const struct key_vector *tn)
213{
214 return (1ul << tn->bits) & ~(1ul);
215}
216
217#define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
218
219static inline unsigned long get_index(t_key key, struct key_vector *kv)
220{
221 unsigned long index = key ^ kv->key;
222
223 if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
224 return 0;
225
226 return index >> kv->pos;
227}
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288static const int halve_threshold = 25;
289static const int inflate_threshold = 50;
290static const int halve_threshold_root = 15;
291static const int inflate_threshold_root = 30;
292
293static void __alias_free_mem(struct rcu_head *head)
294{
295 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
296 kmem_cache_free(fn_alias_kmem, fa);
297}
298
299static inline void alias_free_mem_rcu(struct fib_alias *fa)
300{
301 call_rcu(&fa->rcu, __alias_free_mem);
302}
303
304#define TNODE_VMALLOC_MAX \
305 ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
306
307static void __node_free_rcu(struct rcu_head *head)
308{
309 struct tnode *n = container_of(head, struct tnode, rcu);
310
311 if (!n->tn_bits)
312 kmem_cache_free(trie_leaf_kmem, n);
313 else
314 kvfree(n);
315}
316
317#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
318
319static struct tnode *tnode_alloc(int bits)
320{
321 size_t size;
322
323
324 if (bits > TNODE_VMALLOC_MAX)
325 return NULL;
326
327
328 size = TNODE_SIZE(1ul << bits);
329
330 if (size <= PAGE_SIZE)
331 return kzalloc(size, GFP_KERNEL);
332 else
333 return vzalloc(size);
334}
335
336static inline void empty_child_inc(struct key_vector *n)
337{
338 tn_info(n)->empty_children++;
339
340 if (!tn_info(n)->empty_children)
341 tn_info(n)->full_children++;
342}
343
344static inline void empty_child_dec(struct key_vector *n)
345{
346 if (!tn_info(n)->empty_children)
347 tn_info(n)->full_children--;
348
349 tn_info(n)->empty_children--;
350}
351
352static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
353{
354 struct key_vector *l;
355 struct tnode *kv;
356
357 kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
358 if (!kv)
359 return NULL;
360
361
362 l = kv->kv;
363 l->key = key;
364 l->pos = 0;
365 l->bits = 0;
366 l->slen = fa->fa_slen;
367
368
369 INIT_HLIST_HEAD(&l->leaf);
370 hlist_add_head(&fa->fa_list, &l->leaf);
371
372 return l;
373}
374
375static struct key_vector *tnode_new(t_key key, int pos, int bits)
376{
377 unsigned int shift = pos + bits;
378 struct key_vector *tn;
379 struct tnode *tnode;
380
381
382 BUG_ON(!bits || (shift > KEYLENGTH));
383
384 tnode = tnode_alloc(bits);
385 if (!tnode)
386 return NULL;
387
388 pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
389 sizeof(struct key_vector *) << bits);
390
391 if (bits == KEYLENGTH)
392 tnode->full_children = 1;
393 else
394 tnode->empty_children = 1ul << bits;
395
396 tn = tnode->kv;
397 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
398 tn->pos = pos;
399 tn->bits = bits;
400 tn->slen = pos;
401
402 return tn;
403}
404
405
406
407
408static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
409{
410 return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
411}
412
413
414
415
416static void put_child(struct key_vector *tn, unsigned long i,
417 struct key_vector *n)
418{
419 struct key_vector *chi = get_child(tn, i);
420 int isfull, wasfull;
421
422 BUG_ON(i >= child_length(tn));
423
424
425 if (!n && chi)
426 empty_child_inc(tn);
427 if (n && !chi)
428 empty_child_dec(tn);
429
430
431 wasfull = tnode_full(tn, chi);
432 isfull = tnode_full(tn, n);
433
434 if (wasfull && !isfull)
435 tn_info(tn)->full_children--;
436 else if (!wasfull && isfull)
437 tn_info(tn)->full_children++;
438
439 if (n && (tn->slen < n->slen))
440 tn->slen = n->slen;
441
442 rcu_assign_pointer(tn->tnode[i], n);
443}
444
445static void update_children(struct key_vector *tn)
446{
447 unsigned long i;
448
449
450 for (i = child_length(tn); i;) {
451 struct key_vector *inode = get_child(tn, --i);
452
453 if (!inode)
454 continue;
455
456
457
458
459
460 if (node_parent(inode) == tn)
461 update_children(inode);
462 else
463 node_set_parent(inode, tn);
464 }
465}
466
467static inline void put_child_root(struct key_vector *tp, t_key key,
468 struct key_vector *n)
469{
470 if (IS_TRIE(tp))
471 rcu_assign_pointer(tp->tnode[0], n);
472 else
473 put_child(tp, get_index(key, tp), n);
474}
475
476static inline void tnode_free_init(struct key_vector *tn)
477{
478 tn_info(tn)->rcu.next = NULL;
479}
480
481static inline void tnode_free_append(struct key_vector *tn,
482 struct key_vector *n)
483{
484 tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
485 tn_info(tn)->rcu.next = &tn_info(n)->rcu;
486}
487
488static void tnode_free(struct key_vector *tn)
489{
490 struct callback_head *head = &tn_info(tn)->rcu;
491
492 while (head) {
493 head = head->next;
494 tnode_free_size += TNODE_SIZE(1ul << tn->bits);
495 node_free(tn);
496
497 tn = container_of(head, struct tnode, rcu)->kv;
498 }
499
500 if (tnode_free_size >= sysctl_fib_sync_mem) {
501 tnode_free_size = 0;
502 synchronize_rcu();
503 }
504}
505
506static struct key_vector *replace(struct trie *t,
507 struct key_vector *oldtnode,
508 struct key_vector *tn)
509{
510 struct key_vector *tp = node_parent(oldtnode);
511 unsigned long i;
512
513
514 NODE_INIT_PARENT(tn, tp);
515 put_child_root(tp, tn->key, tn);
516
517
518 update_children(tn);
519
520
521 tnode_free(oldtnode);
522
523
524 for (i = child_length(tn); i;) {
525 struct key_vector *inode = get_child(tn, --i);
526
527
528 if (tnode_full(tn, inode))
529 tn = resize(t, inode);
530 }
531
532 return tp;
533}
534
535static struct key_vector *inflate(struct trie *t,
536 struct key_vector *oldtnode)
537{
538 struct key_vector *tn;
539 unsigned long i;
540 t_key m;
541
542 pr_debug("In inflate\n");
543
544 tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
545 if (!tn)
546 goto notnode;
547
548
549 tnode_free_init(oldtnode);
550
551
552
553
554
555
556 for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
557 struct key_vector *inode = get_child(oldtnode, --i);
558 struct key_vector *node0, *node1;
559 unsigned long j, k;
560
561
562 if (!inode)
563 continue;
564
565
566 if (!tnode_full(oldtnode, inode)) {
567 put_child(tn, get_index(inode->key, tn), inode);
568 continue;
569 }
570
571
572 tnode_free_append(oldtnode, inode);
573
574
575 if (inode->bits == 1) {
576 put_child(tn, 2 * i + 1, get_child(inode, 1));
577 put_child(tn, 2 * i, get_child(inode, 0));
578 continue;
579 }
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595 node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
596 if (!node1)
597 goto nomem;
598 node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);
599
600 tnode_free_append(tn, node1);
601 if (!node0)
602 goto nomem;
603 tnode_free_append(tn, node0);
604
605
606 for (k = child_length(inode), j = k / 2; j;) {
607 put_child(node1, --j, get_child(inode, --k));
608 put_child(node0, j, get_child(inode, j));
609 put_child(node1, --j, get_child(inode, --k));
610 put_child(node0, j, get_child(inode, j));
611 }
612
613
614 NODE_INIT_PARENT(node1, tn);
615 NODE_INIT_PARENT(node0, tn);
616
617
618 put_child(tn, 2 * i + 1, node1);
619 put_child(tn, 2 * i, node0);
620 }
621
622
623 return replace(t, oldtnode, tn);
624nomem:
625
626 tnode_free(tn);
627notnode:
628 return NULL;
629}
630
631static struct key_vector *halve(struct trie *t,
632 struct key_vector *oldtnode)
633{
634 struct key_vector *tn;
635 unsigned long i;
636
637 pr_debug("In halve\n");
638
639 tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
640 if (!tn)
641 goto notnode;
642
643
644 tnode_free_init(oldtnode);
645
646
647
648
649
650
651 for (i = child_length(oldtnode); i;) {
652 struct key_vector *node1 = get_child(oldtnode, --i);
653 struct key_vector *node0 = get_child(oldtnode, --i);
654 struct key_vector *inode;
655
656
657 if (!node1 || !node0) {
658 put_child(tn, i / 2, node1 ? : node0);
659 continue;
660 }
661
662
663 inode = tnode_new(node0->key, oldtnode->pos, 1);
664 if (!inode)
665 goto nomem;
666 tnode_free_append(tn, inode);
667
668
669 put_child(inode, 1, node1);
670 put_child(inode, 0, node0);
671 NODE_INIT_PARENT(inode, tn);
672
673
674 put_child(tn, i / 2, inode);
675 }
676
677
678 return replace(t, oldtnode, tn);
679nomem:
680
681 tnode_free(tn);
682notnode:
683 return NULL;
684}
685
686static struct key_vector *collapse(struct trie *t,
687 struct key_vector *oldtnode)
688{
689 struct key_vector *n, *tp;
690 unsigned long i;
691
692
693 for (n = NULL, i = child_length(oldtnode); !n && i;)
694 n = get_child(oldtnode, --i);
695
696
697 tp = node_parent(oldtnode);
698 put_child_root(tp, oldtnode->key, n);
699 node_set_parent(n, tp);
700
701
702 node_free(oldtnode);
703
704 return tp;
705}
706
707static unsigned char update_suffix(struct key_vector *tn)
708{
709 unsigned char slen = tn->pos;
710 unsigned long stride, i;
711 unsigned char slen_max;
712
713
714
715
716
717 slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
718
719
720
721
722
723
724 for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
725 struct key_vector *n = get_child(tn, i);
726
727 if (!n || (n->slen <= slen))
728 continue;
729
730
731 stride <<= (n->slen - slen);
732 slen = n->slen;
733 i &= ~(stride - 1);
734
735
736 if (slen >= slen_max)
737 break;
738 }
739
740 tn->slen = slen;
741
742 return slen;
743}
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
803{
804 unsigned long used = child_length(tn);
805 unsigned long threshold = used;
806
807
808 threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
809 used -= tn_info(tn)->empty_children;
810 used += tn_info(tn)->full_children;
811
812
813
814 return (used > 1) && tn->pos && ((50 * used) >= threshold);
815}
816
817static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
818{
819 unsigned long used = child_length(tn);
820 unsigned long threshold = used;
821
822
823 threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
824 used -= tn_info(tn)->empty_children;
825
826
827
828 return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
829}
830
831static inline bool should_collapse(struct key_vector *tn)
832{
833 unsigned long used = child_length(tn);
834
835 used -= tn_info(tn)->empty_children;
836
837
838 if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
839 used -= KEY_MAX;
840
841
842 return used < 2;
843}
844
845#define MAX_WORK 10
846static struct key_vector *resize(struct trie *t, struct key_vector *tn)
847{
848#ifdef CONFIG_IP_FIB_TRIE_STATS
849 struct trie_use_stats __percpu *stats = t->stats;
850#endif
851 struct key_vector *tp = node_parent(tn);
852 unsigned long cindex = get_index(tn->key, tp);
853 int max_work = MAX_WORK;
854
855 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
856 tn, inflate_threshold, halve_threshold);
857
858
859
860
861
862 BUG_ON(tn != get_child(tp, cindex));
863
864
865
866
867 while (should_inflate(tp, tn) && max_work) {
868 tp = inflate(t, tn);
869 if (!tp) {
870#ifdef CONFIG_IP_FIB_TRIE_STATS
871 this_cpu_inc(stats->resize_node_skipped);
872#endif
873 break;
874 }
875
876 max_work--;
877 tn = get_child(tp, cindex);
878 }
879
880
881 tp = node_parent(tn);
882
883
884 if (max_work != MAX_WORK)
885 return tp;
886
887
888
889
890 while (should_halve(tp, tn) && max_work) {
891 tp = halve(t, tn);
892 if (!tp) {
893#ifdef CONFIG_IP_FIB_TRIE_STATS
894 this_cpu_inc(stats->resize_node_skipped);
895#endif
896 break;
897 }
898
899 max_work--;
900 tn = get_child(tp, cindex);
901 }
902
903
904 if (should_collapse(tn))
905 return collapse(t, tn);
906
907
908 return node_parent(tn);
909}
910
911static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
912{
913 unsigned char node_slen = tn->slen;
914
915 while ((node_slen > tn->pos) && (node_slen > slen)) {
916 slen = update_suffix(tn);
917 if (node_slen == slen)
918 break;
919
920 tn = node_parent(tn);
921 node_slen = tn->slen;
922 }
923}
924
925static void node_push_suffix(struct key_vector *tn, unsigned char slen)
926{
927 while (tn->slen < slen) {
928 tn->slen = slen;
929 tn = node_parent(tn);
930 }
931}
932
933
934static struct key_vector *fib_find_node(struct trie *t,
935 struct key_vector **tp, u32 key)
936{
937 struct key_vector *pn, *n = t->kv;
938 unsigned long index = 0;
939
940 do {
941 pn = n;
942 n = get_child_rcu(n, index);
943
944 if (!n)
945 break;
946
947 index = get_cindex(key, n);
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963 if (index >= (1ul << n->bits)) {
964 n = NULL;
965 break;
966 }
967
968
969 } while (IS_TNODE(n));
970
971 *tp = pn;
972
973 return n;
974}
975
976
977
978
979
980
981static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
982 u8 tos, u32 prio, u32 tb_id,
983 bool find_first)
984{
985 struct fib_alias *fa;
986
987 if (!fah)
988 return NULL;
989
990 hlist_for_each_entry(fa, fah, fa_list) {
991 if (fa->fa_slen < slen)
992 continue;
993 if (fa->fa_slen != slen)
994 break;
995 if (fa->tb_id > tb_id)
996 continue;
997 if (fa->tb_id != tb_id)
998 break;
999 if (find_first)
1000 return fa;
1001 if (fa->fa_tos > tos)
1002 continue;
1003 if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
1004 return fa;
1005 }
1006
1007 return NULL;
1008}
1009
1010static struct fib_alias *
1011fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
1012{
1013 u8 slen = KEYLENGTH - fri->dst_len;
1014 struct key_vector *l, *tp;
1015 struct fib_table *tb;
1016 struct fib_alias *fa;
1017 struct trie *t;
1018
1019 tb = fib_get_table(net, fri->tb_id);
1020 if (!tb)
1021 return NULL;
1022
1023 t = (struct trie *)tb->tb_data;
1024 l = fib_find_node(t, &tp, be32_to_cpu(fri->dst));
1025 if (!l)
1026 return NULL;
1027
1028 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1029 if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
1030 fa->fa_tos == fri->tos && fa->fa_info == fri->fi &&
1031 fa->fa_type == fri->type)
1032 return fa;
1033 }
1034
1035 return NULL;
1036}
1037
1038void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
1039{
1040 struct fib_alias *fa_match;
1041
1042 rcu_read_lock();
1043
1044 fa_match = fib_find_matching_alias(net, fri);
1045 if (!fa_match)
1046 goto out;
1047
1048 fa_match->offload = fri->offload;
1049 fa_match->trap = fri->trap;
1050
1051out:
1052 rcu_read_unlock();
1053}
1054EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);
1055
1056static void trie_rebalance(struct trie *t, struct key_vector *tn)
1057{
1058 while (!IS_TRIE(tn))
1059 tn = resize(t, tn);
1060}
1061
1062static int fib_insert_node(struct trie *t, struct key_vector *tp,
1063 struct fib_alias *new, t_key key)
1064{
1065 struct key_vector *n, *l;
1066
1067 l = leaf_new(key, new);
1068 if (!l)
1069 goto noleaf;
1070
1071
1072 n = get_child(tp, get_index(key, tp));
1073
1074
1075
1076
1077
1078
1079
1080 if (n) {
1081 struct key_vector *tn;
1082
1083 tn = tnode_new(key, __fls(key ^ n->key), 1);
1084 if (!tn)
1085 goto notnode;
1086
1087
1088 NODE_INIT_PARENT(tn, tp);
1089 put_child(tn, get_index(key, tn) ^ 1, n);
1090
1091
1092 put_child_root(tp, key, tn);
1093 node_set_parent(n, tn);
1094
1095
1096 tp = tn;
1097 }
1098
1099
1100 node_push_suffix(tp, new->fa_slen);
1101 NODE_INIT_PARENT(l, tp);
1102 put_child_root(tp, key, l);
1103 trie_rebalance(t, tp);
1104
1105 return 0;
1106notnode:
1107 node_free(l);
1108noleaf:
1109 return -ENOMEM;
1110}
1111
1112static int fib_insert_alias(struct trie *t, struct key_vector *tp,
1113 struct key_vector *l, struct fib_alias *new,
1114 struct fib_alias *fa, t_key key)
1115{
1116 if (!l)
1117 return fib_insert_node(t, tp, new, key);
1118
1119 if (fa) {
1120 hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
1121 } else {
1122 struct fib_alias *last;
1123
1124 hlist_for_each_entry(last, &l->leaf, fa_list) {
1125 if (new->fa_slen < last->fa_slen)
1126 break;
1127 if ((new->fa_slen == last->fa_slen) &&
1128 (new->tb_id > last->tb_id))
1129 break;
1130 fa = last;
1131 }
1132
1133 if (fa)
1134 hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
1135 else
1136 hlist_add_head_rcu(&new->fa_list, &l->leaf);
1137 }
1138
1139
1140 if (l->slen < new->fa_slen) {
1141 l->slen = new->fa_slen;
1142 node_push_suffix(tp, new->fa_slen);
1143 }
1144
1145 return 0;
1146}
1147
1148static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
1149{
1150 if (plen > KEYLENGTH) {
1151 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1152 return false;
1153 }
1154
1155 if ((plen < KEYLENGTH) && (key << plen)) {
1156 NL_SET_ERR_MSG(extack,
1157 "Invalid prefix for given prefix length");
1158 return false;
1159 }
1160
1161 return true;
1162}
1163
1164static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1165 struct key_vector *l, struct fib_alias *old);
1166
1167
1168int fib_table_insert(struct net *net, struct fib_table *tb,
1169 struct fib_config *cfg, struct netlink_ext_ack *extack)
1170{
1171 struct trie *t = (struct trie *)tb->tb_data;
1172 struct fib_alias *fa, *new_fa;
1173 struct key_vector *l, *tp;
1174 u16 nlflags = NLM_F_EXCL;
1175 struct fib_info *fi;
1176 u8 plen = cfg->fc_dst_len;
1177 u8 slen = KEYLENGTH - plen;
1178 u8 tos = cfg->fc_tos;
1179 u32 key;
1180 int err;
1181
1182 key = ntohl(cfg->fc_dst);
1183
1184 if (!fib_valid_key_len(key, plen, extack))
1185 return -EINVAL;
1186
1187 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
1188
1189 fi = fib_create_info(cfg, extack);
1190 if (IS_ERR(fi)) {
1191 err = PTR_ERR(fi);
1192 goto err;
1193 }
1194
1195 l = fib_find_node(t, &tp, key);
1196 fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
1197 tb->tb_id, false) : NULL;
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208 if (fa && fa->fa_tos == tos &&
1209 fa->fa_info->fib_priority == fi->fib_priority) {
1210 struct fib_alias *fa_first, *fa_match;
1211
1212 err = -EEXIST;
1213 if (cfg->fc_nlflags & NLM_F_EXCL)
1214 goto out;
1215
1216 nlflags &= ~NLM_F_EXCL;
1217
1218
1219
1220
1221
1222
1223 fa_match = NULL;
1224 fa_first = fa;
1225 hlist_for_each_entry_from(fa, fa_list) {
1226 if ((fa->fa_slen != slen) ||
1227 (fa->tb_id != tb->tb_id) ||
1228 (fa->fa_tos != tos))
1229 break;
1230 if (fa->fa_info->fib_priority != fi->fib_priority)
1231 break;
1232 if (fa->fa_type == cfg->fc_type &&
1233 fa->fa_info == fi) {
1234 fa_match = fa;
1235 break;
1236 }
1237 }
1238
1239 if (cfg->fc_nlflags & NLM_F_REPLACE) {
1240 struct fib_info *fi_drop;
1241 u8 state;
1242
1243 nlflags |= NLM_F_REPLACE;
1244 fa = fa_first;
1245 if (fa_match) {
1246 if (fa == fa_match)
1247 err = 0;
1248 goto out;
1249 }
1250 err = -ENOBUFS;
1251 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1252 if (!new_fa)
1253 goto out;
1254
1255 fi_drop = fa->fa_info;
1256 new_fa->fa_tos = fa->fa_tos;
1257 new_fa->fa_info = fi;
1258 new_fa->fa_type = cfg->fc_type;
1259 state = fa->fa_state;
1260 new_fa->fa_state = state & ~FA_S_ACCESSED;
1261 new_fa->fa_slen = fa->fa_slen;
1262 new_fa->tb_id = tb->tb_id;
1263 new_fa->fa_default = -1;
1264 new_fa->offload = 0;
1265 new_fa->trap = 0;
1266
1267 hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1268
1269 if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
1270 tb->tb_id, true) == new_fa) {
1271 enum fib_event_type fib_event;
1272
1273 fib_event = FIB_EVENT_ENTRY_REPLACE;
1274 err = call_fib_entry_notifiers(net, fib_event,
1275 key, plen,
1276 new_fa, extack);
1277 if (err) {
1278 hlist_replace_rcu(&new_fa->fa_list,
1279 &fa->fa_list);
1280 goto out_free_new_fa;
1281 }
1282 }
1283
1284 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1285 tb->tb_id, &cfg->fc_nlinfo, nlflags);
1286
1287 alias_free_mem_rcu(fa);
1288
1289 fib_release_info(fi_drop);
1290 if (state & FA_S_ACCESSED)
1291 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1292
1293 goto succeeded;
1294 }
1295
1296
1297
1298
1299 if (fa_match)
1300 goto out;
1301
1302 if (cfg->fc_nlflags & NLM_F_APPEND)
1303 nlflags |= NLM_F_APPEND;
1304 else
1305 fa = fa_first;
1306 }
1307 err = -ENOENT;
1308 if (!(cfg->fc_nlflags & NLM_F_CREATE))
1309 goto out;
1310
1311 nlflags |= NLM_F_CREATE;
1312 err = -ENOBUFS;
1313 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1314 if (!new_fa)
1315 goto out;
1316
1317 new_fa->fa_info = fi;
1318 new_fa->fa_tos = tos;
1319 new_fa->fa_type = cfg->fc_type;
1320 new_fa->fa_state = 0;
1321 new_fa->fa_slen = slen;
1322 new_fa->tb_id = tb->tb_id;
1323 new_fa->fa_default = -1;
1324 new_fa->offload = 0;
1325 new_fa->trap = 0;
1326
1327
1328 err = fib_insert_alias(t, tp, l, new_fa, fa, key);
1329 if (err)
1330 goto out_free_new_fa;
1331
1332
1333 l = l ? l : fib_find_node(t, &tp, key);
1334 if (WARN_ON_ONCE(!l))
1335 goto out_free_new_fa;
1336
1337 if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
1338 new_fa) {
1339 enum fib_event_type fib_event;
1340
1341 fib_event = FIB_EVENT_ENTRY_REPLACE;
1342 err = call_fib_entry_notifiers(net, fib_event, key, plen,
1343 new_fa, extack);
1344 if (err)
1345 goto out_remove_new_fa;
1346 }
1347
1348 if (!plen)
1349 tb->tb_num_default++;
1350
1351 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1352 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1353 &cfg->fc_nlinfo, nlflags);
1354succeeded:
1355 return 0;
1356
1357out_remove_new_fa:
1358 fib_remove_alias(t, tp, l, new_fa);
1359out_free_new_fa:
1360 kmem_cache_free(fn_alias_kmem, new_fa);
1361out:
1362 fib_release_info(fi);
1363err:
1364 return err;
1365}
1366
1367static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
1368{
1369 t_key prefix = n->key;
1370
1371 return (key ^ prefix) & (prefix | -prefix);
1372}
1373
1374bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
1375 const struct flowi4 *flp)
1376{
1377 if (nhc->nhc_flags & RTNH_F_DEAD)
1378 return false;
1379
1380 if (ip_ignore_linkdown(nhc->nhc_dev) &&
1381 nhc->nhc_flags & RTNH_F_LINKDOWN &&
1382 !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1383 return false;
1384
1385 if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1386 if (flp->flowi4_oif &&
1387 flp->flowi4_oif != nhc->nhc_oif)
1388 return false;
1389 }
1390
1391 return true;
1392}
1393
1394
1395int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1396 struct fib_result *res, int fib_flags)
1397{
1398 struct trie *t = (struct trie *) tb->tb_data;
1399#ifdef CONFIG_IP_FIB_TRIE_STATS
1400 struct trie_use_stats __percpu *stats = t->stats;
1401#endif
1402 const t_key key = ntohl(flp->daddr);
1403 struct key_vector *n, *pn;
1404 struct fib_alias *fa;
1405 unsigned long index;
1406 t_key cindex;
1407
1408 pn = t->kv;
1409 cindex = 0;
1410
1411 n = get_child_rcu(pn, cindex);
1412 if (!n) {
1413 trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
1414 return -EAGAIN;
1415 }
1416
1417#ifdef CONFIG_IP_FIB_TRIE_STATS
1418 this_cpu_inc(stats->gets);
1419#endif
1420
1421
1422 for (;;) {
1423 index = get_cindex(key, n);
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439 if (index >= (1ul << n->bits))
1440 break;
1441
1442
1443 if (IS_LEAF(n))
1444 goto found;
1445
1446
1447
1448
1449 if (n->slen > n->pos) {
1450 pn = n;
1451 cindex = index;
1452 }
1453
1454 n = get_child_rcu(n, index);
1455 if (unlikely(!n))
1456 goto backtrace;
1457 }
1458
1459
1460 for (;;) {
1461
1462 struct key_vector __rcu **cptr = n->tnode;
1463
1464
1465
1466
1467
1468 if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
1469 goto backtrace;
1470
1471
1472 if (unlikely(IS_LEAF(n)))
1473 break;
1474
1475
1476
1477
1478
1479
1480 while ((n = rcu_dereference(*cptr)) == NULL) {
1481backtrace:
1482#ifdef CONFIG_IP_FIB_TRIE_STATS
1483 if (!n)
1484 this_cpu_inc(stats->null_node_hit);
1485#endif
1486
1487
1488
1489
1490
1491 while (!cindex) {
1492 t_key pkey = pn->key;
1493
1494
1495
1496
1497
1498 if (IS_TRIE(pn)) {
1499 trace_fib_table_lookup(tb->tb_id, flp,
1500 NULL, -EAGAIN);
1501 return -EAGAIN;
1502 }
1503#ifdef CONFIG_IP_FIB_TRIE_STATS
1504 this_cpu_inc(stats->backtrack);
1505#endif
1506
1507 pn = node_parent_rcu(pn);
1508 cindex = get_index(pkey, pn);
1509 }
1510
1511
1512 cindex &= cindex - 1;
1513
1514
1515 cptr = &pn->tnode[cindex];
1516 }
1517 }
1518
1519found:
1520
1521 index = key ^ n->key;
1522
1523
1524 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
1525 struct fib_info *fi = fa->fa_info;
1526 struct fib_nh_common *nhc;
1527 int nhsel, err;
1528
1529 if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
1530 if (index >= (1ul << fa->fa_slen))
1531 continue;
1532 }
1533 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1534 continue;
1535 if (fi->fib_dead)
1536 continue;
1537 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1538 continue;
1539 fib_alias_accessed(fa);
1540 err = fib_props[fa->fa_type].error;
1541 if (unlikely(err < 0)) {
1542out_reject:
1543#ifdef CONFIG_IP_FIB_TRIE_STATS
1544 this_cpu_inc(stats->semantic_match_passed);
1545#endif
1546 trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
1547 return err;
1548 }
1549 if (fi->fib_flags & RTNH_F_DEAD)
1550 continue;
1551
1552 if (unlikely(fi->nh)) {
1553 if (nexthop_is_blackhole(fi->nh)) {
1554 err = fib_props[RTN_BLACKHOLE].error;
1555 goto out_reject;
1556 }
1557
1558 nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
1559 &nhsel);
1560 if (nhc)
1561 goto set_result;
1562 goto miss;
1563 }
1564
1565 for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
1566 nhc = fib_info_nhc(fi, nhsel);
1567
1568 if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
1569 continue;
1570set_result:
1571 if (!(fib_flags & FIB_LOOKUP_NOREF))
1572 refcount_inc(&fi->fib_clntref);
1573
1574 res->prefix = htonl(n->key);
1575 res->prefixlen = KEYLENGTH - fa->fa_slen;
1576 res->nh_sel = nhsel;
1577 res->nhc = nhc;
1578 res->type = fa->fa_type;
1579 res->scope = fi->fib_scope;
1580 res->fi = fi;
1581 res->table = tb;
1582 res->fa_head = &n->leaf;
1583#ifdef CONFIG_IP_FIB_TRIE_STATS
1584 this_cpu_inc(stats->semantic_match_passed);
1585#endif
1586 trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
1587
1588 return err;
1589 }
1590 }
1591miss:
1592#ifdef CONFIG_IP_FIB_TRIE_STATS
1593 this_cpu_inc(stats->semantic_match_miss);
1594#endif
1595 goto backtrace;
1596}
1597EXPORT_SYMBOL_GPL(fib_table_lookup);
1598
1599static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1600 struct key_vector *l, struct fib_alias *old)
1601{
1602
1603 struct hlist_node **pprev = old->fa_list.pprev;
1604 struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
1605
1606
1607 hlist_del_rcu(&old->fa_list);
1608
1609
1610
1611
1612 if (hlist_empty(&l->leaf)) {
1613 if (tp->slen == l->slen)
1614 node_pull_suffix(tp, tp->pos);
1615 put_child_root(tp, l->key, NULL);
1616 node_free(l);
1617 trie_rebalance(t, tp);
1618 return;
1619 }
1620
1621
1622 if (*pprev)
1623 return;
1624
1625
1626 l->slen = fa->fa_slen;
1627 node_pull_suffix(tp, fa->fa_slen);
1628}
1629
1630static void fib_notify_alias_delete(struct net *net, u32 key,
1631 struct hlist_head *fah,
1632 struct fib_alias *fa_to_delete,
1633 struct netlink_ext_ack *extack)
1634{
1635 struct fib_alias *fa_next, *fa_to_notify;
1636 u32 tb_id = fa_to_delete->tb_id;
1637 u8 slen = fa_to_delete->fa_slen;
1638 enum fib_event_type fib_event;
1639
1640
1641 if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
1642 return;
1643
1644
1645
1646
1647 fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
1648 struct fib_alias, fa_list);
1649 if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
1650 fib_event = FIB_EVENT_ENTRY_REPLACE;
1651 fa_to_notify = fa_next;
1652 } else {
1653 fib_event = FIB_EVENT_ENTRY_DEL;
1654 fa_to_notify = fa_to_delete;
1655 }
1656 call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
1657 fa_to_notify, extack);
1658}
1659
1660
1661int fib_table_delete(struct net *net, struct fib_table *tb,
1662 struct fib_config *cfg, struct netlink_ext_ack *extack)
1663{
1664 struct trie *t = (struct trie *) tb->tb_data;
1665 struct fib_alias *fa, *fa_to_delete;
1666 struct key_vector *l, *tp;
1667 u8 plen = cfg->fc_dst_len;
1668 u8 slen = KEYLENGTH - plen;
1669 u8 tos = cfg->fc_tos;
1670 u32 key;
1671
1672 key = ntohl(cfg->fc_dst);
1673
1674 if (!fib_valid_key_len(key, plen, extack))
1675 return -EINVAL;
1676
1677 l = fib_find_node(t, &tp, key);
1678 if (!l)
1679 return -ESRCH;
1680
1681 fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
1682 if (!fa)
1683 return -ESRCH;
1684
1685 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1686
1687 fa_to_delete = NULL;
1688 hlist_for_each_entry_from(fa, fa_list) {
1689 struct fib_info *fi = fa->fa_info;
1690
1691 if ((fa->fa_slen != slen) ||
1692 (fa->tb_id != tb->tb_id) ||
1693 (fa->fa_tos != tos))
1694 break;
1695
1696 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1697 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1698 fa->fa_info->fib_scope == cfg->fc_scope) &&
1699 (!cfg->fc_prefsrc ||
1700 fi->fib_prefsrc == cfg->fc_prefsrc) &&
1701 (!cfg->fc_protocol ||
1702 fi->fib_protocol == cfg->fc_protocol) &&
1703 fib_nh_match(net, cfg, fi, extack) == 0 &&
1704 fib_metrics_match(cfg, fi)) {
1705 fa_to_delete = fa;
1706 break;
1707 }
1708 }
1709
1710 if (!fa_to_delete)
1711 return -ESRCH;
1712
1713 fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
1714 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1715 &cfg->fc_nlinfo, 0);
1716
1717 if (!plen)
1718 tb->tb_num_default--;
1719
1720 fib_remove_alias(t, tp, l, fa_to_delete);
1721
1722 if (fa_to_delete->fa_state & FA_S_ACCESSED)
1723 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1724
1725 fib_release_info(fa_to_delete->fa_info);
1726 alias_free_mem_rcu(fa_to_delete);
1727 return 0;
1728}
1729
1730
1731static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
1732{
1733 struct key_vector *pn, *n = *tn;
1734 unsigned long cindex;
1735
1736
1737 do {
1738
1739 pn = n;
1740 cindex = (key > pn->key) ? get_index(key, pn) : 0;
1741
1742 if (cindex >> pn->bits)
1743 break;
1744
1745
1746 n = get_child_rcu(pn, cindex++);
1747 if (!n)
1748 break;
1749
1750
1751 if (IS_LEAF(n) && (n->key >= key))
1752 goto found;
1753 } while (IS_TNODE(n));
1754
1755
1756 while (!IS_TRIE(pn)) {
1757
1758 if (cindex >= (1ul << pn->bits)) {
1759 t_key pkey = pn->key;
1760
1761 pn = node_parent_rcu(pn);
1762 cindex = get_index(pkey, pn) + 1;
1763 continue;
1764 }
1765
1766
1767 n = get_child_rcu(pn, cindex++);
1768 if (!n)
1769 continue;
1770
1771
1772 if (IS_LEAF(n))
1773 goto found;
1774
1775
1776 pn = n;
1777 cindex = 0;
1778 }
1779
1780 *tn = pn;
1781 return NULL;
1782found:
1783
1784 *tn = pn;
1785 return n;
1786}
1787
1788static void fib_trie_free(struct fib_table *tb)
1789{
1790 struct trie *t = (struct trie *)tb->tb_data;
1791 struct key_vector *pn = t->kv;
1792 unsigned long cindex = 1;
1793 struct hlist_node *tmp;
1794 struct fib_alias *fa;
1795
1796
1797 for (;;) {
1798 struct key_vector *n;
1799
1800 if (!(cindex--)) {
1801 t_key pkey = pn->key;
1802
1803 if (IS_TRIE(pn))
1804 break;
1805
1806 n = pn;
1807 pn = node_parent(pn);
1808
1809
1810 put_child_root(pn, n->key, NULL);
1811 node_free(n);
1812
1813 cindex = get_index(pkey, pn);
1814
1815 continue;
1816 }
1817
1818
1819 n = get_child(pn, cindex);
1820 if (!n)
1821 continue;
1822
1823 if (IS_TNODE(n)) {
1824
1825 pn = n;
1826 cindex = 1ul << n->bits;
1827
1828 continue;
1829 }
1830
1831 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1832 hlist_del_rcu(&fa->fa_list);
1833 alias_free_mem_rcu(fa);
1834 }
1835
1836 put_child_root(pn, n->key, NULL);
1837 node_free(n);
1838 }
1839
1840#ifdef CONFIG_IP_FIB_TRIE_STATS
1841 free_percpu(t->stats);
1842#endif
1843 kfree(tb);
1844}
1845
1846struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
1847{
1848 struct trie *ot = (struct trie *)oldtb->tb_data;
1849 struct key_vector *l, *tp = ot->kv;
1850 struct fib_table *local_tb;
1851 struct fib_alias *fa;
1852 struct trie *lt;
1853 t_key key = 0;
1854
1855 if (oldtb->tb_data == oldtb->__data)
1856 return oldtb;
1857
1858 local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
1859 if (!local_tb)
1860 return NULL;
1861
1862 lt = (struct trie *)local_tb->tb_data;
1863
1864 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1865 struct key_vector *local_l = NULL, *local_tp;
1866
1867 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1868 struct fib_alias *new_fa;
1869
1870 if (local_tb->tb_id != fa->tb_id)
1871 continue;
1872
1873
1874 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1875 if (!new_fa)
1876 goto out;
1877
1878 memcpy(new_fa, fa, sizeof(*fa));
1879
1880
1881 if (!local_l)
1882 local_l = fib_find_node(lt, &local_tp, l->key);
1883
1884 if (fib_insert_alias(lt, local_tp, local_l, new_fa,
1885 NULL, l->key)) {
1886 kmem_cache_free(fn_alias_kmem, new_fa);
1887 goto out;
1888 }
1889 }
1890
1891
1892 key = l->key + 1;
1893 if (key < l->key)
1894 break;
1895 }
1896
1897 return local_tb;
1898out:
1899 fib_trie_free(local_tb);
1900
1901 return NULL;
1902}
1903
1904
1905void fib_table_flush_external(struct fib_table *tb)
1906{
1907 struct trie *t = (struct trie *)tb->tb_data;
1908 struct key_vector *pn = t->kv;
1909 unsigned long cindex = 1;
1910 struct hlist_node *tmp;
1911 struct fib_alias *fa;
1912
1913
1914 for (;;) {
1915 unsigned char slen = 0;
1916 struct key_vector *n;
1917
1918 if (!(cindex--)) {
1919 t_key pkey = pn->key;
1920
1921
1922 if (IS_TRIE(pn))
1923 break;
1924
1925
1926 if (pn->slen > pn->pos)
1927 update_suffix(pn);
1928
1929
1930 pn = resize(t, pn);
1931 cindex = get_index(pkey, pn);
1932
1933 continue;
1934 }
1935
1936
1937 n = get_child(pn, cindex);
1938 if (!n)
1939 continue;
1940
1941 if (IS_TNODE(n)) {
1942
1943 pn = n;
1944 cindex = 1ul << n->bits;
1945
1946 continue;
1947 }
1948
1949 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1950
1951
1952
1953 if (tb->tb_id != fa->tb_id) {
1954 hlist_del_rcu(&fa->fa_list);
1955 alias_free_mem_rcu(fa);
1956 continue;
1957 }
1958
1959
1960 slen = fa->fa_slen;
1961 }
1962
1963
1964 n->slen = slen;
1965
1966 if (hlist_empty(&n->leaf)) {
1967 put_child_root(pn, n->key, NULL);
1968 node_free(n);
1969 }
1970 }
1971}
1972
1973
1974int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
1975{
1976 struct trie *t = (struct trie *)tb->tb_data;
1977 struct key_vector *pn = t->kv;
1978 unsigned long cindex = 1;
1979 struct hlist_node *tmp;
1980 struct fib_alias *fa;
1981 int found = 0;
1982
1983
1984 for (;;) {
1985 unsigned char slen = 0;
1986 struct key_vector *n;
1987
1988 if (!(cindex--)) {
1989 t_key pkey = pn->key;
1990
1991
1992 if (IS_TRIE(pn))
1993 break;
1994
1995
1996 if (pn->slen > pn->pos)
1997 update_suffix(pn);
1998
1999
2000 pn = resize(t, pn);
2001 cindex = get_index(pkey, pn);
2002
2003 continue;
2004 }
2005
2006
2007 n = get_child(pn, cindex);
2008 if (!n)
2009 continue;
2010
2011 if (IS_TNODE(n)) {
2012
2013 pn = n;
2014 cindex = 1ul << n->bits;
2015
2016 continue;
2017 }
2018
2019 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
2020 struct fib_info *fi = fa->fa_info;
2021
2022 if (!fi || tb->tb_id != fa->tb_id ||
2023 (!(fi->fib_flags & RTNH_F_DEAD) &&
2024 !fib_props[fa->fa_type].error)) {
2025 slen = fa->fa_slen;
2026 continue;
2027 }
2028
2029
2030
2031
2032 if (!flush_all && fib_props[fa->fa_type].error) {
2033 slen = fa->fa_slen;
2034 continue;
2035 }
2036
2037 fib_notify_alias_delete(net, n->key, &n->leaf, fa,
2038 NULL);
2039 hlist_del_rcu(&fa->fa_list);
2040 fib_release_info(fa->fa_info);
2041 alias_free_mem_rcu(fa);
2042 found++;
2043 }
2044
2045
2046 n->slen = slen;
2047
2048 if (hlist_empty(&n->leaf)) {
2049 put_child_root(pn, n->key, NULL);
2050 node_free(n);
2051 }
2052 }
2053
2054 pr_debug("trie_flush found=%d\n", found);
2055 return found;
2056}
2057
2058
2059static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
2060 struct nl_info *info)
2061{
2062 struct trie *t = (struct trie *)tb->tb_data;
2063 struct key_vector *pn = t->kv;
2064 unsigned long cindex = 1;
2065 struct fib_alias *fa;
2066
2067 for (;;) {
2068 struct key_vector *n;
2069
2070 if (!(cindex--)) {
2071 t_key pkey = pn->key;
2072
2073 if (IS_TRIE(pn))
2074 break;
2075
2076 pn = node_parent(pn);
2077 cindex = get_index(pkey, pn);
2078 continue;
2079 }
2080
2081
2082 n = get_child(pn, cindex);
2083 if (!n)
2084 continue;
2085
2086 if (IS_TNODE(n)) {
2087
2088 pn = n;
2089 cindex = 1ul << n->bits;
2090
2091 continue;
2092 }
2093
2094 hlist_for_each_entry(fa, &n->leaf, fa_list) {
2095 struct fib_info *fi = fa->fa_info;
2096
2097 if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
2098 continue;
2099
2100 rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
2101 KEYLENGTH - fa->fa_slen, tb->tb_id,
2102 info, NLM_F_REPLACE);
2103
2104
2105
2106
2107
2108 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
2109 n->key,
2110 KEYLENGTH - fa->fa_slen, fa,
2111 NULL);
2112 }
2113 }
2114}
2115
2116void fib_info_notify_update(struct net *net, struct nl_info *info)
2117{
2118 unsigned int h;
2119
2120 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2121 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2122 struct fib_table *tb;
2123
2124 hlist_for_each_entry_rcu(tb, head, tb_hlist)
2125 __fib_info_notify_update(net, tb, info);
2126 }
2127}
2128
2129static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
2130 struct notifier_block *nb,
2131 struct netlink_ext_ack *extack)
2132{
2133 struct fib_alias *fa;
2134 int last_slen = -1;
2135 int err;
2136
2137 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2138 struct fib_info *fi = fa->fa_info;
2139
2140 if (!fi)
2141 continue;
2142
2143
2144
2145
2146 if (tb->tb_id != fa->tb_id)
2147 continue;
2148
2149 if (fa->fa_slen == last_slen)
2150 continue;
2151
2152 last_slen = fa->fa_slen;
2153 err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
2154 l->key, KEYLENGTH - fa->fa_slen,
2155 fa, extack);
2156 if (err)
2157 return err;
2158 }
2159 return 0;
2160}
2161
2162static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
2163 struct netlink_ext_ack *extack)
2164{
2165 struct trie *t = (struct trie *)tb->tb_data;
2166 struct key_vector *l, *tp = t->kv;
2167 t_key key = 0;
2168 int err;
2169
2170 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2171 err = fib_leaf_notify(l, tb, nb, extack);
2172 if (err)
2173 return err;
2174
2175 key = l->key + 1;
2176
2177 if (key < l->key)
2178 break;
2179 }
2180 return 0;
2181}
2182
2183int fib_notify(struct net *net, struct notifier_block *nb,
2184 struct netlink_ext_ack *extack)
2185{
2186 unsigned int h;
2187 int err;
2188
2189 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2190 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2191 struct fib_table *tb;
2192
2193 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2194 err = fib_table_notify(tb, nb, extack);
2195 if (err)
2196 return err;
2197 }
2198 }
2199 return 0;
2200}
2201
2202static void __trie_free_rcu(struct rcu_head *head)
2203{
2204 struct fib_table *tb = container_of(head, struct fib_table, rcu);
2205#ifdef CONFIG_IP_FIB_TRIE_STATS
2206 struct trie *t = (struct trie *)tb->tb_data;
2207
2208 if (tb->tb_data == tb->__data)
2209 free_percpu(t->stats);
2210#endif
2211 kfree(tb);
2212}
2213
2214void fib_free_table(struct fib_table *tb)
2215{
2216 call_rcu(&tb->rcu, __trie_free_rcu);
2217}
2218
2219static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
2220 struct sk_buff *skb, struct netlink_callback *cb,
2221 struct fib_dump_filter *filter)
2222{
2223 unsigned int flags = NLM_F_MULTI;
2224 __be32 xkey = htonl(l->key);
2225 int i, s_i, i_fa, s_fa, err;
2226 struct fib_alias *fa;
2227
2228 if (filter->filter_set ||
2229 !filter->dump_exceptions || !filter->dump_routes)
2230 flags |= NLM_F_DUMP_FILTERED;
2231
2232 s_i = cb->args[4];
2233 s_fa = cb->args[5];
2234 i = 0;
2235
2236
2237 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2238 struct fib_info *fi = fa->fa_info;
2239
2240 if (i < s_i)
2241 goto next;
2242
2243 i_fa = 0;
2244
2245 if (tb->tb_id != fa->tb_id)
2246 goto next;
2247
2248 if (filter->filter_set) {
2249 if (filter->rt_type && fa->fa_type != filter->rt_type)
2250 goto next;
2251
2252 if ((filter->protocol &&
2253 fi->fib_protocol != filter->protocol))
2254 goto next;
2255
2256 if (filter->dev &&
2257 !fib_info_nh_uses_dev(fi, filter->dev))
2258 goto next;
2259 }
2260
2261 if (filter->dump_routes) {
2262 if (!s_fa) {
2263 struct fib_rt_info fri;
2264
2265 fri.fi = fi;
2266 fri.tb_id = tb->tb_id;
2267 fri.dst = xkey;
2268 fri.dst_len = KEYLENGTH - fa->fa_slen;
2269 fri.tos = fa->fa_tos;
2270 fri.type = fa->fa_type;
2271 fri.offload = fa->offload;
2272 fri.trap = fa->trap;
2273 err = fib_dump_info(skb,
2274 NETLINK_CB(cb->skb).portid,
2275 cb->nlh->nlmsg_seq,
2276 RTM_NEWROUTE, &fri, flags);
2277 if (err < 0)
2278 goto stop;
2279 }
2280
2281 i_fa++;
2282 }
2283
2284 if (filter->dump_exceptions) {
2285 err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2286 &i_fa, s_fa, flags);
2287 if (err < 0)
2288 goto stop;
2289 }
2290
2291next:
2292 i++;
2293 }
2294
2295 cb->args[4] = i;
2296 return skb->len;
2297
2298stop:
2299 cb->args[4] = i;
2300 cb->args[5] = i_fa;
2301 return err;
2302}
2303
2304
2305int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
2306 struct netlink_callback *cb, struct fib_dump_filter *filter)
2307{
2308 struct trie *t = (struct trie *)tb->tb_data;
2309 struct key_vector *l, *tp = t->kv;
2310
2311
2312
2313 int count = cb->args[2];
2314 t_key key = cb->args[3];
2315
2316
2317
2318
2319 if (count && !key)
2320 return skb->len;
2321
2322 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2323 int err;
2324
2325 err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
2326 if (err < 0) {
2327 cb->args[3] = key;
2328 cb->args[2] = count;
2329 return err;
2330 }
2331
2332 ++count;
2333 key = l->key + 1;
2334
2335 memset(&cb->args[4], 0,
2336 sizeof(cb->args) - 4*sizeof(cb->args[0]));
2337
2338
2339 if (key < l->key)
2340 break;
2341 }
2342
2343 cb->args[3] = key;
2344 cb->args[2] = count;
2345
2346 return skb->len;
2347}
2348
2349void __init fib_trie_init(void)
2350{
2351 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2352 sizeof(struct fib_alias),
2353 0, SLAB_PANIC, NULL);
2354
2355 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2356 LEAF_SIZE,
2357 0, SLAB_PANIC, NULL);
2358}
2359
2360struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
2361{
2362 struct fib_table *tb;
2363 struct trie *t;
2364 size_t sz = sizeof(*tb);
2365
2366 if (!alias)
2367 sz += sizeof(struct trie);
2368
2369 tb = kzalloc(sz, GFP_KERNEL);
2370 if (!tb)
2371 return NULL;
2372
2373 tb->tb_id = id;
2374 tb->tb_num_default = 0;
2375 tb->tb_data = (alias ? alias->__data : tb->__data);
2376
2377 if (alias)
2378 return tb;
2379
2380 t = (struct trie *) tb->tb_data;
2381 t->kv[0].pos = KEYLENGTH;
2382 t->kv[0].slen = KEYLENGTH;
2383#ifdef CONFIG_IP_FIB_TRIE_STATS
2384 t->stats = alloc_percpu(struct trie_use_stats);
2385 if (!t->stats) {
2386 kfree(tb);
2387 tb = NULL;
2388 }
2389#endif
2390
2391 return tb;
2392}
2393
2394#ifdef CONFIG_PROC_FS
2395
2396struct fib_trie_iter {
2397 struct seq_net_private p;
2398 struct fib_table *tb;
2399 struct key_vector *tnode;
2400 unsigned int index;
2401 unsigned int depth;
2402};
2403
2404static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
2405{
2406 unsigned long cindex = iter->index;
2407 struct key_vector *pn = iter->tnode;
2408 t_key pkey;
2409
2410 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2411 iter->tnode, iter->index, iter->depth);
2412
2413 while (!IS_TRIE(pn)) {
2414 while (cindex < child_length(pn)) {
2415 struct key_vector *n = get_child_rcu(pn, cindex++);
2416
2417 if (!n)
2418 continue;
2419
2420 if (IS_LEAF(n)) {
2421 iter->tnode = pn;
2422 iter->index = cindex;
2423 } else {
2424
2425 iter->tnode = n;
2426 iter->index = 0;
2427 ++iter->depth;
2428 }
2429
2430 return n;
2431 }
2432
2433
2434 pkey = pn->key;
2435 pn = node_parent_rcu(pn);
2436 cindex = get_index(pkey, pn) + 1;
2437 --iter->depth;
2438 }
2439
2440
2441 iter->tnode = pn;
2442 iter->index = 0;
2443
2444 return NULL;
2445}
2446
2447static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2448 struct trie *t)
2449{
2450 struct key_vector *n, *pn;
2451
2452 if (!t)
2453 return NULL;
2454
2455 pn = t->kv;
2456 n = rcu_dereference(pn->tnode[0]);
2457 if (!n)
2458 return NULL;
2459
2460 if (IS_TNODE(n)) {
2461 iter->tnode = n;
2462 iter->index = 0;
2463 iter->depth = 1;
2464 } else {
2465 iter->tnode = pn;
2466 iter->index = 0;
2467 iter->depth = 0;
2468 }
2469
2470 return n;
2471}
2472
2473static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2474{
2475 struct key_vector *n;
2476 struct fib_trie_iter iter;
2477
2478 memset(s, 0, sizeof(*s));
2479
2480 rcu_read_lock();
2481 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2482 if (IS_LEAF(n)) {
2483 struct fib_alias *fa;
2484
2485 s->leaves++;
2486 s->totdepth += iter.depth;
2487 if (iter.depth > s->maxdepth)
2488 s->maxdepth = iter.depth;
2489
2490 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
2491 ++s->prefixes;
2492 } else {
2493 s->tnodes++;
2494 if (n->bits < MAX_STAT_DEPTH)
2495 s->nodesizes[n->bits]++;
2496 s->nullpointers += tn_info(n)->empty_children;
2497 }
2498 }
2499 rcu_read_unlock();
2500}
2501
2502
2503
2504
2505static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2506{
2507 unsigned int i, max, pointers, bytes, avdepth;
2508
2509 if (stat->leaves)
2510 avdepth = stat->totdepth*100 / stat->leaves;
2511 else
2512 avdepth = 0;
2513
2514 seq_printf(seq, "\tAver depth: %u.%02d\n",
2515 avdepth / 100, avdepth % 100);
2516 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2517
2518 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2519 bytes = LEAF_SIZE * stat->leaves;
2520
2521 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2522 bytes += sizeof(struct fib_alias) * stat->prefixes;
2523
2524 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2525 bytes += TNODE_SIZE(0) * stat->tnodes;
2526
2527 max = MAX_STAT_DEPTH;
2528 while (max > 0 && stat->nodesizes[max-1] == 0)
2529 max--;
2530
2531 pointers = 0;
2532 for (i = 1; i < max; i++)
2533 if (stat->nodesizes[i] != 0) {
2534 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2535 pointers += (1<<i) * stat->nodesizes[i];
2536 }
2537 seq_putc(seq, '\n');
2538 seq_printf(seq, "\tPointers: %u\n", pointers);
2539
2540 bytes += sizeof(struct key_vector *) * pointers;
2541 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2542 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2543}
2544
2545#ifdef CONFIG_IP_FIB_TRIE_STATS
2546static void trie_show_usage(struct seq_file *seq,
2547 const struct trie_use_stats __percpu *stats)
2548{
2549 struct trie_use_stats s = { 0 };
2550 int cpu;
2551
2552
2553 for_each_possible_cpu(cpu) {
2554 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
2555
2556 s.gets += pcpu->gets;
2557 s.backtrack += pcpu->backtrack;
2558 s.semantic_match_passed += pcpu->semantic_match_passed;
2559 s.semantic_match_miss += pcpu->semantic_match_miss;
2560 s.null_node_hit += pcpu->null_node_hit;
2561 s.resize_node_skipped += pcpu->resize_node_skipped;
2562 }
2563
2564 seq_printf(seq, "\nCounters:\n---------\n");
2565 seq_printf(seq, "gets = %u\n", s.gets);
2566 seq_printf(seq, "backtracks = %u\n", s.backtrack);
2567 seq_printf(seq, "semantic match passed = %u\n",
2568 s.semantic_match_passed);
2569 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
2570 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
2571 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
2572}
2573#endif
2574
2575static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2576{
2577 if (tb->tb_id == RT_TABLE_LOCAL)
2578 seq_puts(seq, "Local:\n");
2579 else if (tb->tb_id == RT_TABLE_MAIN)
2580 seq_puts(seq, "Main:\n");
2581 else
2582 seq_printf(seq, "Id %d:\n", tb->tb_id);
2583}
2584
2585
2586static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2587{
2588 struct net *net = (struct net *)seq->private;
2589 unsigned int h;
2590
2591 seq_printf(seq,
2592 "Basic info: size of leaf:"
2593 " %zd bytes, size of tnode: %zd bytes.\n",
2594 LEAF_SIZE, TNODE_SIZE(0));
2595
2596 rcu_read_lock();
2597 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2598 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2599 struct fib_table *tb;
2600
2601 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2602 struct trie *t = (struct trie *) tb->tb_data;
2603 struct trie_stat stat;
2604
2605 if (!t)
2606 continue;
2607
2608 fib_table_print(seq, tb);
2609
2610 trie_collect_stats(t, &stat);
2611 trie_show_stats(seq, &stat);
2612#ifdef CONFIG_IP_FIB_TRIE_STATS
2613 trie_show_usage(seq, t->stats);
2614#endif
2615 }
2616 cond_resched_rcu();
2617 }
2618 rcu_read_unlock();
2619
2620 return 0;
2621}
2622
2623static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2624{
2625 struct fib_trie_iter *iter = seq->private;
2626 struct net *net = seq_file_net(seq);
2627 loff_t idx = 0;
2628 unsigned int h;
2629
2630 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2631 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2632 struct fib_table *tb;
2633
2634 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2635 struct key_vector *n;
2636
2637 for (n = fib_trie_get_first(iter,
2638 (struct trie *) tb->tb_data);
2639 n; n = fib_trie_get_next(iter))
2640 if (pos == idx++) {
2641 iter->tb = tb;
2642 return n;
2643 }
2644 }
2645 }
2646
2647 return NULL;
2648}
2649
2650static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2651 __acquires(RCU)
2652{
2653 rcu_read_lock();
2654 return fib_trie_get_idx(seq, *pos);
2655}
2656
2657static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2658{
2659 struct fib_trie_iter *iter = seq->private;
2660 struct net *net = seq_file_net(seq);
2661 struct fib_table *tb = iter->tb;
2662 struct hlist_node *tb_node;
2663 unsigned int h;
2664 struct key_vector *n;
2665
2666 ++*pos;
2667
2668 n = fib_trie_get_next(iter);
2669 if (n)
2670 return n;
2671
2672
2673 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2674 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2675 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2676 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2677 if (n)
2678 goto found;
2679 }
2680
2681
2682 while (++h < FIB_TABLE_HASHSZ) {
2683 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2684 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2685 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2686 if (n)
2687 goto found;
2688 }
2689 }
2690 return NULL;
2691
2692found:
2693 iter->tb = tb;
2694 return n;
2695}
2696
2697static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2698 __releases(RCU)
2699{
2700 rcu_read_unlock();
2701}
2702
2703static void seq_indent(struct seq_file *seq, int n)
2704{
2705 while (n-- > 0)
2706 seq_puts(seq, " ");
2707}
2708
2709static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2710{
2711 switch (s) {
2712 case RT_SCOPE_UNIVERSE: return "universe";
2713 case RT_SCOPE_SITE: return "site";
2714 case RT_SCOPE_LINK: return "link";
2715 case RT_SCOPE_HOST: return "host";
2716 case RT_SCOPE_NOWHERE: return "nowhere";
2717 default:
2718 snprintf(buf, len, "scope=%d", s);
2719 return buf;
2720 }
2721}
2722
2723static const char *const rtn_type_names[__RTN_MAX] = {
2724 [RTN_UNSPEC] = "UNSPEC",
2725 [RTN_UNICAST] = "UNICAST",
2726 [RTN_LOCAL] = "LOCAL",
2727 [RTN_BROADCAST] = "BROADCAST",
2728 [RTN_ANYCAST] = "ANYCAST",
2729 [RTN_MULTICAST] = "MULTICAST",
2730 [RTN_BLACKHOLE] = "BLACKHOLE",
2731 [RTN_UNREACHABLE] = "UNREACHABLE",
2732 [RTN_PROHIBIT] = "PROHIBIT",
2733 [RTN_THROW] = "THROW",
2734 [RTN_NAT] = "NAT",
2735 [RTN_XRESOLVE] = "XRESOLVE",
2736};
2737
2738static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2739{
2740 if (t < __RTN_MAX && rtn_type_names[t])
2741 return rtn_type_names[t];
2742 snprintf(buf, len, "type %u", t);
2743 return buf;
2744}
2745
2746
2747static int fib_trie_seq_show(struct seq_file *seq, void *v)
2748{
2749 const struct fib_trie_iter *iter = seq->private;
2750 struct key_vector *n = v;
2751
2752 if (IS_TRIE(node_parent_rcu(n)))
2753 fib_table_print(seq, iter->tb);
2754
2755 if (IS_TNODE(n)) {
2756 __be32 prf = htonl(n->key);
2757
2758 seq_indent(seq, iter->depth-1);
2759 seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
2760 &prf, KEYLENGTH - n->pos - n->bits, n->bits,
2761 tn_info(n)->full_children,
2762 tn_info(n)->empty_children);
2763 } else {
2764 __be32 val = htonl(n->key);
2765 struct fib_alias *fa;
2766
2767 seq_indent(seq, iter->depth);
2768 seq_printf(seq, " |-- %pI4\n", &val);
2769
2770 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
2771 char buf1[32], buf2[32];
2772
2773 seq_indent(seq, iter->depth + 1);
2774 seq_printf(seq, " /%zu %s %s",
2775 KEYLENGTH - fa->fa_slen,
2776 rtn_scope(buf1, sizeof(buf1),
2777 fa->fa_info->fib_scope),
2778 rtn_type(buf2, sizeof(buf2),
2779 fa->fa_type));
2780 if (fa->fa_tos)
2781 seq_printf(seq, " tos=%d", fa->fa_tos);
2782 seq_putc(seq, '\n');
2783 }
2784 }
2785
2786 return 0;
2787}
2788
2789static const struct seq_operations fib_trie_seq_ops = {
2790 .start = fib_trie_seq_start,
2791 .next = fib_trie_seq_next,
2792 .stop = fib_trie_seq_stop,
2793 .show = fib_trie_seq_show,
2794};
2795
2796struct fib_route_iter {
2797 struct seq_net_private p;
2798 struct fib_table *main_tb;
2799 struct key_vector *tnode;
2800 loff_t pos;
2801 t_key key;
2802};
2803
2804static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
2805 loff_t pos)
2806{
2807 struct key_vector *l, **tp = &iter->tnode;
2808 t_key key;
2809
2810
2811 if (iter->pos > 0 && pos >= iter->pos) {
2812 key = iter->key;
2813 } else {
2814 iter->pos = 1;
2815 key = 0;
2816 }
2817
2818 pos -= iter->pos;
2819
2820 while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
2821 key = l->key + 1;
2822 iter->pos++;
2823 l = NULL;
2824
2825
2826 if (!key)
2827 break;
2828 }
2829
2830 if (l)
2831 iter->key = l->key;
2832 else
2833 iter->pos = 0;
2834
2835 return l;
2836}
2837
2838static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2839 __acquires(RCU)
2840{
2841 struct fib_route_iter *iter = seq->private;
2842 struct fib_table *tb;
2843 struct trie *t;
2844
2845 rcu_read_lock();
2846
2847 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
2848 if (!tb)
2849 return NULL;
2850
2851 iter->main_tb = tb;
2852 t = (struct trie *)tb->tb_data;
2853 iter->tnode = t->kv;
2854
2855 if (*pos != 0)
2856 return fib_route_get_idx(iter, *pos);
2857
2858 iter->pos = 0;
2859 iter->key = KEY_MAX;
2860
2861 return SEQ_START_TOKEN;
2862}
2863
2864static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2865{
2866 struct fib_route_iter *iter = seq->private;
2867 struct key_vector *l = NULL;
2868 t_key key = iter->key + 1;
2869
2870 ++*pos;
2871
2872
2873 if ((v == SEQ_START_TOKEN) || key)
2874 l = leaf_walk_rcu(&iter->tnode, key);
2875
2876 if (l) {
2877 iter->key = l->key;
2878 iter->pos++;
2879 } else {
2880 iter->pos = 0;
2881 }
2882
2883 return l;
2884}
2885
2886static void fib_route_seq_stop(struct seq_file *seq, void *v)
2887 __releases(RCU)
2888{
2889 rcu_read_unlock();
2890}
2891
2892static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
2893{
2894 unsigned int flags = 0;
2895
2896 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2897 flags = RTF_REJECT;
2898 if (fi) {
2899 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2900
2901 if (nhc->nhc_gw.ipv4)
2902 flags |= RTF_GATEWAY;
2903 }
2904 if (mask == htonl(0xFFFFFFFF))
2905 flags |= RTF_HOST;
2906 flags |= RTF_UP;
2907 return flags;
2908}
2909
2910
2911
2912
2913
2914
2915
2916static int fib_route_seq_show(struct seq_file *seq, void *v)
2917{
2918 struct fib_route_iter *iter = seq->private;
2919 struct fib_table *tb = iter->main_tb;
2920 struct fib_alias *fa;
2921 struct key_vector *l = v;
2922 __be32 prefix;
2923
2924 if (v == SEQ_START_TOKEN) {
2925 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2926 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2927 "\tWindow\tIRTT");
2928 return 0;
2929 }
2930
2931 prefix = htonl(l->key);
2932
2933 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2934 struct fib_info *fi = fa->fa_info;
2935 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2936 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2937
2938 if ((fa->fa_type == RTN_BROADCAST) ||
2939 (fa->fa_type == RTN_MULTICAST))
2940 continue;
2941
2942 if (fa->tb_id != tb->tb_id)
2943 continue;
2944
2945 seq_setwidth(seq, 127);
2946
2947 if (fi) {
2948 struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2949 __be32 gw = 0;
2950
2951 if (nhc->nhc_gw_family == AF_INET)
2952 gw = nhc->nhc_gw.ipv4;
2953
2954 seq_printf(seq,
2955 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2956 "%d\t%08X\t%d\t%u\t%u",
2957 nhc->nhc_dev ? nhc->nhc_dev->name : "*",
2958 prefix, gw, flags, 0, 0,
2959 fi->fib_priority,
2960 mask,
2961 (fi->fib_advmss ?
2962 fi->fib_advmss + 40 : 0),
2963 fi->fib_window,
2964 fi->fib_rtt >> 3);
2965 } else {
2966 seq_printf(seq,
2967 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2968 "%d\t%08X\t%d\t%u\t%u",
2969 prefix, 0, flags, 0, 0, 0,
2970 mask, 0, 0, 0);
2971 }
2972 seq_pad(seq, '\n');
2973 }
2974
2975 return 0;
2976}
2977
2978static const struct seq_operations fib_route_seq_ops = {
2979 .start = fib_route_seq_start,
2980 .next = fib_route_seq_next,
2981 .stop = fib_route_seq_stop,
2982 .show = fib_route_seq_show,
2983};
2984
2985int __net_init fib_proc_init(struct net *net)
2986{
2987 if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
2988 sizeof(struct fib_trie_iter)))
2989 goto out1;
2990
2991 if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
2992 fib_triestat_seq_show, NULL))
2993 goto out2;
2994
2995 if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
2996 sizeof(struct fib_route_iter)))
2997 goto out3;
2998
2999 return 0;
3000
3001out3:
3002 remove_proc_entry("fib_triestat", net->proc_net);
3003out2:
3004 remove_proc_entry("fib_trie", net->proc_net);
3005out1:
3006 return -ENOMEM;
3007}
3008
3009void __net_exit fib_proc_exit(struct net *net)
3010{
3011 remove_proc_entry("fib_trie", net->proc_net);
3012 remove_proc_entry("fib_triestat", net->proc_net);
3013 remove_proc_entry("route", net->proc_net);
3014}
3015
3016#endif
3017