1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <asm/uaccess.h>
17#include <linux/bitops.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/jiffies.h>
21#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/errno.h>
26#include <linux/in.h>
27#include <linux/inet.h>
28#include <linux/inetdevice.h>
29#include <linux/netdevice.h>
30#include <linux/if_arp.h>
31#include <linux/proc_fs.h>
32#include <linux/skbuff.h>
33#include <linux/init.h>
34#include <linux/slab.h>
35
36#include <net/arp.h>
37#include <net/ip.h>
38#include <net/protocol.h>
39#include <net/route.h>
40#include <net/tcp.h>
41#include <net/sock.h>
42#include <net/ip_fib.h>
43#include <net/netlink.h>
44#include <net/nexthop.h>
45#include <net/lwtunnel.h>
46
47#include "fib_lookup.h"
48
49static DEFINE_SPINLOCK(fib_info_lock);
50static struct hlist_head *fib_info_hash;
51static struct hlist_head *fib_info_laddrhash;
52static unsigned int fib_info_hash_size;
53static unsigned int fib_info_cnt;
54
55#define DEVINDEX_HASHBITS 8
56#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
57static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
58
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace, declare the loop
 * variables and start a for loop; the caller supplies the loop body and
 * must close the construct with endfor_nexthops(fi).
 *
 *   for_nexthops()    injects 'nhsel' and a const 'nh' cursor.
 *   change_nexthops() injects 'nhsel' and a writable 'nexthop_nh' cursor.
 *
 * Without CONFIG_IP_ROUTE_MULTIPATH the loop body runs exactly once, on
 * the first nexthop.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
u32 fib_multipath_secret __read_mostly;

#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (; nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++)

#else /* !CONFIG_IP_ROUTE_MULTIPATH */

#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
90
91
92const struct fib_prop fib_props[RTN_MAX + 1] = {
93 [RTN_UNSPEC] = {
94 .error = 0,
95 .scope = RT_SCOPE_NOWHERE,
96 },
97 [RTN_UNICAST] = {
98 .error = 0,
99 .scope = RT_SCOPE_UNIVERSE,
100 },
101 [RTN_LOCAL] = {
102 .error = 0,
103 .scope = RT_SCOPE_HOST,
104 },
105 [RTN_BROADCAST] = {
106 .error = 0,
107 .scope = RT_SCOPE_LINK,
108 },
109 [RTN_ANYCAST] = {
110 .error = 0,
111 .scope = RT_SCOPE_LINK,
112 },
113 [RTN_MULTICAST] = {
114 .error = 0,
115 .scope = RT_SCOPE_UNIVERSE,
116 },
117 [RTN_BLACKHOLE] = {
118 .error = -EINVAL,
119 .scope = RT_SCOPE_UNIVERSE,
120 },
121 [RTN_UNREACHABLE] = {
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 },
125 [RTN_PROHIBIT] = {
126 .error = -EACCES,
127 .scope = RT_SCOPE_UNIVERSE,
128 },
129 [RTN_THROW] = {
130 .error = -EAGAIN,
131 .scope = RT_SCOPE_UNIVERSE,
132 },
133 [RTN_NAT] = {
134 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE,
136 },
137 [RTN_XRESOLVE] = {
138 .error = -EINVAL,
139 .scope = RT_SCOPE_NOWHERE,
140 },
141};
142
143static void rt_fibinfo_free(struct rtable __rcu **rtp)
144{
145 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
146
147 if (!rt)
148 return;
149
150
151
152
153
154
155 dst_free(&rt->dst);
156}
157
158static void free_nh_exceptions(struct fib_nh *nh)
159{
160 struct fnhe_hash_bucket *hash;
161 int i;
162
163 hash = rcu_dereference_protected(nh->nh_exceptions, 1);
164 if (!hash)
165 return;
166 for (i = 0; i < FNHE_HASH_SIZE; i++) {
167 struct fib_nh_exception *fnhe;
168
169 fnhe = rcu_dereference_protected(hash[i].chain, 1);
170 while (fnhe) {
171 struct fib_nh_exception *next;
172
173 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
174
175 rt_fibinfo_free(&fnhe->fnhe_rth_input);
176 rt_fibinfo_free(&fnhe->fnhe_rth_output);
177
178 kfree(fnhe);
179
180 fnhe = next;
181 }
182 }
183 kfree(hash);
184}
185
186static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
187{
188 int cpu;
189
190 if (!rtp)
191 return;
192
193 for_each_possible_cpu(cpu) {
194 struct rtable *rt;
195
196 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
197 if (rt)
198 dst_free(&rt->dst);
199 }
200 free_percpu(rtp);
201}
202
203
204static void free_fib_info_rcu(struct rcu_head *head)
205{
206 struct fib_info *fi = container_of(head, struct fib_info, rcu);
207
208 change_nexthops(fi) {
209 if (nexthop_nh->nh_dev)
210 dev_put(nexthop_nh->nh_dev);
211 lwtstate_put(nexthop_nh->nh_lwtstate);
212 free_nh_exceptions(nexthop_nh);
213 rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
214 rt_fibinfo_free(&nexthop_nh->nh_rth_input);
215 } endfor_nexthops(fi);
216
217 if (fi->fib_metrics != (u32 *) dst_default_metrics)
218 kfree(fi->fib_metrics);
219 kfree(fi);
220}
221
222void free_fib_info(struct fib_info *fi)
223{
224 if (fi->fib_dead == 0) {
225 pr_warn("Freeing alive fib_info %p\n", fi);
226 return;
227 }
228 fib_info_cnt--;
229#ifdef CONFIG_IP_ROUTE_CLASSID
230 change_nexthops(fi) {
231 if (nexthop_nh->nh_tclassid)
232 fi->fib_net->ipv4.fib_num_tclassid_users--;
233 } endfor_nexthops(fi);
234#endif
235 call_rcu(&fi->rcu, free_fib_info_rcu);
236}
237
238void fib_release_info(struct fib_info *fi)
239{
240 spin_lock_bh(&fib_info_lock);
241 if (fi && --fi->fib_treeref == 0) {
242 hlist_del(&fi->fib_hash);
243 if (fi->fib_prefsrc)
244 hlist_del(&fi->fib_lhash);
245 change_nexthops(fi) {
246 if (!nexthop_nh->nh_dev)
247 continue;
248 hlist_del(&nexthop_nh->nh_hash);
249 } endfor_nexthops(fi)
250 fi->fib_dead = 1;
251 fib_info_put(fi);
252 }
253 spin_unlock_bh(&fib_info_lock);
254}
255
256static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
257{
258 const struct fib_nh *onh = ofi->fib_nh;
259
260 for_nexthops(fi) {
261 if (nh->nh_oif != onh->nh_oif ||
262 nh->nh_gw != onh->nh_gw ||
263 nh->nh_scope != onh->nh_scope ||
264#ifdef CONFIG_IP_ROUTE_MULTIPATH
265 nh->nh_weight != onh->nh_weight ||
266#endif
267#ifdef CONFIG_IP_ROUTE_CLASSID
268 nh->nh_tclassid != onh->nh_tclassid ||
269#endif
270 lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
271 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
272 return -1;
273 onh++;
274 } endfor_nexthops(fi);
275 return 0;
276}
277
278static inline unsigned int fib_devindex_hashfn(unsigned int val)
279{
280 unsigned int mask = DEVINDEX_HASHSIZE - 1;
281
282 return (val ^
283 (val >> DEVINDEX_HASHBITS) ^
284 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
285}
286
287static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
288{
289 unsigned int mask = (fib_info_hash_size - 1);
290 unsigned int val = fi->fib_nhs;
291
292 val ^= (fi->fib_protocol << 8) | fi->fib_scope;
293 val ^= (__force u32)fi->fib_prefsrc;
294 val ^= fi->fib_priority;
295 for_nexthops(fi) {
296 val ^= fib_devindex_hashfn(nh->nh_oif);
297 } endfor_nexthops(fi)
298
299 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
300}
301
302static struct fib_info *fib_find_info(const struct fib_info *nfi)
303{
304 struct hlist_head *head;
305 struct fib_info *fi;
306 unsigned int hash;
307
308 hash = fib_info_hashfn(nfi);
309 head = &fib_info_hash[hash];
310
311 hlist_for_each_entry(fi, head, fib_hash) {
312 if (!net_eq(fi->fib_net, nfi->fib_net))
313 continue;
314 if (fi->fib_nhs != nfi->fib_nhs)
315 continue;
316 if (nfi->fib_protocol == fi->fib_protocol &&
317 nfi->fib_scope == fi->fib_scope &&
318 nfi->fib_prefsrc == fi->fib_prefsrc &&
319 nfi->fib_priority == fi->fib_priority &&
320 nfi->fib_type == fi->fib_type &&
321 memcmp(nfi->fib_metrics, fi->fib_metrics,
322 sizeof(u32) * RTAX_MAX) == 0 &&
323 !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
324 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
325 return fi;
326 }
327
328 return NULL;
329}
330
331
332
333
334int ip_fib_check_default(__be32 gw, struct net_device *dev)
335{
336 struct hlist_head *head;
337 struct fib_nh *nh;
338 unsigned int hash;
339
340 spin_lock(&fib_info_lock);
341
342 hash = fib_devindex_hashfn(dev->ifindex);
343 head = &fib_info_devhash[hash];
344 hlist_for_each_entry(nh, head, nh_hash) {
345 if (nh->nh_dev == dev &&
346 nh->nh_gw == gw &&
347 !(nh->nh_flags & RTNH_F_DEAD)) {
348 spin_unlock(&fib_info_lock);
349 return 0;
350 }
351 }
352
353 spin_unlock(&fib_info_lock);
354
355 return -1;
356}
357
358static inline size_t fib_nlmsg_size(struct fib_info *fi)
359{
360 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
361 + nla_total_size(4)
362 + nla_total_size(4)
363 + nla_total_size(4)
364 + nla_total_size(4)
365 + nla_total_size(TCP_CA_NAME_MAX);
366
367
368 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
369
370 if (fi->fib_nhs) {
371 size_t nh_encapsize = 0;
372
373
374
375 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
376
377
378 nhsize += 2 * nla_total_size(4);
379
380
381 for_nexthops(fi) {
382 if (nh->nh_lwtstate) {
383
384 nh_encapsize += lwtunnel_get_encap_size(
385 nh->nh_lwtstate);
386
387 nh_encapsize += nla_total_size(2);
388 }
389 } endfor_nexthops(fi);
390
391
392 payload += nla_total_size((fi->fib_nhs * nhsize) +
393 nh_encapsize);
394
395 }
396
397 return payload;
398}
399
400void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
401 int dst_len, u32 tb_id, const struct nl_info *info,
402 unsigned int nlm_flags)
403{
404 struct sk_buff *skb;
405 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
406 int err = -ENOBUFS;
407
408 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
409 if (!skb)
410 goto errout;
411
412 err = fib_dump_info(skb, info->portid, seq, event, tb_id,
413 fa->fa_type, key, dst_len,
414 fa->fa_tos, fa->fa_info, nlm_flags);
415 if (err < 0) {
416
417 WARN_ON(err == -EMSGSIZE);
418 kfree_skb(skb);
419 goto errout;
420 }
421 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
422 info->nlh, GFP_KERNEL);
423 return;
424errout:
425 if (err < 0)
426 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
427}
428
429static int fib_detect_death(struct fib_info *fi, int order,
430 struct fib_info **last_resort, int *last_idx,
431 int dflt)
432{
433 struct neighbour *n;
434 int state = NUD_NONE;
435
436 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
437 if (n) {
438 state = n->nud_state;
439 neigh_release(n);
440 } else {
441 return 0;
442 }
443 if (state == NUD_REACHABLE)
444 return 0;
445 if ((state & NUD_VALID) && order != dflt)
446 return 0;
447 if ((state & NUD_VALID) ||
448 (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
449 *last_resort = fi;
450 *last_idx = order;
451 }
452 return 1;
453}
454
455#ifdef CONFIG_IP_ROUTE_MULTIPATH
456
/*
 * Count the rtnexthop records packed in a multipath attribute of
 * @remaining bytes starting at @rtnh.
 *
 * Returns the number of records, or 0 when bytes are left over after the
 * last well-formed record.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
469
470static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
471 int remaining, struct fib_config *cfg)
472{
473 struct net *net = cfg->fc_nlinfo.nl_net;
474 int ret;
475
476 change_nexthops(fi) {
477 int attrlen;
478
479 if (!rtnh_ok(rtnh, remaining))
480 return -EINVAL;
481
482 if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
483 return -EINVAL;
484
485 nexthop_nh->nh_flags =
486 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
487 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
488 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
489
490 attrlen = rtnh_attrlen(rtnh);
491 if (attrlen > 0) {
492 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
493
494 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
495 nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0;
496#ifdef CONFIG_IP_ROUTE_CLASSID
497 nla = nla_find(attrs, attrlen, RTA_FLOW);
498 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
499 if (nexthop_nh->nh_tclassid)
500 fi->fib_net->ipv4.fib_num_tclassid_users++;
501#endif
502 nla = nla_find(attrs, attrlen, RTA_ENCAP);
503 if (nla) {
504 struct lwtunnel_state *lwtstate;
505 struct net_device *dev = NULL;
506 struct nlattr *nla_entype;
507
508 nla_entype = nla_find(attrs, attrlen,
509 RTA_ENCAP_TYPE);
510 if (!nla_entype)
511 goto err_inval;
512 if (cfg->fc_oif)
513 dev = __dev_get_by_index(net, cfg->fc_oif);
514 ret = lwtunnel_build_state(dev, nla_get_u16(
515 nla_entype),
516 nla, AF_INET, cfg,
517 &lwtstate);
518 if (ret)
519 goto errout;
520 nexthop_nh->nh_lwtstate =
521 lwtstate_get(lwtstate);
522 }
523 }
524
525 rtnh = rtnh_next(rtnh, &remaining);
526 } endfor_nexthops(fi);
527
528 return 0;
529
530err_inval:
531 ret = -EINVAL;
532
533errout:
534 return ret;
535}
536
537static void fib_rebalance(struct fib_info *fi)
538{
539 int total;
540 int w;
541 struct in_device *in_dev;
542
543 if (fi->fib_nhs < 2)
544 return;
545
546 total = 0;
547 for_nexthops(fi) {
548 if (nh->nh_flags & RTNH_F_DEAD)
549 continue;
550
551 in_dev = __in_dev_get_rtnl(nh->nh_dev);
552
553 if (in_dev &&
554 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
555 nh->nh_flags & RTNH_F_LINKDOWN)
556 continue;
557
558 total += nh->nh_weight;
559 } endfor_nexthops(fi);
560
561 w = 0;
562 change_nexthops(fi) {
563 int upper_bound;
564
565 in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
566
567 if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
568 upper_bound = -1;
569 } else if (in_dev &&
570 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
571 nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
572 upper_bound = -1;
573 } else {
574 w += nexthop_nh->nh_weight;
575 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
576 total) - 1;
577 }
578
579 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
580 } endfor_nexthops(fi);
581
582 net_get_random_once(&fib_multipath_secret,
583 sizeof(fib_multipath_secret));
584}
585
586static inline void fib_add_weight(struct fib_info *fi,
587 const struct fib_nh *nh)
588{
589 fi->fib_weight += nh->nh_weight;
590}
591
592#else
593
/* Without multipath support there is nothing to balance or accumulate. */
#define fib_rebalance(fi)	do { } while (0)
#define fib_add_weight(fi, nh)	do { } while (0)
596
597#endif
598
599static int fib_encap_match(struct net *net, u16 encap_type,
600 struct nlattr *encap,
601 int oif, const struct fib_nh *nh,
602 const struct fib_config *cfg)
603{
604 struct lwtunnel_state *lwtstate;
605 struct net_device *dev = NULL;
606 int ret, result = 0;
607
608 if (encap_type == LWTUNNEL_ENCAP_NONE)
609 return 0;
610
611 if (oif)
612 dev = __dev_get_by_index(net, oif);
613 ret = lwtunnel_build_state(dev, encap_type, encap,
614 AF_INET, cfg, &lwtstate);
615 if (!ret) {
616 result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
617 lwtstate_free(lwtstate);
618 }
619
620 return result;
621}
622
623int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
624{
625 struct net *net = cfg->fc_nlinfo.nl_net;
626#ifdef CONFIG_IP_ROUTE_MULTIPATH
627 struct rtnexthop *rtnh;
628 int remaining;
629#endif
630
631 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
632 return 1;
633
634 if (cfg->fc_oif || cfg->fc_gw) {
635 if (cfg->fc_encap) {
636 if (fib_encap_match(net, cfg->fc_encap_type,
637 cfg->fc_encap, cfg->fc_oif,
638 fi->fib_nh, cfg))
639 return 1;
640 }
641 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
642 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
643 return 0;
644 return 1;
645 }
646
647#ifdef CONFIG_IP_ROUTE_MULTIPATH
648 if (!cfg->fc_mp)
649 return 0;
650
651 rtnh = cfg->fc_mp;
652 remaining = cfg->fc_mp_len;
653
654 for_nexthops(fi) {
655 int attrlen;
656
657 if (!rtnh_ok(rtnh, remaining))
658 return -EINVAL;
659
660 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
661 return 1;
662
663 attrlen = rtnh_attrlen(rtnh);
664 if (attrlen > 0) {
665 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
666
667 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
668 if (nla && nla_get_in_addr(nla) != nh->nh_gw)
669 return 1;
670#ifdef CONFIG_IP_ROUTE_CLASSID
671 nla = nla_find(attrs, attrlen, RTA_FLOW);
672 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
673 return 1;
674#endif
675 }
676
677 rtnh = rtnh_next(rtnh, &remaining);
678 } endfor_nexthops(fi);
679#endif
680 return 0;
681}
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
728 struct fib_nh *nh)
729{
730 int err = 0;
731 struct net *net;
732 struct net_device *dev;
733
734 net = cfg->fc_nlinfo.nl_net;
735 if (nh->nh_gw) {
736 struct fib_result res;
737
738 if (nh->nh_flags & RTNH_F_ONLINK) {
739 unsigned int addr_type;
740
741 if (cfg->fc_scope >= RT_SCOPE_LINK)
742 return -EINVAL;
743 dev = __dev_get_by_index(net, nh->nh_oif);
744 if (!dev)
745 return -ENODEV;
746 if (!(dev->flags & IFF_UP))
747 return -ENETDOWN;
748 addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
749 if (addr_type != RTN_UNICAST)
750 return -EINVAL;
751 if (!netif_carrier_ok(dev))
752 nh->nh_flags |= RTNH_F_LINKDOWN;
753 nh->nh_dev = dev;
754 dev_hold(dev);
755 nh->nh_scope = RT_SCOPE_LINK;
756 return 0;
757 }
758 rcu_read_lock();
759 {
760 struct fib_table *tbl = NULL;
761 struct flowi4 fl4 = {
762 .daddr = nh->nh_gw,
763 .flowi4_scope = cfg->fc_scope + 1,
764 .flowi4_oif = nh->nh_oif,
765 .flowi4_iif = LOOPBACK_IFINDEX,
766 };
767
768
769 if (fl4.flowi4_scope < RT_SCOPE_LINK)
770 fl4.flowi4_scope = RT_SCOPE_LINK;
771
772 if (cfg->fc_table)
773 tbl = fib_get_table(net, cfg->fc_table);
774
775 if (tbl)
776 err = fib_table_lookup(tbl, &fl4, &res,
777 FIB_LOOKUP_IGNORE_LINKSTATE |
778 FIB_LOOKUP_NOREF);
779
780
781
782
783
784 if (!tbl || err) {
785 err = fib_lookup(net, &fl4, &res,
786 FIB_LOOKUP_IGNORE_LINKSTATE);
787 }
788
789 if (err) {
790 rcu_read_unlock();
791 return err;
792 }
793 }
794 err = -EINVAL;
795 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
796 goto out;
797 nh->nh_scope = res.scope;
798 nh->nh_oif = FIB_RES_OIF(res);
799 nh->nh_dev = dev = FIB_RES_DEV(res);
800 if (!dev)
801 goto out;
802 dev_hold(dev);
803 if (!netif_carrier_ok(dev))
804 nh->nh_flags |= RTNH_F_LINKDOWN;
805 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
806 } else {
807 struct in_device *in_dev;
808
809 if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
810 return -EINVAL;
811
812 rcu_read_lock();
813 err = -ENODEV;
814 in_dev = inetdev_by_index(net, nh->nh_oif);
815 if (!in_dev)
816 goto out;
817 err = -ENETDOWN;
818 if (!(in_dev->dev->flags & IFF_UP))
819 goto out;
820 nh->nh_dev = in_dev->dev;
821 dev_hold(nh->nh_dev);
822 nh->nh_scope = RT_SCOPE_HOST;
823 if (!netif_carrier_ok(nh->nh_dev))
824 nh->nh_flags |= RTNH_F_LINKDOWN;
825 err = 0;
826 }
827out:
828 rcu_read_unlock();
829 return err;
830}
831
832static inline unsigned int fib_laddr_hashfn(__be32 val)
833{
834 unsigned int mask = (fib_info_hash_size - 1);
835
836 return ((__force u32)val ^
837 ((__force u32)val >> 7) ^
838 ((__force u32)val >> 14)) & mask;
839}
840
841static struct hlist_head *fib_info_hash_alloc(int bytes)
842{
843 if (bytes <= PAGE_SIZE)
844 return kzalloc(bytes, GFP_KERNEL);
845 else
846 return (struct hlist_head *)
847 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
848 get_order(bytes));
849}
850
851static void fib_info_hash_free(struct hlist_head *hash, int bytes)
852{
853 if (!hash)
854 return;
855
856 if (bytes <= PAGE_SIZE)
857 kfree(hash);
858 else
859 free_pages((unsigned long) hash, get_order(bytes));
860}
861
862static void fib_info_hash_move(struct hlist_head *new_info_hash,
863 struct hlist_head *new_laddrhash,
864 unsigned int new_size)
865{
866 struct hlist_head *old_info_hash, *old_laddrhash;
867 unsigned int old_size = fib_info_hash_size;
868 unsigned int i, bytes;
869
870 spin_lock_bh(&fib_info_lock);
871 old_info_hash = fib_info_hash;
872 old_laddrhash = fib_info_laddrhash;
873 fib_info_hash_size = new_size;
874
875 for (i = 0; i < old_size; i++) {
876 struct hlist_head *head = &fib_info_hash[i];
877 struct hlist_node *n;
878 struct fib_info *fi;
879
880 hlist_for_each_entry_safe(fi, n, head, fib_hash) {
881 struct hlist_head *dest;
882 unsigned int new_hash;
883
884 new_hash = fib_info_hashfn(fi);
885 dest = &new_info_hash[new_hash];
886 hlist_add_head(&fi->fib_hash, dest);
887 }
888 }
889 fib_info_hash = new_info_hash;
890
891 for (i = 0; i < old_size; i++) {
892 struct hlist_head *lhead = &fib_info_laddrhash[i];
893 struct hlist_node *n;
894 struct fib_info *fi;
895
896 hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
897 struct hlist_head *ldest;
898 unsigned int new_hash;
899
900 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
901 ldest = &new_laddrhash[new_hash];
902 hlist_add_head(&fi->fib_lhash, ldest);
903 }
904 }
905 fib_info_laddrhash = new_laddrhash;
906
907 spin_unlock_bh(&fib_info_lock);
908
909 bytes = old_size * sizeof(struct hlist_head *);
910 fib_info_hash_free(old_info_hash, bytes);
911 fib_info_hash_free(old_laddrhash, bytes);
912}
913
914__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
915{
916 nh->nh_saddr = inet_select_addr(nh->nh_dev,
917 nh->nh_gw,
918 nh->nh_parent->fib_scope);
919 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
920
921 return nh->nh_saddr;
922}
923
924static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
925{
926 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
927 fib_prefsrc != cfg->fc_dst) {
928 u32 tb_id = cfg->fc_table;
929 int rc;
930
931 if (tb_id == RT_TABLE_MAIN)
932 tb_id = RT_TABLE_LOCAL;
933
934 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
935 fib_prefsrc, tb_id);
936
937 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
938 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
939 fib_prefsrc, RT_TABLE_LOCAL);
940 }
941
942 if (rc != RTN_LOCAL)
943 return false;
944 }
945 return true;
946}
947
948static int
949fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
950{
951 bool ecn_ca = false;
952 struct nlattr *nla;
953 int remaining;
954
955 if (!cfg->fc_mx)
956 return 0;
957
958 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
959 int type = nla_type(nla);
960 u32 val;
961
962 if (!type)
963 continue;
964 if (type > RTAX_MAX)
965 return -EINVAL;
966
967 if (type == RTAX_CC_ALGO) {
968 char tmp[TCP_CA_NAME_MAX];
969
970 nla_strlcpy(tmp, nla, sizeof(tmp));
971 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
972 if (val == TCP_CA_UNSPEC)
973 return -EINVAL;
974 } else {
975 val = nla_get_u32(nla);
976 }
977 if (type == RTAX_ADVMSS && val > 65535 - 40)
978 val = 65535 - 40;
979 if (type == RTAX_MTU && val > 65535 - 15)
980 val = 65535 - 15;
981 if (type == RTAX_HOPLIMIT && val > 255)
982 val = 255;
983 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
984 return -EINVAL;
985 fi->fib_metrics[type - 1] = val;
986 }
987
988 if (ecn_ca)
989 fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
990
991 return 0;
992}
993
994struct fib_info *fib_create_info(struct fib_config *cfg)
995{
996 int err;
997 struct fib_info *fi = NULL;
998 struct fib_info *ofi;
999 int nhs = 1;
1000 struct net *net = cfg->fc_nlinfo.nl_net;
1001
1002 if (cfg->fc_type > RTN_MAX)
1003 goto err_inval;
1004
1005
1006 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
1007 goto err_inval;
1008
1009 if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
1010 goto err_inval;
1011
1012#ifdef CONFIG_IP_ROUTE_MULTIPATH
1013 if (cfg->fc_mp) {
1014 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
1015 if (nhs == 0)
1016 goto err_inval;
1017 }
1018#endif
1019
1020 err = -ENOBUFS;
1021 if (fib_info_cnt >= fib_info_hash_size) {
1022 unsigned int new_size = fib_info_hash_size << 1;
1023 struct hlist_head *new_info_hash;
1024 struct hlist_head *new_laddrhash;
1025 unsigned int bytes;
1026
1027 if (!new_size)
1028 new_size = 16;
1029 bytes = new_size * sizeof(struct hlist_head *);
1030 new_info_hash = fib_info_hash_alloc(bytes);
1031 new_laddrhash = fib_info_hash_alloc(bytes);
1032 if (!new_info_hash || !new_laddrhash) {
1033 fib_info_hash_free(new_info_hash, bytes);
1034 fib_info_hash_free(new_laddrhash, bytes);
1035 } else
1036 fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
1037
1038 if (!fib_info_hash_size)
1039 goto failure;
1040 }
1041
1042 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
1043 if (!fi)
1044 goto failure;
1045 fib_info_cnt++;
1046 if (cfg->fc_mx) {
1047 fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1048 if (!fi->fib_metrics)
1049 goto failure;
1050 } else
1051 fi->fib_metrics = (u32 *) dst_default_metrics;
1052
1053 fi->fib_net = net;
1054 fi->fib_protocol = cfg->fc_protocol;
1055 fi->fib_scope = cfg->fc_scope;
1056 fi->fib_flags = cfg->fc_flags;
1057 fi->fib_priority = cfg->fc_priority;
1058 fi->fib_prefsrc = cfg->fc_prefsrc;
1059 fi->fib_type = cfg->fc_type;
1060 fi->fib_tb_id = cfg->fc_table;
1061
1062 fi->fib_nhs = nhs;
1063 change_nexthops(fi) {
1064 nexthop_nh->nh_parent = fi;
1065 nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
1066 if (!nexthop_nh->nh_pcpu_rth_output)
1067 goto failure;
1068 } endfor_nexthops(fi)
1069
1070 err = fib_convert_metrics(fi, cfg);
1071 if (err)
1072 goto failure;
1073
1074 if (cfg->fc_mp) {
1075#ifdef CONFIG_IP_ROUTE_MULTIPATH
1076 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
1077 if (err != 0)
1078 goto failure;
1079 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
1080 goto err_inval;
1081 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
1082 goto err_inval;
1083#ifdef CONFIG_IP_ROUTE_CLASSID
1084 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
1085 goto err_inval;
1086#endif
1087#else
1088 goto err_inval;
1089#endif
1090 } else {
1091 struct fib_nh *nh = fi->fib_nh;
1092
1093 if (cfg->fc_encap) {
1094 struct lwtunnel_state *lwtstate;
1095 struct net_device *dev = NULL;
1096
1097 if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
1098 goto err_inval;
1099 if (cfg->fc_oif)
1100 dev = __dev_get_by_index(net, cfg->fc_oif);
1101 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1102 cfg->fc_encap, AF_INET, cfg,
1103 &lwtstate);
1104 if (err)
1105 goto failure;
1106
1107 nh->nh_lwtstate = lwtstate_get(lwtstate);
1108 }
1109 nh->nh_oif = cfg->fc_oif;
1110 nh->nh_gw = cfg->fc_gw;
1111 nh->nh_flags = cfg->fc_flags;
1112#ifdef CONFIG_IP_ROUTE_CLASSID
1113 nh->nh_tclassid = cfg->fc_flow;
1114 if (nh->nh_tclassid)
1115 fi->fib_net->ipv4.fib_num_tclassid_users++;
1116#endif
1117#ifdef CONFIG_IP_ROUTE_MULTIPATH
1118 nh->nh_weight = 1;
1119#endif
1120 }
1121
1122 if (fib_props[cfg->fc_type].error) {
1123 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
1124 goto err_inval;
1125 goto link_it;
1126 } else {
1127 switch (cfg->fc_type) {
1128 case RTN_UNICAST:
1129 case RTN_LOCAL:
1130 case RTN_BROADCAST:
1131 case RTN_ANYCAST:
1132 case RTN_MULTICAST:
1133 break;
1134 default:
1135 goto err_inval;
1136 }
1137 }
1138
1139 if (cfg->fc_scope > RT_SCOPE_HOST)
1140 goto err_inval;
1141
1142 if (cfg->fc_scope == RT_SCOPE_HOST) {
1143 struct fib_nh *nh = fi->fib_nh;
1144
1145
1146 if (nhs != 1 || nh->nh_gw)
1147 goto err_inval;
1148 nh->nh_scope = RT_SCOPE_NOWHERE;
1149 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
1150 err = -ENODEV;
1151 if (!nh->nh_dev)
1152 goto failure;
1153 } else {
1154 int linkdown = 0;
1155
1156 change_nexthops(fi) {
1157 err = fib_check_nh(cfg, fi, nexthop_nh);
1158 if (err != 0)
1159 goto failure;
1160 if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
1161 linkdown++;
1162 } endfor_nexthops(fi)
1163 if (linkdown == fi->fib_nhs)
1164 fi->fib_flags |= RTNH_F_LINKDOWN;
1165 }
1166
1167 if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc))
1168 goto err_inval;
1169
1170 change_nexthops(fi) {
1171 fib_info_update_nh_saddr(net, nexthop_nh);
1172 fib_add_weight(fi, nexthop_nh);
1173 } endfor_nexthops(fi)
1174
1175 fib_rebalance(fi);
1176
1177link_it:
1178 ofi = fib_find_info(fi);
1179 if (ofi) {
1180 fi->fib_dead = 1;
1181 free_fib_info(fi);
1182 ofi->fib_treeref++;
1183 return ofi;
1184 }
1185
1186 fi->fib_treeref++;
1187 atomic_inc(&fi->fib_clntref);
1188 spin_lock_bh(&fib_info_lock);
1189 hlist_add_head(&fi->fib_hash,
1190 &fib_info_hash[fib_info_hashfn(fi)]);
1191 if (fi->fib_prefsrc) {
1192 struct hlist_head *head;
1193
1194 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
1195 hlist_add_head(&fi->fib_lhash, head);
1196 }
1197 change_nexthops(fi) {
1198 struct hlist_head *head;
1199 unsigned int hash;
1200
1201 if (!nexthop_nh->nh_dev)
1202 continue;
1203 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
1204 head = &fib_info_devhash[hash];
1205 hlist_add_head(&nexthop_nh->nh_hash, head);
1206 } endfor_nexthops(fi)
1207 spin_unlock_bh(&fib_info_lock);
1208 return fi;
1209
1210err_inval:
1211 err = -EINVAL;
1212
1213failure:
1214 if (fi) {
1215 fi->fib_dead = 1;
1216 free_fib_info(fi);
1217 }
1218
1219 return ERR_PTR(err);
1220}
1221
1222int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1223 u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
1224 struct fib_info *fi, unsigned int flags)
1225{
1226 struct nlmsghdr *nlh;
1227 struct rtmsg *rtm;
1228
1229 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
1230 if (!nlh)
1231 return -EMSGSIZE;
1232
1233 rtm = nlmsg_data(nlh);
1234 rtm->rtm_family = AF_INET;
1235 rtm->rtm_dst_len = dst_len;
1236 rtm->rtm_src_len = 0;
1237 rtm->rtm_tos = tos;
1238 if (tb_id < 256)
1239 rtm->rtm_table = tb_id;
1240 else
1241 rtm->rtm_table = RT_TABLE_COMPAT;
1242 if (nla_put_u32(skb, RTA_TABLE, tb_id))
1243 goto nla_put_failure;
1244 rtm->rtm_type = type;
1245 rtm->rtm_flags = fi->fib_flags;
1246 rtm->rtm_scope = fi->fib_scope;
1247 rtm->rtm_protocol = fi->fib_protocol;
1248
1249 if (rtm->rtm_dst_len &&
1250 nla_put_in_addr(skb, RTA_DST, dst))
1251 goto nla_put_failure;
1252 if (fi->fib_priority &&
1253 nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
1254 goto nla_put_failure;
1255 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
1256 goto nla_put_failure;
1257
1258 if (fi->fib_prefsrc &&
1259 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
1260 goto nla_put_failure;
1261 if (fi->fib_nhs == 1) {
1262 struct in_device *in_dev;
1263
1264 if (fi->fib_nh->nh_gw &&
1265 nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
1266 goto nla_put_failure;
1267 if (fi->fib_nh->nh_oif &&
1268 nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
1269 goto nla_put_failure;
1270 if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
1271 in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
1272 if (in_dev &&
1273 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1274 rtm->rtm_flags |= RTNH_F_DEAD;
1275 }
1276#ifdef CONFIG_IP_ROUTE_CLASSID
1277 if (fi->fib_nh[0].nh_tclassid &&
1278 nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
1279 goto nla_put_failure;
1280#endif
1281 if (fi->fib_nh->nh_lwtstate)
1282 lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate);
1283 }
1284#ifdef CONFIG_IP_ROUTE_MULTIPATH
1285 if (fi->fib_nhs > 1) {
1286 struct rtnexthop *rtnh;
1287 struct nlattr *mp;
1288
1289 mp = nla_nest_start(skb, RTA_MULTIPATH);
1290 if (!mp)
1291 goto nla_put_failure;
1292
1293 for_nexthops(fi) {
1294 struct in_device *in_dev;
1295
1296 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1297 if (!rtnh)
1298 goto nla_put_failure;
1299
1300 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1301 if (nh->nh_flags & RTNH_F_LINKDOWN) {
1302 in_dev = __in_dev_get_rtnl(nh->nh_dev);
1303 if (in_dev &&
1304 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1305 rtnh->rtnh_flags |= RTNH_F_DEAD;
1306 }
1307 rtnh->rtnh_hops = nh->nh_weight - 1;
1308 rtnh->rtnh_ifindex = nh->nh_oif;
1309
1310 if (nh->nh_gw &&
1311 nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
1312 goto nla_put_failure;
1313#ifdef CONFIG_IP_ROUTE_CLASSID
1314 if (nh->nh_tclassid &&
1315 nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
1316 goto nla_put_failure;
1317#endif
1318 if (nh->nh_lwtstate)
1319 lwtunnel_fill_encap(skb, nh->nh_lwtstate);
1320
1321 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1322 } endfor_nexthops(fi);
1323
1324 nla_nest_end(skb, mp);
1325 }
1326#endif
1327 nlmsg_end(skb, nlh);
1328 return 0;
1329
1330nla_put_failure:
1331 nlmsg_cancel(skb, nlh);
1332 return -EMSGSIZE;
1333}
1334
1335
1336
1337
1338
1339
1340
1341int fib_sync_down_addr(struct net_device *dev, __be32 local)
1342{
1343 int ret = 0;
1344 unsigned int hash = fib_laddr_hashfn(local);
1345 struct hlist_head *head = &fib_info_laddrhash[hash];
1346 struct net *net = dev_net(dev);
1347 int tb_id = l3mdev_fib_table(dev);
1348 struct fib_info *fi;
1349
1350 if (!fib_info_laddrhash || local == 0)
1351 return 0;
1352
1353 hlist_for_each_entry(fi, head, fib_lhash) {
1354 if (!net_eq(fi->fib_net, net) ||
1355 fi->fib_tb_id != tb_id)
1356 continue;
1357 if (fi->fib_prefsrc == local) {
1358 fi->fib_flags |= RTNH_F_DEAD;
1359 ret++;
1360 }
1361 }
1362 return ret;
1363}
1364
1365
1366
1367
1368
1369
1370
1371int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
1372{
1373 int ret = 0;
1374 int scope = RT_SCOPE_NOWHERE;
1375 struct fib_info *prev_fi = NULL;
1376 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1377 struct hlist_head *head = &fib_info_devhash[hash];
1378 struct fib_nh *nh;
1379
1380 if (force)
1381 scope = -1;
1382
1383 hlist_for_each_entry(nh, head, nh_hash) {
1384 struct fib_info *fi = nh->nh_parent;
1385 int dead;
1386
1387 BUG_ON(!fi->fib_nhs);
1388 if (nh->nh_dev != dev || fi == prev_fi)
1389 continue;
1390 prev_fi = fi;
1391 dead = 0;
1392 change_nexthops(fi) {
1393 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1394 dead++;
1395 else if (nexthop_nh->nh_dev == dev &&
1396 nexthop_nh->nh_scope != scope) {
1397 switch (event) {
1398 case NETDEV_DOWN:
1399 case NETDEV_UNREGISTER:
1400 nexthop_nh->nh_flags |= RTNH_F_DEAD;
1401
1402 case NETDEV_CHANGE:
1403 nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
1404 break;
1405 }
1406 dead++;
1407 }
1408#ifdef CONFIG_IP_ROUTE_MULTIPATH
1409 if (event == NETDEV_UNREGISTER &&
1410 nexthop_nh->nh_dev == dev) {
1411 dead = fi->fib_nhs;
1412 break;
1413 }
1414#endif
1415 } endfor_nexthops(fi)
1416 if (dead == fi->fib_nhs) {
1417 switch (event) {
1418 case NETDEV_DOWN:
1419 case NETDEV_UNREGISTER:
1420 fi->fib_flags |= RTNH_F_DEAD;
1421
1422 case NETDEV_CHANGE:
1423 fi->fib_flags |= RTNH_F_LINKDOWN;
1424 break;
1425 }
1426 ret++;
1427 }
1428
1429 fib_rebalance(fi);
1430 }
1431
1432 return ret;
1433}
1434
1435
1436void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
1437{
1438 struct fib_info *fi = NULL, *last_resort = NULL;
1439 struct hlist_head *fa_head = res->fa_head;
1440 struct fib_table *tb = res->table;
1441 u8 slen = 32 - res->prefixlen;
1442 int order = -1, last_idx = -1;
1443 struct fib_alias *fa, *fa1 = NULL;
1444 u32 last_prio = res->fi->fib_priority;
1445 u8 last_tos = 0;
1446
1447 hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
1448 struct fib_info *next_fi = fa->fa_info;
1449
1450 if (fa->fa_slen != slen)
1451 continue;
1452 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1453 continue;
1454 if (fa->tb_id != tb->tb_id)
1455 continue;
1456 if (next_fi->fib_priority > last_prio &&
1457 fa->fa_tos == last_tos) {
1458 if (last_tos)
1459 continue;
1460 break;
1461 }
1462 if (next_fi->fib_flags & RTNH_F_DEAD)
1463 continue;
1464 last_tos = fa->fa_tos;
1465 last_prio = next_fi->fib_priority;
1466
1467 if (next_fi->fib_scope != res->scope ||
1468 fa->fa_type != RTN_UNICAST)
1469 continue;
1470 if (!next_fi->fib_nh[0].nh_gw ||
1471 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1472 continue;
1473
1474 fib_alias_accessed(fa);
1475
1476 if (!fi) {
1477 if (next_fi != res->fi)
1478 break;
1479 fa1 = fa;
1480 } else if (!fib_detect_death(fi, order, &last_resort,
1481 &last_idx, fa1->fa_default)) {
1482 fib_result_assign(res, fi);
1483 fa1->fa_default = order;
1484 goto out;
1485 }
1486 fi = next_fi;
1487 order++;
1488 }
1489
1490 if (order <= 0 || !fi) {
1491 if (fa1)
1492 fa1->fa_default = -1;
1493 goto out;
1494 }
1495
1496 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1497 fa1->fa_default)) {
1498 fib_result_assign(res, fi);
1499 fa1->fa_default = order;
1500 goto out;
1501 }
1502
1503 if (last_idx >= 0)
1504 fib_result_assign(res, last_resort);
1505 fa1->fa_default = last_idx;
1506out:
1507 return;
1508}
1509
1510
1511
1512
1513
1514int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1515{
1516 struct fib_info *prev_fi;
1517 unsigned int hash;
1518 struct hlist_head *head;
1519 struct fib_nh *nh;
1520 int ret;
1521
1522 if (!(dev->flags & IFF_UP))
1523 return 0;
1524
1525 if (nh_flags & RTNH_F_DEAD) {
1526 unsigned int flags = dev_get_flags(dev);
1527
1528 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1529 nh_flags |= RTNH_F_LINKDOWN;
1530 }
1531
1532 prev_fi = NULL;
1533 hash = fib_devindex_hashfn(dev->ifindex);
1534 head = &fib_info_devhash[hash];
1535 ret = 0;
1536
1537 hlist_for_each_entry(nh, head, nh_hash) {
1538 struct fib_info *fi = nh->nh_parent;
1539 int alive;
1540
1541 BUG_ON(!fi->fib_nhs);
1542 if (nh->nh_dev != dev || fi == prev_fi)
1543 continue;
1544
1545 prev_fi = fi;
1546 alive = 0;
1547 change_nexthops(fi) {
1548 if (!(nexthop_nh->nh_flags & nh_flags)) {
1549 alive++;
1550 continue;
1551 }
1552 if (!nexthop_nh->nh_dev ||
1553 !(nexthop_nh->nh_dev->flags & IFF_UP))
1554 continue;
1555 if (nexthop_nh->nh_dev != dev ||
1556 !__in_dev_get_rtnl(dev))
1557 continue;
1558 alive++;
1559 nexthop_nh->nh_flags &= ~nh_flags;
1560 } endfor_nexthops(fi)
1561
1562 if (alive > 0) {
1563 fi->fib_flags &= ~nh_flags;
1564 ret++;
1565 }
1566
1567 fib_rebalance(fi);
1568 }
1569
1570 return ret;
1571}
1572
1573#ifdef CONFIG_IP_ROUTE_MULTIPATH
1574static bool fib_good_nh(const struct fib_nh *nh)
1575{
1576 int state = NUD_REACHABLE;
1577
1578 if (nh->nh_scope == RT_SCOPE_LINK) {
1579 struct neighbour *n;
1580
1581 rcu_read_lock_bh();
1582
1583 n = __ipv4_neigh_lookup_noref(nh->nh_dev,
1584 (__force u32)nh->nh_gw);
1585 if (n)
1586 state = n->nud_state;
1587
1588 rcu_read_unlock_bh();
1589 }
1590
1591 return !!(state & NUD_VALID);
1592}
1593
1594void fib_select_multipath(struct fib_result *res, int hash)
1595{
1596 struct fib_info *fi = res->fi;
1597 struct net *net = fi->fib_net;
1598 bool first = false;
1599
1600 for_nexthops(fi) {
1601 if (hash > atomic_read(&nh->nh_upper_bound))
1602 continue;
1603
1604 if (!net->ipv4.sysctl_fib_multipath_use_neigh ||
1605 fib_good_nh(nh)) {
1606 res->nh_sel = nhsel;
1607 return;
1608 }
1609 if (!first) {
1610 res->nh_sel = nhsel;
1611 first = true;
1612 }
1613 } endfor_nexthops(fi);
1614}
1615#endif
1616
1617void fib_select_path(struct net *net, struct fib_result *res,
1618 struct flowi4 *fl4, int mp_hash)
1619{
1620#ifdef CONFIG_IP_ROUTE_MULTIPATH
1621 if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
1622 if (mp_hash < 0)
1623 mp_hash = get_hash_from_flowi4(fl4) >> 1;
1624
1625 fib_select_multipath(res, mp_hash);
1626 }
1627 else
1628#endif
1629 if (!res->prefixlen &&
1630 res->table->tb_num_default > 1 &&
1631 res->type == RTN_UNICAST && !fl4->flowi4_oif)
1632 fib_select_default(fl4, res);
1633
1634 if (!fl4->saddr)
1635 fl4->saddr = FIB_RES_PREFSRC(net, *res);
1636}
1637EXPORT_SYMBOL_GPL(fib_select_path);
1638