1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <linux/uaccess.h>
17#include <linux/bitops.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/jiffies.h>
21#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/errno.h>
26#include <linux/in.h>
27#include <linux/inet.h>
28#include <linux/inetdevice.h>
29#include <linux/netdevice.h>
30#include <linux/if_arp.h>
31#include <linux/proc_fs.h>
32#include <linux/skbuff.h>
33#include <linux/init.h>
34#include <linux/slab.h>
35#include <linux/netlink.h>
36
37#include <net/arp.h>
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/ip_fib.h>
44#include <net/netlink.h>
45#include <net/nexthop.h>
46#include <net/lwtunnel.h>
47#include <net/fib_notifier.h>
48
49#include "fib_lookup.h"
50
/* Taken (mostly with _bh) around every insert/remove on the tables below. */
static DEFINE_SPINLOCK(fib_info_lock);
/* fib_info objects, bucketed by fib_info_hashfn() and chained via fib_hash. */
static struct hlist_head *fib_info_hash;
/*
 * fib_info objects that have a fib_prefsrc, bucketed by fib_laddr_hashfn()
 * and chained via fib_lhash.
 */
static struct hlist_head *fib_info_laddrhash;
/* Bucket count of both tables above; the hash functions mask with size - 1. */
static unsigned int fib_info_hash_size;
/* Incremented in fib_create_info(), decremented in free_fib_info(). */
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
/*
 * Nexthops that have a device, bucketed by fib_devindex_hashfn(ifindex)
 * and chained via nh_hash.
 */
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
 * for_nexthops(fi) opens a brace block and a loop over all fi->fib_nhs
 * nexthops of @fi.  Inside the loop body `nhsel` is the index and `nh` a
 * const pointer to the current nexthop.  The construct MUST be closed with
 * endfor_nexthops(fi), which supplies the matching closing brace.
 */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

/*
 * Same as for_nexthops(), but the current nexthop is exposed as the
 * writable pointer `nexthop_nh`.
 */
#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0,	nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < (fi)->fib_nhs;					\
	     nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath support only the first nexthop is visited: the loop
 * runs exactly once so that `continue`/`break` in the body keep working
 * the same way as in the multipath variant.
 */

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace block opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
91
92
/*
 * Per route type (RTN_*) properties.
 *
 * As used by fib_create_info() in this file:
 *  - .scope: a configuration is rejected when its fc_scope is numerically
 *    smaller than the .scope listed for its route type;
 *  - .error: when non-zero the route may not carry a gateway, an output
 *    device or a multipath specification, and nexthop validation is
 *    skipped for it.
 */
const struct fib_prop fib_props[RTN_MAX + 1] = {
	[RTN_UNSPEC] = {
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_UNICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_LOCAL] = {
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},
	[RTN_BROADCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_ANYCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_MULTICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	/* The remaining types all carry a non-zero .error. */
	[RTN_BLACKHOLE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_UNREACHABLE] = {
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_PROHIBIT] = {
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_THROW] = {
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_NAT] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_XRESOLVE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
};
143
144static void rt_fibinfo_free(struct rtable __rcu **rtp)
145{
146 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
147
148 if (!rt)
149 return;
150
151
152
153
154
155
156 dst_dev_put(&rt->dst);
157 dst_release_immediate(&rt->dst);
158}
159
160static void free_nh_exceptions(struct fib_nh *nh)
161{
162 struct fnhe_hash_bucket *hash;
163 int i;
164
165 hash = rcu_dereference_protected(nh->nh_exceptions, 1);
166 if (!hash)
167 return;
168 for (i = 0; i < FNHE_HASH_SIZE; i++) {
169 struct fib_nh_exception *fnhe;
170
171 fnhe = rcu_dereference_protected(hash[i].chain, 1);
172 while (fnhe) {
173 struct fib_nh_exception *next;
174
175 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
176
177 rt_fibinfo_free(&fnhe->fnhe_rth_input);
178 rt_fibinfo_free(&fnhe->fnhe_rth_output);
179
180 kfree(fnhe);
181
182 fnhe = next;
183 }
184 }
185 kfree(hash);
186}
187
188static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
189{
190 int cpu;
191
192 if (!rtp)
193 return;
194
195 for_each_possible_cpu(cpu) {
196 struct rtable *rt;
197
198 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
199 if (rt) {
200 dst_dev_put(&rt->dst);
201 dst_release_immediate(&rt->dst);
202 }
203 }
204 free_percpu(rtp);
205}
206
207
208static void free_fib_info_rcu(struct rcu_head *head)
209{
210 struct fib_info *fi = container_of(head, struct fib_info, rcu);
211 struct dst_metrics *m;
212
213 change_nexthops(fi) {
214 if (nexthop_nh->nh_dev)
215 dev_put(nexthop_nh->nh_dev);
216 lwtstate_put(nexthop_nh->nh_lwtstate);
217 free_nh_exceptions(nexthop_nh);
218 rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
219 rt_fibinfo_free(&nexthop_nh->nh_rth_input);
220 } endfor_nexthops(fi);
221
222 m = fi->fib_metrics;
223 if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
224 kfree(m);
225 kfree(fi);
226}
227
228void free_fib_info(struct fib_info *fi)
229{
230 if (fi->fib_dead == 0) {
231 pr_warn("Freeing alive fib_info %p\n", fi);
232 return;
233 }
234 fib_info_cnt--;
235#ifdef CONFIG_IP_ROUTE_CLASSID
236 change_nexthops(fi) {
237 if (nexthop_nh->nh_tclassid)
238 fi->fib_net->ipv4.fib_num_tclassid_users--;
239 } endfor_nexthops(fi);
240#endif
241 call_rcu(&fi->rcu, free_fib_info_rcu);
242}
243EXPORT_SYMBOL_GPL(free_fib_info);
244
245void fib_release_info(struct fib_info *fi)
246{
247 spin_lock_bh(&fib_info_lock);
248 if (fi && --fi->fib_treeref == 0) {
249 hlist_del(&fi->fib_hash);
250 if (fi->fib_prefsrc)
251 hlist_del(&fi->fib_lhash);
252 change_nexthops(fi) {
253 if (!nexthop_nh->nh_dev)
254 continue;
255 hlist_del(&nexthop_nh->nh_hash);
256 } endfor_nexthops(fi)
257 fi->fib_dead = 1;
258 fib_info_put(fi);
259 }
260 spin_unlock_bh(&fib_info_lock);
261}
262
263static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
264{
265 const struct fib_nh *onh = ofi->fib_nh;
266
267 for_nexthops(fi) {
268 if (nh->nh_oif != onh->nh_oif ||
269 nh->nh_gw != onh->nh_gw ||
270 nh->nh_scope != onh->nh_scope ||
271#ifdef CONFIG_IP_ROUTE_MULTIPATH
272 nh->nh_weight != onh->nh_weight ||
273#endif
274#ifdef CONFIG_IP_ROUTE_CLASSID
275 nh->nh_tclassid != onh->nh_tclassid ||
276#endif
277 lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
278 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
279 return -1;
280 onh++;
281 } endfor_nexthops(fi);
282 return 0;
283}
284
285static inline unsigned int fib_devindex_hashfn(unsigned int val)
286{
287 unsigned int mask = DEVINDEX_HASHSIZE - 1;
288
289 return (val ^
290 (val >> DEVINDEX_HASHBITS) ^
291 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
292}
293
294static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
295{
296 unsigned int mask = (fib_info_hash_size - 1);
297 unsigned int val = fi->fib_nhs;
298
299 val ^= (fi->fib_protocol << 8) | fi->fib_scope;
300 val ^= (__force u32)fi->fib_prefsrc;
301 val ^= fi->fib_priority;
302 for_nexthops(fi) {
303 val ^= fib_devindex_hashfn(nh->nh_oif);
304 } endfor_nexthops(fi)
305
306 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
307}
308
309static struct fib_info *fib_find_info(const struct fib_info *nfi)
310{
311 struct hlist_head *head;
312 struct fib_info *fi;
313 unsigned int hash;
314
315 hash = fib_info_hashfn(nfi);
316 head = &fib_info_hash[hash];
317
318 hlist_for_each_entry(fi, head, fib_hash) {
319 if (!net_eq(fi->fib_net, nfi->fib_net))
320 continue;
321 if (fi->fib_nhs != nfi->fib_nhs)
322 continue;
323 if (nfi->fib_protocol == fi->fib_protocol &&
324 nfi->fib_scope == fi->fib_scope &&
325 nfi->fib_prefsrc == fi->fib_prefsrc &&
326 nfi->fib_priority == fi->fib_priority &&
327 nfi->fib_type == fi->fib_type &&
328 memcmp(nfi->fib_metrics, fi->fib_metrics,
329 sizeof(u32) * RTAX_MAX) == 0 &&
330 !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
331 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
332 return fi;
333 }
334
335 return NULL;
336}
337
338
339
340
341int ip_fib_check_default(__be32 gw, struct net_device *dev)
342{
343 struct hlist_head *head;
344 struct fib_nh *nh;
345 unsigned int hash;
346
347 spin_lock(&fib_info_lock);
348
349 hash = fib_devindex_hashfn(dev->ifindex);
350 head = &fib_info_devhash[hash];
351 hlist_for_each_entry(nh, head, nh_hash) {
352 if (nh->nh_dev == dev &&
353 nh->nh_gw == gw &&
354 !(nh->nh_flags & RTNH_F_DEAD)) {
355 spin_unlock(&fib_info_lock);
356 return 0;
357 }
358 }
359
360 spin_unlock(&fib_info_lock);
361
362 return -1;
363}
364
365static inline size_t fib_nlmsg_size(struct fib_info *fi)
366{
367 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
368 + nla_total_size(4)
369 + nla_total_size(4)
370 + nla_total_size(4)
371 + nla_total_size(4)
372 + nla_total_size(TCP_CA_NAME_MAX);
373
374
375 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
376
377 if (fi->fib_nhs) {
378 size_t nh_encapsize = 0;
379
380
381
382 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
383
384
385 nhsize += 2 * nla_total_size(4);
386
387
388 for_nexthops(fi) {
389 if (nh->nh_lwtstate) {
390
391 nh_encapsize += lwtunnel_get_encap_size(
392 nh->nh_lwtstate);
393
394 nh_encapsize += nla_total_size(2);
395 }
396 } endfor_nexthops(fi);
397
398
399 payload += nla_total_size((fi->fib_nhs * nhsize) +
400 nh_encapsize);
401
402 }
403
404 return payload;
405}
406
407void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
408 int dst_len, u32 tb_id, const struct nl_info *info,
409 unsigned int nlm_flags)
410{
411 struct sk_buff *skb;
412 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
413 int err = -ENOBUFS;
414
415 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
416 if (!skb)
417 goto errout;
418
419 err = fib_dump_info(skb, info->portid, seq, event, tb_id,
420 fa->fa_type, key, dst_len,
421 fa->fa_tos, fa->fa_info, nlm_flags);
422 if (err < 0) {
423
424 WARN_ON(err == -EMSGSIZE);
425 kfree_skb(skb);
426 goto errout;
427 }
428 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
429 info->nlh, GFP_KERNEL);
430 return;
431errout:
432 if (err < 0)
433 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
434}
435
436static int fib_detect_death(struct fib_info *fi, int order,
437 struct fib_info **last_resort, int *last_idx,
438 int dflt)
439{
440 struct neighbour *n;
441 int state = NUD_NONE;
442
443 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
444 if (n) {
445 state = n->nud_state;
446 neigh_release(n);
447 } else {
448 return 0;
449 }
450 if (state == NUD_REACHABLE)
451 return 0;
452 if ((state & NUD_VALID) && order != dflt)
453 return 0;
454 if ((state & NUD_VALID) ||
455 (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
456 *last_resort = fi;
457 *last_idx = order;
458 }
459 return 1;
460}
461
462#ifdef CONFIG_IP_ROUTE_MULTIPATH
463
/*
 * Count the rtnexthop records in the @remaining bytes starting at @rtnh.
 * Returns the count, or 0 (with an extack message) when bytes are left
 * over after the last valid record.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
			      struct netlink_ext_ack *extack)
{
	int nhs;

	for (nhs = 0; rtnh_ok(rtnh, remaining); nhs++)
		rtnh = rtnh_next(rtnh, &remaining);

	if (remaining <= 0)
		return nhs;

	/* Leftover data: treat the whole configuration as invalid. */
	NL_SET_ERR_MSG(extack,
		       "Invalid nexthop configuration - extra data after nexthops");
	return 0;
}
483
/*
 * Fill the nexthops of @fi from the rtnexthop records that start at @rtnh
 * and span @remaining bytes, consuming one record per nexthop.
 *
 * Returns 0 on success.  Returns -EINVAL when rtnh_ok() rejects the
 * record for a nexthop, when a record carries RTNH_F_DEAD or
 * RTNH_F_LINKDOWN, or when RTA_ENCAP is present without RTA_ENCAP_TYPE;
 * returns the error of lwtunnel_build_state() when that fails.  Nexthops
 * filled before the failure are left as they are.
 */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
		       int remaining, struct fib_config *cfg,
		       struct netlink_ext_ack *extack)
{
	int ret;

	change_nexthops(fi) {
		int attrlen;

		/* There must be a well-formed record for every nexthop. */
		if (!rtnh_ok(rtnh, remaining)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid nexthop configuration - extra data after nexthop");
			return -EINVAL;
		}

		if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
			return -EINVAL;
		}

		/*
		 * Low 8 flag bits come from the record itself, everything
		 * above from the route-wide flags.
		 */
		nexthop_nh->nh_flags =
			(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
		nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
		/* rtnh_hops stores weight - 1 (see fib_dump_info()). */
		nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;

		/* Gateway/flow/encap are only touched if attributes follow. */
		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
			nla = nla_find(attrs, attrlen, RTA_FLOW);
			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
			/* Balanced by the decrement in free_fib_info(). */
			if (nexthop_nh->nh_tclassid)
				fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif
			nla = nla_find(attrs, attrlen, RTA_ENCAP);
			if (nla) {
				struct lwtunnel_state *lwtstate;
				struct nlattr *nla_entype;

				nla_entype = nla_find(attrs, attrlen,
						      RTA_ENCAP_TYPE);
				if (!nla_entype) {
					NL_SET_BAD_ATTR(extack, nla);
					NL_SET_ERR_MSG(extack,
						       "Encap type is missing");
					goto err_inval;
				}

				ret = lwtunnel_build_state(nla_get_u16(
							   nla_entype),
							   nla,  AF_INET, cfg,
							   &lwtstate, extack);
				if (ret)
					goto errout;
				nexthop_nh->nh_lwtstate =
					lwtstate_get(lwtstate);
			}
		}

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);

	return 0;

err_inval:
	ret = -EINVAL;

errout:
	return ret;
}
558
559static void fib_rebalance(struct fib_info *fi)
560{
561 int total;
562 int w;
563 struct in_device *in_dev;
564
565 if (fi->fib_nhs < 2)
566 return;
567
568 total = 0;
569 for_nexthops(fi) {
570 if (nh->nh_flags & RTNH_F_DEAD)
571 continue;
572
573 in_dev = __in_dev_get_rtnl(nh->nh_dev);
574
575 if (in_dev &&
576 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
577 nh->nh_flags & RTNH_F_LINKDOWN)
578 continue;
579
580 total += nh->nh_weight;
581 } endfor_nexthops(fi);
582
583 w = 0;
584 change_nexthops(fi) {
585 int upper_bound;
586
587 in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
588
589 if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
590 upper_bound = -1;
591 } else if (in_dev &&
592 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
593 nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
594 upper_bound = -1;
595 } else {
596 w += nexthop_nh->nh_weight;
597 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
598 total) - 1;
599 }
600
601 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
602 } endfor_nexthops(fi);
603}
604#else
605
/* Without CONFIG_IP_ROUTE_MULTIPATH there is nothing to rebalance. */
#define fib_rebalance(fi) do { } while (0)
607
608#endif
609
610static int fib_encap_match(u16 encap_type,
611 struct nlattr *encap,
612 const struct fib_nh *nh,
613 const struct fib_config *cfg,
614 struct netlink_ext_ack *extack)
615{
616 struct lwtunnel_state *lwtstate;
617 int ret, result = 0;
618
619 if (encap_type == LWTUNNEL_ENCAP_NONE)
620 return 0;
621
622 ret = lwtunnel_build_state(encap_type, encap, AF_INET,
623 cfg, &lwtstate, extack);
624 if (!ret) {
625 result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
626 lwtstate_free(lwtstate);
627 }
628
629 return result;
630}
631
632int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
633 struct netlink_ext_ack *extack)
634{
635#ifdef CONFIG_IP_ROUTE_MULTIPATH
636 struct rtnexthop *rtnh;
637 int remaining;
638#endif
639
640 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
641 return 1;
642
643 if (cfg->fc_oif || cfg->fc_gw) {
644 if (cfg->fc_encap) {
645 if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
646 fi->fib_nh, cfg, extack))
647 return 1;
648 }
649#ifdef CONFIG_IP_ROUTE_CLASSID
650 if (cfg->fc_flow &&
651 cfg->fc_flow != fi->fib_nh->nh_tclassid)
652 return 1;
653#endif
654 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
655 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
656 return 0;
657 return 1;
658 }
659
660#ifdef CONFIG_IP_ROUTE_MULTIPATH
661 if (!cfg->fc_mp)
662 return 0;
663
664 rtnh = cfg->fc_mp;
665 remaining = cfg->fc_mp_len;
666
667 for_nexthops(fi) {
668 int attrlen;
669
670 if (!rtnh_ok(rtnh, remaining))
671 return -EINVAL;
672
673 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
674 return 1;
675
676 attrlen = rtnh_attrlen(rtnh);
677 if (attrlen > 0) {
678 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
679
680 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
681 if (nla && nla_get_in_addr(nla) != nh->nh_gw)
682 return 1;
683#ifdef CONFIG_IP_ROUTE_CLASSID
684 nla = nla_find(attrs, attrlen, RTA_FLOW);
685 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
686 return 1;
687#endif
688 }
689
690 rtnh = rtnh_next(rtnh, &remaining);
691 } endfor_nexthops(fi);
692#endif
693 return 0;
694}
695
696bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
697{
698 struct nlattr *nla;
699 int remaining;
700
701 if (!cfg->fc_mx)
702 return true;
703
704 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
705 int type = nla_type(nla);
706 u32 fi_val, val;
707
708 if (!type)
709 continue;
710 if (type > RTAX_MAX)
711 return false;
712
713 if (type == RTAX_CC_ALGO) {
714 char tmp[TCP_CA_NAME_MAX];
715 bool ecn_ca = false;
716
717 nla_strlcpy(tmp, nla, sizeof(tmp));
718 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
719 } else {
720 if (nla_len(nla) != sizeof(u32))
721 return false;
722 val = nla_get_u32(nla);
723 }
724
725 fi_val = fi->fib_metrics->metrics[type - 1];
726 if (type == RTAX_FEATURES)
727 fi_val &= ~DST_FEATURE_ECN_CA;
728
729 if (fi_val != val)
730 return false;
731 }
732
733 return true;
734}
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
/*
 * Validate one nexthop @nh of a route being created from @cfg and
 * resolve its output device.
 *
 * On success (return 0) nh->nh_dev is set with a reference taken via
 * dev_hold(), nh->nh_scope is set, and RTNH_F_LINKDOWN is added to
 * nh->nh_flags when the device has no carrier.
 *
 * Three cases are handled:
 *  - gateway with RTNH_F_ONLINK: the device is taken from nh_oif and the
 *    gateway must be a unicast address from that device's point of view;
 *  - gateway without ONLINK: the gateway is looked up in the FIB and the
 *    device/oif/scope are taken from the lookup result;
 *  - no gateway: the device is taken from nh_oif.
 *
 * Returns a negative errno on failure.  NOTE: on the final -ENETDOWN
 * result of the FIB-lookup case the device reference has already been
 * taken and nh->nh_dev is set.
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
			struct netlink_ext_ack *extack)
{
	int err = 0;
	struct net *net;
	struct net_device *dev;

	net = cfg->fc_nlinfo.nl_net;
	if (nh->nh_gw) {
		struct fib_result res;

		if (nh->nh_flags & RTNH_F_ONLINK) {
			unsigned int addr_type;

			/* This branch returns directly; no RCU section. */
			if (cfg->fc_scope >= RT_SCOPE_LINK) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid scope");
				return -EINVAL;
			}
			dev = __dev_get_by_index(net, nh->nh_oif);
			if (!dev)
				return -ENODEV;
			if (!(dev->flags & IFF_UP)) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop device is not up");
				return -ENETDOWN;
			}
			addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
			if (addr_type != RTN_UNICAST) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid gateway");
				return -EINVAL;
			}
			if (!netif_carrier_ok(dev))
				nh->nh_flags |= RTNH_F_LINKDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		/* Held until the `out` label or the early return below. */
		rcu_read_lock();
		{
			struct fib_table *tbl = NULL;
			struct flowi4 fl4 = {
				.daddr = nh->nh_gw,
				.flowi4_scope = cfg->fc_scope + 1,
				.flowi4_oif = nh->nh_oif,
				.flowi4_iif = LOOPBACK_IFINDEX,
			};

			/* Never look up with a scope below RT_SCOPE_LINK. */
			if (fl4.flowi4_scope < RT_SCOPE_LINK)
				fl4.flowi4_scope = RT_SCOPE_LINK;

			if (cfg->fc_table)
				tbl = fib_get_table(net, cfg->fc_table);

			/* Try the route's own table first, if it exists. */
			if (tbl)
				err = fib_table_lookup(tbl, &fl4, &res,
						   FIB_LOOKUP_IGNORE_LINKSTATE |
						   FIB_LOOKUP_NOREF);

			/*
			 * No table given/found, or the table lookup failed:
			 * fall back to the regular lookup.
			 */
			if (!tbl || err) {
				err = fib_lookup(net, &fl4, &res,
						 FIB_LOOKUP_IGNORE_LINKSTATE);
			}

			if (err) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid gateway");
				rcu_read_unlock();
				return err;
			}
		}
		err = -EINVAL;
		/* The gateway must resolve to a unicast or local route. */
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			goto out;
		}
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		nh->nh_dev = dev = FIB_RES_DEV(res);
		if (!dev) {
			NL_SET_ERR_MSG(extack,
				       "No egress device for nexthop gateway");
			goto out;
		}
		dev_hold(dev);
		if (!netif_carrier_ok(dev))
			nh->nh_flags |= RTNH_F_LINKDOWN;
		err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
	} else {
		struct in_device *in_dev;

		/* These flags make no sense without a gateway. */
		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
			return -EINVAL;
		}
		rcu_read_lock();
		err = -ENODEV;
		in_dev = inetdev_by_index(net, nh->nh_oif);
		if (!in_dev)
			goto out;
		err = -ENETDOWN;
		if (!(in_dev->dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
			goto out;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		if (!netif_carrier_ok(nh->nh_dev))
			nh->nh_flags |= RTNH_F_LINKDOWN;
		err = 0;
	}
out:
	rcu_read_unlock();
	return err;
}
904
905static inline unsigned int fib_laddr_hashfn(__be32 val)
906{
907 unsigned int mask = (fib_info_hash_size - 1);
908
909 return ((__force u32)val ^
910 ((__force u32)val >> 7) ^
911 ((__force u32)val >> 14)) & mask;
912}
913
914static struct hlist_head *fib_info_hash_alloc(int bytes)
915{
916 if (bytes <= PAGE_SIZE)
917 return kzalloc(bytes, GFP_KERNEL);
918 else
919 return (struct hlist_head *)
920 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
921 get_order(bytes));
922}
923
924static void fib_info_hash_free(struct hlist_head *hash, int bytes)
925{
926 if (!hash)
927 return;
928
929 if (bytes <= PAGE_SIZE)
930 kfree(hash);
931 else
932 free_pages((unsigned long) hash, get_order(bytes));
933}
934
935static void fib_info_hash_move(struct hlist_head *new_info_hash,
936 struct hlist_head *new_laddrhash,
937 unsigned int new_size)
938{
939 struct hlist_head *old_info_hash, *old_laddrhash;
940 unsigned int old_size = fib_info_hash_size;
941 unsigned int i, bytes;
942
943 spin_lock_bh(&fib_info_lock);
944 old_info_hash = fib_info_hash;
945 old_laddrhash = fib_info_laddrhash;
946 fib_info_hash_size = new_size;
947
948 for (i = 0; i < old_size; i++) {
949 struct hlist_head *head = &fib_info_hash[i];
950 struct hlist_node *n;
951 struct fib_info *fi;
952
953 hlist_for_each_entry_safe(fi, n, head, fib_hash) {
954 struct hlist_head *dest;
955 unsigned int new_hash;
956
957 new_hash = fib_info_hashfn(fi);
958 dest = &new_info_hash[new_hash];
959 hlist_add_head(&fi->fib_hash, dest);
960 }
961 }
962 fib_info_hash = new_info_hash;
963
964 for (i = 0; i < old_size; i++) {
965 struct hlist_head *lhead = &fib_info_laddrhash[i];
966 struct hlist_node *n;
967 struct fib_info *fi;
968
969 hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
970 struct hlist_head *ldest;
971 unsigned int new_hash;
972
973 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
974 ldest = &new_laddrhash[new_hash];
975 hlist_add_head(&fi->fib_lhash, ldest);
976 }
977 }
978 fib_info_laddrhash = new_laddrhash;
979
980 spin_unlock_bh(&fib_info_lock);
981
982 bytes = old_size * sizeof(struct hlist_head *);
983 fib_info_hash_free(old_info_hash, bytes);
984 fib_info_hash_free(old_laddrhash, bytes);
985}
986
987__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
988{
989 nh->nh_saddr = inet_select_addr(nh->nh_dev,
990 nh->nh_gw,
991 nh->nh_parent->fib_scope);
992 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
993
994 return nh->nh_saddr;
995}
996
997static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
998{
999 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
1000 fib_prefsrc != cfg->fc_dst) {
1001 u32 tb_id = cfg->fc_table;
1002 int rc;
1003
1004 if (tb_id == RT_TABLE_MAIN)
1005 tb_id = RT_TABLE_LOCAL;
1006
1007 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1008 fib_prefsrc, tb_id);
1009
1010 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
1011 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1012 fib_prefsrc, RT_TABLE_LOCAL);
1013 }
1014
1015 if (rc != RTN_LOCAL)
1016 return false;
1017 }
1018 return true;
1019}
1020
1021static int
1022fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
1023{
1024 return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
1025 fi->fib_metrics->metrics);
1026}
1027
/*
 * Build a fib_info from the route configuration @cfg.
 *
 * The configuration is validated, a new fib_info with its nexthops is
 * allocated and filled, and then one of two things happens:
 *  - an equivalent fib_info is already hashed: the new object is
 *    discarded and the existing one is returned with fib_treeref
 *    incremented;
 *  - otherwise the new object is inserted into the info, preferred-source
 *    and device hash tables and returned with fib_treeref == 1 and
 *    fib_clntref == 1.
 *
 * Returns the fib_info, or ERR_PTR(-errno) on failure (-EINVAL for an
 * invalid configuration, -ENOBUFS for allocation failures, or the error
 * of the failing helper).  Error text is reported through @extack.
 */
struct fib_info *fib_create_info(struct fib_config *cfg,
				 struct netlink_ext_ack *extack)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
	int nhs = 1;
	struct net *net = cfg->fc_nlinfo.nl_net;

	if (cfg->fc_type > RTN_MAX)
		goto err_inval;

	/* The requested scope may not be below what the route type needs. */
	if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
		NL_SET_ERR_MSG(extack, "Invalid scope");
		goto err_inval;
	}

	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
		goto err_inval;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp) {
		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
		if (nhs == 0)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	/*
	 * Grow both hash tables (double the size, starting at 16 buckets)
	 * once there are as many fib_info objects as buckets.  A failed
	 * allocation is only fatal when there is no table at all yet.
	 */
	if (fib_info_cnt >= fib_info_hash_size) {
		unsigned int new_size = fib_info_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 16;
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_info_hash_alloc(bytes);
		new_laddrhash = fib_info_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_info_hash_free(new_info_hash, bytes);
			fib_info_hash_free(new_laddrhash, bytes);
		} else
			fib_info_hash_move(new_info_hash, new_laddrhash, new_size);

		if (!fib_info_hash_size)
			goto failure;
	}

	/* The nexthop array lives directly behind the fib_info. */
	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	if (!fi)
		goto failure;
	/*
	 * Private metrics are only allocated when the configuration carries
	 * metrics; otherwise the shared default block is referenced.
	 */
	if (cfg->fc_mx) {
		fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
		if (unlikely(!fi->fib_metrics)) {
			/*
			 * fib_info_cnt has not been bumped yet, so this must
			 * not go through free_fib_info() (which decrements
			 * it).
			 */
			kfree(fi);
			return ERR_PTR(err);
		}
		refcount_set(&fi->fib_metrics->refcnt, 1);
	} else {
		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
	}
	fib_info_cnt++;
	fi->fib_net = net;
	fi->fib_protocol = cfg->fc_protocol;
	fi->fib_scope = cfg->fc_scope;
	fi->fib_flags = cfg->fc_flags;
	fi->fib_priority = cfg->fc_priority;
	fi->fib_prefsrc = cfg->fc_prefsrc;
	fi->fib_type = cfg->fc_type;
	fi->fib_tb_id = cfg->fc_table;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nexthop_nh->nh_parent = fi;
		nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
		if (!nexthop_nh->nh_pcpu_rth_output)
			goto failure;
	} endfor_nexthops(fi)

	err = fib_convert_metrics(fi, cfg);
	if (err)
		goto failure;

	if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
		if (err != 0)
			goto failure;
		/*
		 * Route-level oif/gateway/flow, when given in addition to
		 * the multipath spec, must agree with the first nexthop.
		 */
		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop device index does not match RTA_OIF");
			goto err_inval;
		}
		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop gateway does not match RTA_GATEWAY");
			goto err_inval;
		}
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop class id does not match RTA_FLOW");
			goto err_inval;
		}
#endif
#else
		NL_SET_ERR_MSG(extack,
			       "Multipath support not enabled in kernel");
		goto err_inval;
#endif
	} else {
		/* Single nexthop: fill it straight from the route config. */
		struct fib_nh *nh = fi->fib_nh;

		if (cfg->fc_encap) {
			struct lwtunnel_state *lwtstate;

			if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
				NL_SET_ERR_MSG(extack,
					       "LWT encap type not specified");
				goto err_inval;
			}
			err = lwtunnel_build_state(cfg->fc_encap_type,
						   cfg->fc_encap, AF_INET, cfg,
						   &lwtstate, extack);
			if (err)
				goto failure;

			nh->nh_lwtstate = lwtstate_get(lwtstate);
		}
		nh->nh_oif = cfg->fc_oif;
		nh->nh_gw = cfg->fc_gw;
		nh->nh_flags = cfg->fc_flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
		nh->nh_tclassid = cfg->fc_flow;
		/* Balanced by the decrement in free_fib_info(). */
		if (nh->nh_tclassid)
			fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

	/*
	 * Route types with an error code take no gateway, device or
	 * multipath spec and skip all nexthop validation below.
	 */
	if (fib_props[cfg->fc_type].error) {
		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
			NL_SET_ERR_MSG(extack,
				       "Gateway, device and multipath can not be specified for this route type");
			goto err_inval;
		}
		goto link_it;
	} else {
		switch (cfg->fc_type) {
		case RTN_UNICAST:
		case RTN_LOCAL:
		case RTN_BROADCAST:
		case RTN_ANYCAST:
		case RTN_MULTICAST:
			break;
		default:
			NL_SET_ERR_MSG(extack, "Invalid route type");
			goto err_inval;
		}
	}

	if (cfg->fc_scope > RT_SCOPE_HOST) {
		NL_SET_ERR_MSG(extack, "Invalid scope");
		goto err_inval;
	}

	if (cfg->fc_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Host-scope route: exactly one nexthop and no gateway. */
		if (nhs != 1) {
			NL_SET_ERR_MSG(extack,
				       "Route with host scope can not have multiple nexthops");
			goto err_inval;
		}
		if (nh->nh_gw) {
			NL_SET_ERR_MSG(extack,
				       "Route with host scope can not have a gateway");
			goto err_inval;
		}
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (!nh->nh_dev)
			goto failure;
	} else {
		int linkdown = 0;

		change_nexthops(fi) {
			err = fib_check_nh(cfg, nexthop_nh, extack);
			if (err != 0)
				goto failure;
			if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
				linkdown++;
		} endfor_nexthops(fi)
		/* The route is link-down only if all its nexthops are. */
		if (linkdown == fi->fib_nhs)
			fi->fib_flags |= RTNH_F_LINKDOWN;
	}

	if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
		NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
		goto err_inval;
	}

	change_nexthops(fi) {
		fib_info_update_nh_saddr(net, nexthop_nh);
	} endfor_nexthops(fi)

	fib_rebalance(fi);

link_it:
	/* Reuse an equivalent, already hashed fib_info if there is one. */
	ofi = fib_find_info(fi);
	if (ofi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	refcount_set(&fi->fib_clntref, 1);
	spin_lock_bh(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	/* Nexthops without a device are not put on the device hash. */
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		if (!nexthop_nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nexthop_nh->nh_hash, head);
	} endfor_nexthops(fi)
	spin_unlock_bh(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
	/* free_fib_info() refuses objects that are not marked dead. */
	if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}

	return ERR_PTR(err);
}
1290
1291int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1292 u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
1293 struct fib_info *fi, unsigned int flags)
1294{
1295 struct nlmsghdr *nlh;
1296 struct rtmsg *rtm;
1297
1298 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
1299 if (!nlh)
1300 return -EMSGSIZE;
1301
1302 rtm = nlmsg_data(nlh);
1303 rtm->rtm_family = AF_INET;
1304 rtm->rtm_dst_len = dst_len;
1305 rtm->rtm_src_len = 0;
1306 rtm->rtm_tos = tos;
1307 if (tb_id < 256)
1308 rtm->rtm_table = tb_id;
1309 else
1310 rtm->rtm_table = RT_TABLE_COMPAT;
1311 if (nla_put_u32(skb, RTA_TABLE, tb_id))
1312 goto nla_put_failure;
1313 rtm->rtm_type = type;
1314 rtm->rtm_flags = fi->fib_flags;
1315 rtm->rtm_scope = fi->fib_scope;
1316 rtm->rtm_protocol = fi->fib_protocol;
1317
1318 if (rtm->rtm_dst_len &&
1319 nla_put_in_addr(skb, RTA_DST, dst))
1320 goto nla_put_failure;
1321 if (fi->fib_priority &&
1322 nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
1323 goto nla_put_failure;
1324 if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
1325 goto nla_put_failure;
1326
1327 if (fi->fib_prefsrc &&
1328 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
1329 goto nla_put_failure;
1330 if (fi->fib_nhs == 1) {
1331 if (fi->fib_nh->nh_gw &&
1332 nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
1333 goto nla_put_failure;
1334 if (fi->fib_nh->nh_oif &&
1335 nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
1336 goto nla_put_failure;
1337 if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
1338 struct in_device *in_dev;
1339
1340 rcu_read_lock();
1341 in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
1342 if (in_dev &&
1343 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1344 rtm->rtm_flags |= RTNH_F_DEAD;
1345 rcu_read_unlock();
1346 }
1347 if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
1348 rtm->rtm_flags |= RTNH_F_OFFLOAD;
1349#ifdef CONFIG_IP_ROUTE_CLASSID
1350 if (fi->fib_nh[0].nh_tclassid &&
1351 nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
1352 goto nla_put_failure;
1353#endif
1354 if (fi->fib_nh->nh_lwtstate &&
1355 lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
1356 goto nla_put_failure;
1357 }
1358#ifdef CONFIG_IP_ROUTE_MULTIPATH
1359 if (fi->fib_nhs > 1) {
1360 struct rtnexthop *rtnh;
1361 struct nlattr *mp;
1362
1363 mp = nla_nest_start(skb, RTA_MULTIPATH);
1364 if (!mp)
1365 goto nla_put_failure;
1366
1367 for_nexthops(fi) {
1368 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1369 if (!rtnh)
1370 goto nla_put_failure;
1371
1372 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1373 if (nh->nh_flags & RTNH_F_LINKDOWN) {
1374 struct in_device *in_dev;
1375
1376 rcu_read_lock();
1377 in_dev = __in_dev_get_rcu(nh->nh_dev);
1378 if (in_dev &&
1379 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1380 rtnh->rtnh_flags |= RTNH_F_DEAD;
1381 rcu_read_unlock();
1382 }
1383 rtnh->rtnh_hops = nh->nh_weight - 1;
1384 rtnh->rtnh_ifindex = nh->nh_oif;
1385
1386 if (nh->nh_gw &&
1387 nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
1388 goto nla_put_failure;
1389#ifdef CONFIG_IP_ROUTE_CLASSID
1390 if (nh->nh_tclassid &&
1391 nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
1392 goto nla_put_failure;
1393#endif
1394 if (nh->nh_lwtstate &&
1395 lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
1396 goto nla_put_failure;
1397
1398
1399 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1400 } endfor_nexthops(fi);
1401
1402 nla_nest_end(skb, mp);
1403 }
1404#endif
1405 nlmsg_end(skb, nlh);
1406 return 0;
1407
1408nla_put_failure:
1409 nlmsg_cancel(skb, nlh);
1410 return -EMSGSIZE;
1411}
1412
1413
1414
1415
1416
1417
1418
1419int fib_sync_down_addr(struct net_device *dev, __be32 local)
1420{
1421 int ret = 0;
1422 unsigned int hash = fib_laddr_hashfn(local);
1423 struct hlist_head *head = &fib_info_laddrhash[hash];
1424 struct net *net = dev_net(dev);
1425 int tb_id = l3mdev_fib_table(dev);
1426 struct fib_info *fi;
1427
1428 if (!fib_info_laddrhash || local == 0)
1429 return 0;
1430
1431 hlist_for_each_entry(fi, head, fib_lhash) {
1432 if (!net_eq(fi->fib_net, net) ||
1433 fi->fib_tb_id != tb_id)
1434 continue;
1435 if (fi->fib_prefsrc == local) {
1436 fi->fib_flags |= RTNH_F_DEAD;
1437 ret++;
1438 }
1439 }
1440 return ret;
1441}
1442
1443static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
1444 enum fib_event_type event_type)
1445{
1446 struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev);
1447 struct fib_nh_notifier_info info = {
1448 .fib_nh = fib_nh,
1449 };
1450
1451 switch (event_type) {
1452 case FIB_EVENT_NH_ADD:
1453 if (fib_nh->nh_flags & RTNH_F_DEAD)
1454 break;
1455 if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1456 fib_nh->nh_flags & RTNH_F_LINKDOWN)
1457 break;
1458 return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
1459 &info.info);
1460 case FIB_EVENT_NH_DEL:
1461 if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1462 fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
1463 (fib_nh->nh_flags & RTNH_F_DEAD))
1464 return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
1465 event_type, &info.info);
1466 default:
1467 break;
1468 }
1469
1470 return NOTIFY_DONE;
1471}
1472
1473
1474
1475
1476
1477
1478
1479int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
1480{
1481 int ret = 0;
1482 int scope = RT_SCOPE_NOWHERE;
1483 struct fib_info *prev_fi = NULL;
1484 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1485 struct hlist_head *head = &fib_info_devhash[hash];
1486 struct fib_nh *nh;
1487
1488 if (force)
1489 scope = -1;
1490
1491 hlist_for_each_entry(nh, head, nh_hash) {
1492 struct fib_info *fi = nh->nh_parent;
1493 int dead;
1494
1495 BUG_ON(!fi->fib_nhs);
1496 if (nh->nh_dev != dev || fi == prev_fi)
1497 continue;
1498 prev_fi = fi;
1499 dead = 0;
1500 change_nexthops(fi) {
1501 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1502 dead++;
1503 else if (nexthop_nh->nh_dev == dev &&
1504 nexthop_nh->nh_scope != scope) {
1505 switch (event) {
1506 case NETDEV_DOWN:
1507 case NETDEV_UNREGISTER:
1508 nexthop_nh->nh_flags |= RTNH_F_DEAD;
1509
1510 case NETDEV_CHANGE:
1511 nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
1512 break;
1513 }
1514 call_fib_nh_notifiers(nexthop_nh,
1515 FIB_EVENT_NH_DEL);
1516 dead++;
1517 }
1518#ifdef CONFIG_IP_ROUTE_MULTIPATH
1519 if (event == NETDEV_UNREGISTER &&
1520 nexthop_nh->nh_dev == dev) {
1521 dead = fi->fib_nhs;
1522 break;
1523 }
1524#endif
1525 } endfor_nexthops(fi)
1526 if (dead == fi->fib_nhs) {
1527 switch (event) {
1528 case NETDEV_DOWN:
1529 case NETDEV_UNREGISTER:
1530 fi->fib_flags |= RTNH_F_DEAD;
1531
1532 case NETDEV_CHANGE:
1533 fi->fib_flags |= RTNH_F_LINKDOWN;
1534 break;
1535 }
1536 ret++;
1537 }
1538
1539 fib_rebalance(fi);
1540 }
1541
1542 return ret;
1543}
1544
1545
1546static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
1547{
1548 struct fib_info *fi = NULL, *last_resort = NULL;
1549 struct hlist_head *fa_head = res->fa_head;
1550 struct fib_table *tb = res->table;
1551 u8 slen = 32 - res->prefixlen;
1552 int order = -1, last_idx = -1;
1553 struct fib_alias *fa, *fa1 = NULL;
1554 u32 last_prio = res->fi->fib_priority;
1555 u8 last_tos = 0;
1556
1557 hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
1558 struct fib_info *next_fi = fa->fa_info;
1559
1560 if (fa->fa_slen != slen)
1561 continue;
1562 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1563 continue;
1564 if (fa->tb_id != tb->tb_id)
1565 continue;
1566 if (next_fi->fib_priority > last_prio &&
1567 fa->fa_tos == last_tos) {
1568 if (last_tos)
1569 continue;
1570 break;
1571 }
1572 if (next_fi->fib_flags & RTNH_F_DEAD)
1573 continue;
1574 last_tos = fa->fa_tos;
1575 last_prio = next_fi->fib_priority;
1576
1577 if (next_fi->fib_scope != res->scope ||
1578 fa->fa_type != RTN_UNICAST)
1579 continue;
1580 if (!next_fi->fib_nh[0].nh_gw ||
1581 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1582 continue;
1583
1584 fib_alias_accessed(fa);
1585
1586 if (!fi) {
1587 if (next_fi != res->fi)
1588 break;
1589 fa1 = fa;
1590 } else if (!fib_detect_death(fi, order, &last_resort,
1591 &last_idx, fa1->fa_default)) {
1592 fib_result_assign(res, fi);
1593 fa1->fa_default = order;
1594 goto out;
1595 }
1596 fi = next_fi;
1597 order++;
1598 }
1599
1600 if (order <= 0 || !fi) {
1601 if (fa1)
1602 fa1->fa_default = -1;
1603 goto out;
1604 }
1605
1606 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1607 fa1->fa_default)) {
1608 fib_result_assign(res, fi);
1609 fa1->fa_default = order;
1610 goto out;
1611 }
1612
1613 if (last_idx >= 0)
1614 fib_result_assign(res, last_resort);
1615 fa1->fa_default = last_idx;
1616out:
1617 return;
1618}
1619
1620
1621
1622
1623
1624int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1625{
1626 struct fib_info *prev_fi;
1627 unsigned int hash;
1628 struct hlist_head *head;
1629 struct fib_nh *nh;
1630 int ret;
1631
1632 if (!(dev->flags & IFF_UP))
1633 return 0;
1634
1635 if (nh_flags & RTNH_F_DEAD) {
1636 unsigned int flags = dev_get_flags(dev);
1637
1638 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1639 nh_flags |= RTNH_F_LINKDOWN;
1640 }
1641
1642 prev_fi = NULL;
1643 hash = fib_devindex_hashfn(dev->ifindex);
1644 head = &fib_info_devhash[hash];
1645 ret = 0;
1646
1647 hlist_for_each_entry(nh, head, nh_hash) {
1648 struct fib_info *fi = nh->nh_parent;
1649 int alive;
1650
1651 BUG_ON(!fi->fib_nhs);
1652 if (nh->nh_dev != dev || fi == prev_fi)
1653 continue;
1654
1655 prev_fi = fi;
1656 alive = 0;
1657 change_nexthops(fi) {
1658 if (!(nexthop_nh->nh_flags & nh_flags)) {
1659 alive++;
1660 continue;
1661 }
1662 if (!nexthop_nh->nh_dev ||
1663 !(nexthop_nh->nh_dev->flags & IFF_UP))
1664 continue;
1665 if (nexthop_nh->nh_dev != dev ||
1666 !__in_dev_get_rtnl(dev))
1667 continue;
1668 alive++;
1669 nexthop_nh->nh_flags &= ~nh_flags;
1670 call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
1671 } endfor_nexthops(fi)
1672
1673 if (alive > 0) {
1674 fi->fib_flags &= ~nh_flags;
1675 ret++;
1676 }
1677
1678 fib_rebalance(fi);
1679 }
1680
1681 return ret;
1682}
1683
1684#ifdef CONFIG_IP_ROUTE_MULTIPATH
1685static bool fib_good_nh(const struct fib_nh *nh)
1686{
1687 int state = NUD_REACHABLE;
1688
1689 if (nh->nh_scope == RT_SCOPE_LINK) {
1690 struct neighbour *n;
1691
1692 rcu_read_lock_bh();
1693
1694 n = __ipv4_neigh_lookup_noref(nh->nh_dev,
1695 (__force u32)nh->nh_gw);
1696 if (n)
1697 state = n->nud_state;
1698
1699 rcu_read_unlock_bh();
1700 }
1701
1702 return !!(state & NUD_VALID);
1703}
1704
1705void fib_select_multipath(struct fib_result *res, int hash)
1706{
1707 struct fib_info *fi = res->fi;
1708 struct net *net = fi->fib_net;
1709 bool first = false;
1710
1711 for_nexthops(fi) {
1712 if (net->ipv4.sysctl_fib_multipath_use_neigh) {
1713 if (!fib_good_nh(nh))
1714 continue;
1715 if (!first) {
1716 res->nh_sel = nhsel;
1717 first = true;
1718 }
1719 }
1720
1721 if (hash > atomic_read(&nh->nh_upper_bound))
1722 continue;
1723
1724 res->nh_sel = nhsel;
1725 return;
1726 } endfor_nexthops(fi);
1727}
1728#endif
1729
1730void fib_select_path(struct net *net, struct fib_result *res,
1731 struct flowi4 *fl4, const struct sk_buff *skb)
1732{
1733 if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
1734 goto check_saddr;
1735
1736#ifdef CONFIG_IP_ROUTE_MULTIPATH
1737 if (res->fi->fib_nhs > 1) {
1738 int h = fib_multipath_hash(net, fl4, skb, NULL);
1739
1740 fib_select_multipath(res, h);
1741 }
1742 else
1743#endif
1744 if (!res->prefixlen &&
1745 res->table->tb_num_default > 1 &&
1746 res->type == RTN_UNICAST)
1747 fib_select_default(fl4, res);
1748
1749check_saddr:
1750 if (!fl4->saddr)
1751 fl4->saddr = FIB_RES_PREFSRC(net, *res);
1752}
1753