1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <linux/uaccess.h>
17#include <linux/bitops.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/jiffies.h>
21#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/errno.h>
26#include <linux/in.h>
27#include <linux/inet.h>
28#include <linux/inetdevice.h>
29#include <linux/netdevice.h>
30#include <linux/if_arp.h>
31#include <linux/proc_fs.h>
32#include <linux/skbuff.h>
33#include <linux/init.h>
34#include <linux/slab.h>
35#include <linux/netlink.h>
36
37#include <net/arp.h>
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/ip_fib.h>
44#include <net/netlink.h>
45#include <net/nexthop.h>
46#include <net/lwtunnel.h>
47#include <net/fib_notifier.h>
48
49#include "fib_lookup.h"
50
/*
 * Taken by the code in this file that inserts into, removes from or
 * rehashes the tables below, and by ip_fib_check_default() while it walks
 * fib_info_devhash.
 */
static DEFINE_SPINLOCK(fib_info_lock);
/* fib_info objects linked through fib_hash, bucketed by fib_info_hashfn() */
static struct hlist_head *fib_info_hash;
/*
 * fib_info objects with a non-zero fib_prefsrc, linked through fib_lhash
 * and bucketed by fib_laddr_hashfn()
 */
static struct hlist_head *fib_info_laddrhash;
/* Bucket count shared by both tables above; 0 until the first allocation */
static unsigned int fib_info_hash_size;
/*
 * Number of live fib_info objects: bumped in fib_create_info(), dropped in
 * free_fib_info(), and compared with fib_info_hash_size to decide when the
 * tables are grown.
 */
static unsigned int fib_info_cnt;

/* Fixed-size table of nexthops (fib_nh.nh_hash) keyed by device ifindex */
#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
 * for_nexthops(fi) <body> endfor_nexthops(fi)
 *
 * Read-only walk over the fib_nhs nexthops stored in (fi)->fib_nh. The
 * macro opens a brace scope that declares the index 'nhsel' and the const
 * cursor 'nh'; the scope is only closed by endfor_nexthops(), so the two
 * must always be used as a pair.
 */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

/*
 * change_nexthops(fi) <body> endfor_nexthops(fi)
 *
 * Same walk as for_nexthops(), but the cursor is the writable
 * 'nexthop_nh' (the const qualifier is cast away).
 */
#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < (fi)->fib_nhs;					\
	     nexthop_nh++, nhsel++)

#else

/*
 * Without multipath support the loops below run exactly once, over the
 * first nexthop only, regardless of (fi)->fib_nhs.
 */

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif

/* Closes the scope opened by for_nexthops() / change_nexthops() */
#define endfor_nexthops(fi) }
91
92
93const struct fib_prop fib_props[RTN_MAX + 1] = {
94 [RTN_UNSPEC] = {
95 .error = 0,
96 .scope = RT_SCOPE_NOWHERE,
97 },
98 [RTN_UNICAST] = {
99 .error = 0,
100 .scope = RT_SCOPE_UNIVERSE,
101 },
102 [RTN_LOCAL] = {
103 .error = 0,
104 .scope = RT_SCOPE_HOST,
105 },
106 [RTN_BROADCAST] = {
107 .error = 0,
108 .scope = RT_SCOPE_LINK,
109 },
110 [RTN_ANYCAST] = {
111 .error = 0,
112 .scope = RT_SCOPE_LINK,
113 },
114 [RTN_MULTICAST] = {
115 .error = 0,
116 .scope = RT_SCOPE_UNIVERSE,
117 },
118 [RTN_BLACKHOLE] = {
119 .error = -EINVAL,
120 .scope = RT_SCOPE_UNIVERSE,
121 },
122 [RTN_UNREACHABLE] = {
123 .error = -EHOSTUNREACH,
124 .scope = RT_SCOPE_UNIVERSE,
125 },
126 [RTN_PROHIBIT] = {
127 .error = -EACCES,
128 .scope = RT_SCOPE_UNIVERSE,
129 },
130 [RTN_THROW] = {
131 .error = -EAGAIN,
132 .scope = RT_SCOPE_UNIVERSE,
133 },
134 [RTN_NAT] = {
135 .error = -EINVAL,
136 .scope = RT_SCOPE_NOWHERE,
137 },
138 [RTN_XRESOLVE] = {
139 .error = -EINVAL,
140 .scope = RT_SCOPE_NOWHERE,
141 },
142};
143
144static void rt_fibinfo_free(struct rtable __rcu **rtp)
145{
146 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
147
148 if (!rt)
149 return;
150
151
152
153
154
155
156 dst_dev_put(&rt->dst);
157 dst_release_immediate(&rt->dst);
158}
159
160static void free_nh_exceptions(struct fib_nh *nh)
161{
162 struct fnhe_hash_bucket *hash;
163 int i;
164
165 hash = rcu_dereference_protected(nh->nh_exceptions, 1);
166 if (!hash)
167 return;
168 for (i = 0; i < FNHE_HASH_SIZE; i++) {
169 struct fib_nh_exception *fnhe;
170
171 fnhe = rcu_dereference_protected(hash[i].chain, 1);
172 while (fnhe) {
173 struct fib_nh_exception *next;
174
175 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
176
177 rt_fibinfo_free(&fnhe->fnhe_rth_input);
178 rt_fibinfo_free(&fnhe->fnhe_rth_output);
179
180 kfree(fnhe);
181
182 fnhe = next;
183 }
184 }
185 kfree(hash);
186}
187
188static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
189{
190 int cpu;
191
192 if (!rtp)
193 return;
194
195 for_each_possible_cpu(cpu) {
196 struct rtable *rt;
197
198 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
199 if (rt) {
200 dst_dev_put(&rt->dst);
201 dst_release_immediate(&rt->dst);
202 }
203 }
204 free_percpu(rtp);
205}
206
207
208static void free_fib_info_rcu(struct rcu_head *head)
209{
210 struct fib_info *fi = container_of(head, struct fib_info, rcu);
211 struct dst_metrics *m;
212
213 change_nexthops(fi) {
214 if (nexthop_nh->nh_dev)
215 dev_put(nexthop_nh->nh_dev);
216 lwtstate_put(nexthop_nh->nh_lwtstate);
217 free_nh_exceptions(nexthop_nh);
218 rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
219 rt_fibinfo_free(&nexthop_nh->nh_rth_input);
220 } endfor_nexthops(fi);
221
222 m = fi->fib_metrics;
223 if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
224 kfree(m);
225 kfree(fi);
226}
227
228void free_fib_info(struct fib_info *fi)
229{
230 if (fi->fib_dead == 0) {
231 pr_warn("Freeing alive fib_info %p\n", fi);
232 return;
233 }
234 fib_info_cnt--;
235#ifdef CONFIG_IP_ROUTE_CLASSID
236 change_nexthops(fi) {
237 if (nexthop_nh->nh_tclassid)
238 fi->fib_net->ipv4.fib_num_tclassid_users--;
239 } endfor_nexthops(fi);
240#endif
241 call_rcu(&fi->rcu, free_fib_info_rcu);
242}
243EXPORT_SYMBOL_GPL(free_fib_info);
244
245void fib_release_info(struct fib_info *fi)
246{
247 spin_lock_bh(&fib_info_lock);
248 if (fi && --fi->fib_treeref == 0) {
249 hlist_del(&fi->fib_hash);
250 if (fi->fib_prefsrc)
251 hlist_del(&fi->fib_lhash);
252 change_nexthops(fi) {
253 if (!nexthop_nh->nh_dev)
254 continue;
255 hlist_del(&nexthop_nh->nh_hash);
256 } endfor_nexthops(fi)
257 fi->fib_dead = 1;
258 fib_info_put(fi);
259 }
260 spin_unlock_bh(&fib_info_lock);
261}
262
263static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
264{
265 const struct fib_nh *onh = ofi->fib_nh;
266
267 for_nexthops(fi) {
268 if (nh->nh_oif != onh->nh_oif ||
269 nh->nh_gw != onh->nh_gw ||
270 nh->nh_scope != onh->nh_scope ||
271#ifdef CONFIG_IP_ROUTE_MULTIPATH
272 nh->nh_weight != onh->nh_weight ||
273#endif
274#ifdef CONFIG_IP_ROUTE_CLASSID
275 nh->nh_tclassid != onh->nh_tclassid ||
276#endif
277 lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
278 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
279 return -1;
280 onh++;
281 } endfor_nexthops(fi);
282 return 0;
283}
284
285static inline unsigned int fib_devindex_hashfn(unsigned int val)
286{
287 unsigned int mask = DEVINDEX_HASHSIZE - 1;
288
289 return (val ^
290 (val >> DEVINDEX_HASHBITS) ^
291 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
292}
293
294static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
295{
296 unsigned int mask = (fib_info_hash_size - 1);
297 unsigned int val = fi->fib_nhs;
298
299 val ^= (fi->fib_protocol << 8) | fi->fib_scope;
300 val ^= (__force u32)fi->fib_prefsrc;
301 val ^= fi->fib_priority;
302 for_nexthops(fi) {
303 val ^= fib_devindex_hashfn(nh->nh_oif);
304 } endfor_nexthops(fi)
305
306 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
307}
308
309static struct fib_info *fib_find_info(const struct fib_info *nfi)
310{
311 struct hlist_head *head;
312 struct fib_info *fi;
313 unsigned int hash;
314
315 hash = fib_info_hashfn(nfi);
316 head = &fib_info_hash[hash];
317
318 hlist_for_each_entry(fi, head, fib_hash) {
319 if (!net_eq(fi->fib_net, nfi->fib_net))
320 continue;
321 if (fi->fib_nhs != nfi->fib_nhs)
322 continue;
323 if (nfi->fib_protocol == fi->fib_protocol &&
324 nfi->fib_scope == fi->fib_scope &&
325 nfi->fib_prefsrc == fi->fib_prefsrc &&
326 nfi->fib_priority == fi->fib_priority &&
327 nfi->fib_type == fi->fib_type &&
328 memcmp(nfi->fib_metrics, fi->fib_metrics,
329 sizeof(u32) * RTAX_MAX) == 0 &&
330 !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
331 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
332 return fi;
333 }
334
335 return NULL;
336}
337
338
339
340
341int ip_fib_check_default(__be32 gw, struct net_device *dev)
342{
343 struct hlist_head *head;
344 struct fib_nh *nh;
345 unsigned int hash;
346
347 spin_lock(&fib_info_lock);
348
349 hash = fib_devindex_hashfn(dev->ifindex);
350 head = &fib_info_devhash[hash];
351 hlist_for_each_entry(nh, head, nh_hash) {
352 if (nh->nh_dev == dev &&
353 nh->nh_gw == gw &&
354 !(nh->nh_flags & RTNH_F_DEAD)) {
355 spin_unlock(&fib_info_lock);
356 return 0;
357 }
358 }
359
360 spin_unlock(&fib_info_lock);
361
362 return -1;
363}
364
365static inline size_t fib_nlmsg_size(struct fib_info *fi)
366{
367 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
368 + nla_total_size(4)
369 + nla_total_size(4)
370 + nla_total_size(4)
371 + nla_total_size(4)
372 + nla_total_size(TCP_CA_NAME_MAX);
373
374
375 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
376
377 if (fi->fib_nhs) {
378 size_t nh_encapsize = 0;
379
380
381
382 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
383
384
385 nhsize += 2 * nla_total_size(4);
386
387
388 for_nexthops(fi) {
389 if (nh->nh_lwtstate) {
390
391 nh_encapsize += lwtunnel_get_encap_size(
392 nh->nh_lwtstate);
393
394 nh_encapsize += nla_total_size(2);
395 }
396 } endfor_nexthops(fi);
397
398
399 payload += nla_total_size((fi->fib_nhs * nhsize) +
400 nh_encapsize);
401
402 }
403
404 return payload;
405}
406
407void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
408 int dst_len, u32 tb_id, const struct nl_info *info,
409 unsigned int nlm_flags)
410{
411 struct sk_buff *skb;
412 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
413 int err = -ENOBUFS;
414
415 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
416 if (!skb)
417 goto errout;
418
419 err = fib_dump_info(skb, info->portid, seq, event, tb_id,
420 fa->fa_type, key, dst_len,
421 fa->fa_tos, fa->fa_info, nlm_flags);
422 if (err < 0) {
423
424 WARN_ON(err == -EMSGSIZE);
425 kfree_skb(skb);
426 goto errout;
427 }
428 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
429 info->nlh, GFP_KERNEL);
430 return;
431errout:
432 if (err < 0)
433 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
434}
435
436static int fib_detect_death(struct fib_info *fi, int order,
437 struct fib_info **last_resort, int *last_idx,
438 int dflt)
439{
440 struct neighbour *n;
441 int state = NUD_NONE;
442
443 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
444 if (n) {
445 state = n->nud_state;
446 neigh_release(n);
447 } else {
448 return 0;
449 }
450 if (state == NUD_REACHABLE)
451 return 0;
452 if ((state & NUD_VALID) && order != dflt)
453 return 0;
454 if ((state & NUD_VALID) ||
455 (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
456 *last_resort = fi;
457 *last_idx = order;
458 }
459 return 1;
460}
461
462#ifdef CONFIG_IP_ROUTE_MULTIPATH
463
/*
 * Count the rtnexthop records in the @remaining bytes starting at @rtnh.
 *
 * Returns the number of records, or 0 (with an extack message) when bytes
 * are left over after the last well-formed record.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
			      struct netlink_ext_ack *extack)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	if (remaining > 0) {
		NL_SET_ERR_MSG(extack,
			       "Invalid nexthop configuration - extra data after nexthops");
		return 0;
	}

	return count;
}
483
484static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
485 int remaining, struct fib_config *cfg,
486 struct netlink_ext_ack *extack)
487{
488 int ret;
489
490 change_nexthops(fi) {
491 int attrlen;
492
493 if (!rtnh_ok(rtnh, remaining)) {
494 NL_SET_ERR_MSG(extack,
495 "Invalid nexthop configuration - extra data after nexthop");
496 return -EINVAL;
497 }
498
499 if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
500 NL_SET_ERR_MSG(extack,
501 "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
502 return -EINVAL;
503 }
504
505 nexthop_nh->nh_flags =
506 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
507 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
508 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
509
510 attrlen = rtnh_attrlen(rtnh);
511 if (attrlen > 0) {
512 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
513
514 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
515 nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0;
516#ifdef CONFIG_IP_ROUTE_CLASSID
517 nla = nla_find(attrs, attrlen, RTA_FLOW);
518 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
519 if (nexthop_nh->nh_tclassid)
520 fi->fib_net->ipv4.fib_num_tclassid_users++;
521#endif
522 nla = nla_find(attrs, attrlen, RTA_ENCAP);
523 if (nla) {
524 struct lwtunnel_state *lwtstate;
525 struct nlattr *nla_entype;
526
527 nla_entype = nla_find(attrs, attrlen,
528 RTA_ENCAP_TYPE);
529 if (!nla_entype) {
530 NL_SET_BAD_ATTR(extack, nla);
531 NL_SET_ERR_MSG(extack,
532 "Encap type is missing");
533 goto err_inval;
534 }
535
536 ret = lwtunnel_build_state(nla_get_u16(
537 nla_entype),
538 nla, AF_INET, cfg,
539 &lwtstate, extack);
540 if (ret)
541 goto errout;
542 nexthop_nh->nh_lwtstate =
543 lwtstate_get(lwtstate);
544 }
545 }
546
547 rtnh = rtnh_next(rtnh, &remaining);
548 } endfor_nexthops(fi);
549
550 return 0;
551
552err_inval:
553 ret = -EINVAL;
554
555errout:
556 return ret;
557}
558
559static void fib_rebalance(struct fib_info *fi)
560{
561 int total;
562 int w;
563 struct in_device *in_dev;
564
565 if (fi->fib_nhs < 2)
566 return;
567
568 total = 0;
569 for_nexthops(fi) {
570 if (nh->nh_flags & RTNH_F_DEAD)
571 continue;
572
573 in_dev = __in_dev_get_rtnl(nh->nh_dev);
574
575 if (in_dev &&
576 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
577 nh->nh_flags & RTNH_F_LINKDOWN)
578 continue;
579
580 total += nh->nh_weight;
581 } endfor_nexthops(fi);
582
583 w = 0;
584 change_nexthops(fi) {
585 int upper_bound;
586
587 in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
588
589 if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
590 upper_bound = -1;
591 } else if (in_dev &&
592 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
593 nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
594 upper_bound = -1;
595 } else {
596 w += nexthop_nh->nh_weight;
597 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
598 total) - 1;
599 }
600
601 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
602 } endfor_nexthops(fi);
603}
604#else
605
/* Without CONFIG_IP_ROUTE_MULTIPATH rebalancing expands to an empty statement */
#define fib_rebalance(fi) do { } while (0)
607
608#endif
609
610static int fib_encap_match(u16 encap_type,
611 struct nlattr *encap,
612 const struct fib_nh *nh,
613 const struct fib_config *cfg,
614 struct netlink_ext_ack *extack)
615{
616 struct lwtunnel_state *lwtstate;
617 int ret, result = 0;
618
619 if (encap_type == LWTUNNEL_ENCAP_NONE)
620 return 0;
621
622 ret = lwtunnel_build_state(encap_type, encap, AF_INET,
623 cfg, &lwtstate, extack);
624 if (!ret) {
625 result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
626 lwtstate_free(lwtstate);
627 }
628
629 return result;
630}
631
632int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
633 struct netlink_ext_ack *extack)
634{
635#ifdef CONFIG_IP_ROUTE_MULTIPATH
636 struct rtnexthop *rtnh;
637 int remaining;
638#endif
639
640 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
641 return 1;
642
643 if (cfg->fc_oif || cfg->fc_gw) {
644 if (cfg->fc_encap) {
645 if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
646 fi->fib_nh, cfg, extack))
647 return 1;
648 }
649#ifdef CONFIG_IP_ROUTE_CLASSID
650 if (cfg->fc_flow &&
651 cfg->fc_flow != fi->fib_nh->nh_tclassid)
652 return 1;
653#endif
654 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
655 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
656 return 0;
657 return 1;
658 }
659
660#ifdef CONFIG_IP_ROUTE_MULTIPATH
661 if (!cfg->fc_mp)
662 return 0;
663
664 rtnh = cfg->fc_mp;
665 remaining = cfg->fc_mp_len;
666
667 for_nexthops(fi) {
668 int attrlen;
669
670 if (!rtnh_ok(rtnh, remaining))
671 return -EINVAL;
672
673 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
674 return 1;
675
676 attrlen = rtnh_attrlen(rtnh);
677 if (attrlen > 0) {
678 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
679
680 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
681 if (nla && nla_get_in_addr(nla) != nh->nh_gw)
682 return 1;
683#ifdef CONFIG_IP_ROUTE_CLASSID
684 nla = nla_find(attrs, attrlen, RTA_FLOW);
685 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
686 return 1;
687#endif
688 }
689
690 rtnh = rtnh_next(rtnh, &remaining);
691 } endfor_nexthops(fi);
692#endif
693 return 0;
694}
695
696bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
697{
698 struct nlattr *nla;
699 int remaining;
700
701 if (!cfg->fc_mx)
702 return true;
703
704 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
705 int type = nla_type(nla);
706 u32 fi_val, val;
707
708 if (!type)
709 continue;
710 if (type > RTAX_MAX)
711 return false;
712
713 if (type == RTAX_CC_ALGO) {
714 char tmp[TCP_CA_NAME_MAX];
715 bool ecn_ca = false;
716
717 nla_strlcpy(tmp, nla, sizeof(tmp));
718 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
719 } else {
720 if (nla_len(nla) != sizeof(u32))
721 return false;
722 val = nla_get_u32(nla);
723 }
724
725 fi_val = fi->fib_metrics->metrics[type - 1];
726 if (type == RTAX_FEATURES)
727 fi_val &= ~DST_FEATURE_ECN_CA;
728
729 if (fi_val != val)
730 return false;
731 }
732
733 return true;
734}
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
/*
 * Validate one nexthop of a route being created and resolve its device.
 *
 * @cfg:    route configuration (netns, scope, table)
 * @nh:     nexthop to validate; on success nh_dev (with a reference held),
 *          nh_scope and possibly nh_oif / RTNH_F_LINKDOWN are filled in
 * @extack: netlink extended ack used to report errors
 *
 * Three cases are handled:
 *  - gateway with RTNH_F_ONLINK: the device is taken from nh_oif and the
 *    gateway must be a unicast address as seen from that device;
 *  - gateway without ONLINK: the gateway is looked up in the FIB and the
 *    device and scope are taken from the lookup result;
 *  - no gateway: the device is taken from nh_oif.
 *
 * Returns 0 on success or a negative errno.
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
			struct netlink_ext_ack *extack)
{
	int err = 0;
	struct net *net;
	struct net_device *dev;

	net = cfg->fc_nlinfo.nl_net;
	if (nh->nh_gw) {
		struct fib_result res;

		if (nh->nh_flags & RTNH_F_ONLINK) {
			unsigned int addr_type;

			/* an on-link gateway needs a scope wider than link */
			if (cfg->fc_scope >= RT_SCOPE_LINK) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid scope");
				return -EINVAL;
			}
			dev = __dev_get_by_index(net, nh->nh_oif);
			if (!dev)
				return -ENODEV;
			if (!(dev->flags & IFF_UP)) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop device is not up");
				return -ENETDOWN;
			}
			addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
			if (addr_type != RTN_UNICAST) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid gateway");
				return -EINVAL;
			}
			if (!netif_carrier_ok(dev))
				nh->nh_flags |= RTNH_F_LINKDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			/* this branch never took rcu_read_lock(): plain return */
			return 0;
		}
		rcu_read_lock();
		{
			struct fib_table *tbl = NULL;
			/* look the gateway up one scope step above the route */
			struct flowi4 fl4 = {
				.daddr = nh->nh_gw,
				.flowi4_scope = cfg->fc_scope + 1,
				.flowi4_oif = nh->nh_oif,
				.flowi4_iif = LOOPBACK_IFINDEX,
			};

			/* never search with a scope below RT_SCOPE_LINK */
			if (fl4.flowi4_scope < RT_SCOPE_LINK)
				fl4.flowi4_scope = RT_SCOPE_LINK;

			if (cfg->fc_table)
				tbl = fib_get_table(net, cfg->fc_table);

			if (tbl)
				err = fib_table_lookup(tbl, &fl4, &res,
						       FIB_LOOKUP_IGNORE_LINKSTATE |
						       FIB_LOOKUP_NOREF);

			/*
			 * No table given, or the lookup in the given table
			 * failed: fall back to a full lookup.
			 */
			if (!tbl || err) {
				err = fib_lookup(net, &fl4, &res,
						 FIB_LOOKUP_IGNORE_LINKSTATE);
			}

			if (err) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid gateway");
				rcu_read_unlock();
				return err;
			}
		}
		/* from here on every exit goes through 'out' to drop the RCU lock */
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			goto out;
		}
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		nh->nh_dev = dev = FIB_RES_DEV(res);
		if (!dev) {
			NL_SET_ERR_MSG(extack,
				       "No egress device for nexthop gateway");
			goto out;
		}
		dev_hold(dev);
		if (!netif_carrier_ok(dev))
			nh->nh_flags |= RTNH_F_LINKDOWN;
		/* the reference is held even when the device turns out to be down */
		err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
	} else {
		struct in_device *in_dev;

		/* these flags only make sense together with a gateway */
		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
			return -EINVAL;
		}
		rcu_read_lock();
		err = -ENODEV;
		in_dev = inetdev_by_index(net, nh->nh_oif);
		if (!in_dev)
			goto out;
		err = -ENETDOWN;
		if (!(in_dev->dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
			goto out;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		if (!netif_carrier_ok(nh->nh_dev))
			nh->nh_flags |= RTNH_F_LINKDOWN;
		err = 0;
	}
out:
	rcu_read_unlock();
	return err;
}
904
905static inline unsigned int fib_laddr_hashfn(__be32 val)
906{
907 unsigned int mask = (fib_info_hash_size - 1);
908
909 return ((__force u32)val ^
910 ((__force u32)val >> 7) ^
911 ((__force u32)val >> 14)) & mask;
912}
913
914static struct hlist_head *fib_info_hash_alloc(int bytes)
915{
916 if (bytes <= PAGE_SIZE)
917 return kzalloc(bytes, GFP_KERNEL);
918 else
919 return (struct hlist_head *)
920 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
921 get_order(bytes));
922}
923
924static void fib_info_hash_free(struct hlist_head *hash, int bytes)
925{
926 if (!hash)
927 return;
928
929 if (bytes <= PAGE_SIZE)
930 kfree(hash);
931 else
932 free_pages((unsigned long) hash, get_order(bytes));
933}
934
935static void fib_info_hash_move(struct hlist_head *new_info_hash,
936 struct hlist_head *new_laddrhash,
937 unsigned int new_size)
938{
939 struct hlist_head *old_info_hash, *old_laddrhash;
940 unsigned int old_size = fib_info_hash_size;
941 unsigned int i, bytes;
942
943 spin_lock_bh(&fib_info_lock);
944 old_info_hash = fib_info_hash;
945 old_laddrhash = fib_info_laddrhash;
946 fib_info_hash_size = new_size;
947
948 for (i = 0; i < old_size; i++) {
949 struct hlist_head *head = &fib_info_hash[i];
950 struct hlist_node *n;
951 struct fib_info *fi;
952
953 hlist_for_each_entry_safe(fi, n, head, fib_hash) {
954 struct hlist_head *dest;
955 unsigned int new_hash;
956
957 new_hash = fib_info_hashfn(fi);
958 dest = &new_info_hash[new_hash];
959 hlist_add_head(&fi->fib_hash, dest);
960 }
961 }
962 fib_info_hash = new_info_hash;
963
964 for (i = 0; i < old_size; i++) {
965 struct hlist_head *lhead = &fib_info_laddrhash[i];
966 struct hlist_node *n;
967 struct fib_info *fi;
968
969 hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
970 struct hlist_head *ldest;
971 unsigned int new_hash;
972
973 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
974 ldest = &new_laddrhash[new_hash];
975 hlist_add_head(&fi->fib_lhash, ldest);
976 }
977 }
978 fib_info_laddrhash = new_laddrhash;
979
980 spin_unlock_bh(&fib_info_lock);
981
982 bytes = old_size * sizeof(struct hlist_head *);
983 fib_info_hash_free(old_info_hash, bytes);
984 fib_info_hash_free(old_laddrhash, bytes);
985}
986
987__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
988{
989 nh->nh_saddr = inet_select_addr(nh->nh_dev,
990 nh->nh_gw,
991 nh->nh_parent->fib_scope);
992 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
993
994 return nh->nh_saddr;
995}
996
997static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
998{
999 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
1000 fib_prefsrc != cfg->fc_dst) {
1001 u32 tb_id = cfg->fc_table;
1002 int rc;
1003
1004 if (tb_id == RT_TABLE_MAIN)
1005 tb_id = RT_TABLE_LOCAL;
1006
1007 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1008 fib_prefsrc, tb_id);
1009
1010 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
1011 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1012 fib_prefsrc, RT_TABLE_LOCAL);
1013 }
1014
1015 if (rc != RTN_LOCAL)
1016 return false;
1017 }
1018 return true;
1019}
1020
1021static int
1022fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
1023{
1024 return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
1025 fi->fib_metrics->metrics);
1026}
1027
/*
 * Create (or find) the fib_info that describes the nexthops, metrics and
 * other shared attributes of the route in @cfg.
 *
 * @cfg:    route configuration
 * @extack: netlink extended ack used to report errors
 *
 * If an equivalent fib_info is already hashed, the new object is thrown
 * away and the existing one is returned with its tree reference count
 * incremented. Otherwise the new object is linked into the info hash, the
 * preferred-source hash (when it has a prefsrc) and the device hash.
 *
 * Returns the fib_info on success or an ERR_PTR() encoded errno.
 */
struct fib_info *fib_create_info(struct fib_config *cfg,
				 struct netlink_ext_ack *extack)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
	int nhs = 1;
	struct net *net = cfg->fc_nlinfo.nl_net;

	if (cfg->fc_type > RTN_MAX)
		goto err_inval;

	/* the requested scope must not be numerically below the type's scope */
	if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
		NL_SET_ERR_MSG(extack, "Invalid scope");
		goto err_inval;
	}

	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
		goto err_inval;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp) {
		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
		if (nhs == 0)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	/* grow (or first allocate) the hash tables once they are full */
	if (fib_info_cnt >= fib_info_hash_size) {
		unsigned int new_size = fib_info_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 16;
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_info_hash_alloc(bytes);
		new_laddrhash = fib_info_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_info_hash_free(new_info_hash, bytes);
			fib_info_hash_free(new_laddrhash, bytes);
		} else
			fib_info_hash_move(new_info_hash, new_laddrhash, new_size);

		/*
		 * A failed resize is only fatal when there is no table at
		 * all yet; otherwise the old tables keep being used.
		 */
		if (!fib_info_hash_size)
			goto failure;
	}

	/* the nexthop array is allocated together with the fib_info */
	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	if (!fi)
		goto failure;
	if (cfg->fc_mx) {
		fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
		if (unlikely(!fi->fib_metrics)) {
			/*
			 * fib_info_cnt has not been bumped yet, so free
			 * directly instead of going through free_fib_info().
			 */
			kfree(fi);
			return ERR_PTR(err);
		}
		refcount_set(&fi->fib_metrics->refcnt, 1);
	} else {
		/* no metrics requested: share the default block */
		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
	}
	fib_info_cnt++;
	fi->fib_net = net;
	fi->fib_protocol = cfg->fc_protocol;
	fi->fib_scope = cfg->fc_scope;
	fi->fib_flags = cfg->fc_flags;
	fi->fib_priority = cfg->fc_priority;
	fi->fib_prefsrc = cfg->fc_prefsrc;
	fi->fib_type = cfg->fc_type;
	fi->fib_tb_id = cfg->fc_table;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nexthop_nh->nh_parent = fi;
		nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
		if (!nexthop_nh->nh_pcpu_rth_output)
			goto failure;
	} endfor_nexthops(fi)

	err = fib_convert_metrics(fi, cfg);
	if (err)
		goto failure;

	if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
		if (err != 0)
			goto failure;
		/*
		 * Top-level oif / gateway / flow, when given together with
		 * a multipath payload, must agree with the first nexthop.
		 */
		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop device index does not match RTA_OIF");
			goto err_inval;
		}
		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop gateway does not match RTA_GATEWAY");
			goto err_inval;
		}
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop class id does not match RTA_FLOW");
			goto err_inval;
		}
#endif
#else
		NL_SET_ERR_MSG(extack,
			       "Multipath support not enabled in kernel");
		goto err_inval;
#endif
	} else {
		/* single nexthop taken straight from the top-level attributes */
		struct fib_nh *nh = fi->fib_nh;

		if (cfg->fc_encap) {
			struct lwtunnel_state *lwtstate;

			if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
				NL_SET_ERR_MSG(extack,
					       "LWT encap type not specified");
				goto err_inval;
			}
			err = lwtunnel_build_state(cfg->fc_encap_type,
						   cfg->fc_encap, AF_INET, cfg,
						   &lwtstate, extack);
			if (err)
				goto failure;

			nh->nh_lwtstate = lwtstate_get(lwtstate);
		}
		nh->nh_oif = cfg->fc_oif;
		nh->nh_gw = cfg->fc_gw;
		nh->nh_flags = cfg->fc_flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
		nh->nh_tclassid = cfg->fc_flow;
		if (nh->nh_tclassid)
			fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

	/*
	 * Route types that carry an error code take no nexthop information
	 * and skip nexthop validation entirely.
	 */
	if (fib_props[cfg->fc_type].error) {
		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
			NL_SET_ERR_MSG(extack,
				       "Gateway, device and multipath can not be specified for this route type");
			goto err_inval;
		}
		goto link_it;
	} else {
		switch (cfg->fc_type) {
		case RTN_UNICAST:
		case RTN_LOCAL:
		case RTN_BROADCAST:
		case RTN_ANYCAST:
		case RTN_MULTICAST:
			break;
		default:
			NL_SET_ERR_MSG(extack, "Invalid route type");
			goto err_inval;
		}
	}

	if (cfg->fc_scope > RT_SCOPE_HOST) {
		NL_SET_ERR_MSG(extack, "Invalid scope");
		goto err_inval;
	}

	if (cfg->fc_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* host scope: exactly one nexthop, no gateway, device by index */
		if (nhs != 1) {
			NL_SET_ERR_MSG(extack,
				       "Route with host scope can not have multiple nexthops");
			goto err_inval;
		}
		if (nh->nh_gw) {
			NL_SET_ERR_MSG(extack,
				       "Route with host scope can not have a gateway");
			goto err_inval;
		}
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (!nh->nh_dev)
			goto failure;
	} else {
		int linkdown = 0;

		change_nexthops(fi) {
			err = fib_check_nh(cfg, nexthop_nh, extack);
			if (err != 0)
				goto failure;
			if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
				linkdown++;
		} endfor_nexthops(fi)
		/* the route as a whole is link-down only if every nexthop is */
		if (linkdown == fi->fib_nhs)
			fi->fib_flags |= RTNH_F_LINKDOWN;
	}

	if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
		NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
		goto err_inval;
	}

	change_nexthops(fi) {
		fib_info_update_nh_saddr(net, nexthop_nh);
	} endfor_nexthops(fi)

	fib_rebalance(fi);

link_it:
	/* reuse an equivalent, already hashed object when there is one */
	ofi = fib_find_info(fi);
	if (ofi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	refcount_set(&fi->fib_clntref, 1);
	spin_lock_bh(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		/* nexthops without a device are not put on the device hash */
		if (!nexthop_nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nexthop_nh->nh_hash, head);
	} endfor_nexthops(fi)
	spin_unlock_bh(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
	/* free_fib_info() only accepts objects that are marked dead */
	if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}

	return ERR_PTR(err);
}
1290
/*
 * Fill @skb with one rtnetlink route message describing @fi.
 *
 * @skb:    message buffer being built
 * @portid: netlink port id placed in the message header
 * @seq:    netlink sequence number placed in the message header
 * @event:  netlink message type placed in the message header
 * @tb_id:  id of the table the route belongs to
 * @type:   route type stored in rtm_type
 * @dst:    destination prefix (emitted only when @dst_len is non-zero)
 * @dst_len: destination prefix length
 * @tos:    TOS value stored in rtm_tos
 * @fi:     route information to dump
 * @flags:  netlink header flags
 *
 * Returns 0 on success.  Returns -EMSGSIZE if the header or any attribute
 * could not be added; in the attribute case the partially built message is
 * cancelled first.
 */
int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
		  u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
		  struct fib_info *fi, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	/* Ids of 256 and above are replaced by RT_TABLE_COMPAT in rtm_table;
	 * the full id is always carried in the RTA_TABLE attribute.
	 */
	if (tb_id < 256)
		rtm->rtm_table = tb_id;
	else
		rtm->rtm_table = RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, tb_id))
		goto nla_put_failure;
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = fi->fib_scope;
	rtm->rtm_protocol = fi->fib_protocol;

	/* Optional attributes: each is emitted only when its value is set. */
	if (rtm->rtm_dst_len &&
	    nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (fi->fib_priority &&
	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
		goto nla_put_failure;
	if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
		goto nla_put_failure;

	if (fi->fib_prefsrc &&
	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
		goto nla_put_failure;
	/* Single nexthop: its properties go directly into the route message. */
	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw &&
		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
			goto nla_put_failure;
		if (fi->fib_nh->nh_oif &&
		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
			goto nla_put_failure;
		/* A link-down nexthop is additionally reported as dead when
		 * its device is configured to ignore routes with linkdown.
		 */
		if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
			struct in_device *in_dev;

			rcu_read_lock();
			in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
			if (in_dev &&
			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
				rtm->rtm_flags |= RTNH_F_DEAD;
			rcu_read_unlock();
		}
		if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
			rtm->rtm_flags |= RTNH_F_OFFLOAD;
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (fi->fib_nh[0].nh_tclassid &&
		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
			goto nla_put_failure;
#endif
		if (fi->fib_nh->nh_lwtstate &&
		    lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
			goto nla_put_failure;
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Several nexthops: emit one rtnexthop record per nexthop, each
	 * followed by its own attributes, nested inside RTA_MULTIPATH.
	 */
	if (fi->fib_nhs > 1) {
		struct rtnexthop *rtnh;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		for_nexthops(fi) {
			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (!rtnh)
				goto nla_put_failure;

			/* Only the low 8 bits of nh_flags are copied out. */
			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
			if (nh->nh_flags & RTNH_F_LINKDOWN) {
				struct in_device *in_dev;

				rcu_read_lock();
				in_dev = __in_dev_get_rcu(nh->nh_dev);
				if (in_dev &&
				    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
					rtnh->rtnh_flags |= RTNH_F_DEAD;
				rcu_read_unlock();
			}
			/* rtnh_hops carries the weight minus one. */
			rtnh->rtnh_hops = nh->nh_weight - 1;
			rtnh->rtnh_ifindex = nh->nh_oif;

			if (nh->nh_gw &&
			    nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
				goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
			if (nh->nh_tclassid &&
			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
				goto nla_put_failure;
#endif
			if (nh->nh_lwtstate &&
			    lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
				goto nla_put_failure;

			/* Record length = rtnexthop header plus every
			 * attribute appended for this nexthop so far.
			 */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
		} endfor_nexthops(fi);

		nla_nest_end(skb, mp);
	}
#endif
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	/* Drop everything added since nlmsg_put() so @skb stays consistent. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1412
1413
1414
1415
1416
1417
1418
1419int fib_sync_down_addr(struct net_device *dev, __be32 local)
1420{
1421 int ret = 0;
1422 unsigned int hash = fib_laddr_hashfn(local);
1423 struct hlist_head *head = &fib_info_laddrhash[hash];
1424 struct net *net = dev_net(dev);
1425 int tb_id = l3mdev_fib_table(dev);
1426 struct fib_info *fi;
1427
1428 if (!fib_info_laddrhash || local == 0)
1429 return 0;
1430
1431 hlist_for_each_entry(fi, head, fib_lhash) {
1432 if (!net_eq(fi->fib_net, net) ||
1433 fi->fib_tb_id != tb_id)
1434 continue;
1435 if (fi->fib_prefsrc == local) {
1436 fi->fib_flags |= RTNH_F_DEAD;
1437 ret++;
1438 }
1439 }
1440 return ret;
1441}
1442
1443static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
1444 enum fib_event_type event_type)
1445{
1446 struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev);
1447 struct fib_nh_notifier_info info = {
1448 .fib_nh = fib_nh,
1449 };
1450
1451 switch (event_type) {
1452 case FIB_EVENT_NH_ADD:
1453 if (fib_nh->nh_flags & RTNH_F_DEAD)
1454 break;
1455 if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1456 fib_nh->nh_flags & RTNH_F_LINKDOWN)
1457 break;
1458 return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
1459 &info.info);
1460 case FIB_EVENT_NH_DEL:
1461 if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1462 fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
1463 (fib_nh->nh_flags & RTNH_F_DEAD))
1464 return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
1465 event_type, &info.info);
1466 default:
1467 break;
1468 }
1469
1470 return NOTIFY_DONE;
1471}
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
1484{
1485 struct fnhe_hash_bucket *bucket;
1486 int i;
1487
1488 bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
1489 if (!bucket)
1490 return;
1491
1492 for (i = 0; i < FNHE_HASH_SIZE; i++) {
1493 struct fib_nh_exception *fnhe;
1494
1495 for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
1496 fnhe;
1497 fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
1498 if (fnhe->fnhe_mtu_locked) {
1499 if (new <= fnhe->fnhe_pmtu) {
1500 fnhe->fnhe_pmtu = new;
1501 fnhe->fnhe_mtu_locked = false;
1502 }
1503 } else if (new < fnhe->fnhe_pmtu ||
1504 orig == fnhe->fnhe_pmtu) {
1505 fnhe->fnhe_pmtu = new;
1506 }
1507 }
1508 }
1509}
1510
1511void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
1512{
1513 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1514 struct hlist_head *head = &fib_info_devhash[hash];
1515 struct fib_nh *nh;
1516
1517 hlist_for_each_entry(nh, head, nh_hash) {
1518 if (nh->nh_dev == dev)
1519 nh_update_mtu(nh, dev->mtu, orig_mtu);
1520 }
1521}
1522
1523
1524
1525
1526
1527
1528
/*
 * Flag the nexthops that use @dev as unusable after a netdevice event.
 *
 * @dev:   device the event refers to
 * @event: NETDEV_DOWN, NETDEV_UNREGISTER or NETDEV_CHANGE
 * @force: when false, nexthops whose nh_scope is RT_SCOPE_NOWHERE are left
 *         untouched; when true the scope filter value becomes -1
 *
 * Flags applied to each affected nexthop (and to the fib_info once all of
 * its nexthops count as dead):
 *   NETDEV_DOWN, NETDEV_UNREGISTER: RTNH_F_DEAD | RTNH_F_LINKDOWN
 *   NETDEV_CHANGE:                  RTNH_F_LINKDOWN
 *
 * Returns the number of fib_info entries whose nexthops all counted as dead.
 */
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;
	struct fib_info *prev_fi = NULL;
	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
	struct hlist_head *head = &fib_info_devhash[hash];
	struct fib_nh *nh;

	/* NOTE(review): -1 presumably never equals a real nh_scope, so the
	 * scope test below then excludes nothing - confirm.
	 */
	if (force)
		scope = -1;

	hlist_for_each_entry(nh, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int dead;

		BUG_ON(!fi->fib_nhs);
		/* Skip other devices sharing the bucket, and a fib_info that
		 * was handled on the immediately preceding iteration.
		 */
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;
		prev_fi = fi;
		dead = 0;
		change_nexthops(fi) {
			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
				dead++;
			else if (nexthop_nh->nh_dev == dev &&
				 nexthop_nh->nh_scope != scope) {
				switch (event) {
				case NETDEV_DOWN:
				case NETDEV_UNREGISTER:
					nexthop_nh->nh_flags |= RTNH_F_DEAD;
					/* fall through */
				case NETDEV_CHANGE:
					nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
					break;
				}
				call_fib_nh_notifiers(nexthop_nh,
						      FIB_EVENT_NH_DEL);
				dead++;
			}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
			/* On unregister, one nexthop on @dev is enough to
			 * treat the whole fib_info as dead; stop scanning.
			 */
			if (event == NETDEV_UNREGISTER &&
			    nexthop_nh->nh_dev == dev) {
				dead = fi->fib_nhs;
				break;
			}
#endif
		} endfor_nexthops(fi)
		/* Every nexthop is out of service: flag the fib_info too. */
		if (dead == fi->fib_nhs) {
			switch (event) {
			case NETDEV_DOWN:
			case NETDEV_UNREGISTER:
				fi->fib_flags |= RTNH_F_DEAD;
				/* fall through */
			case NETDEV_CHANGE:
				fi->fib_flags |= RTNH_F_LINKDOWN;
				break;
			}
			ret++;
		}

		fib_rebalance(fi);
	}

	return ret;
}
1594
1595
1596static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
1597{
1598 struct fib_info *fi = NULL, *last_resort = NULL;
1599 struct hlist_head *fa_head = res->fa_head;
1600 struct fib_table *tb = res->table;
1601 u8 slen = 32 - res->prefixlen;
1602 int order = -1, last_idx = -1;
1603 struct fib_alias *fa, *fa1 = NULL;
1604 u32 last_prio = res->fi->fib_priority;
1605 u8 last_tos = 0;
1606
1607 hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
1608 struct fib_info *next_fi = fa->fa_info;
1609
1610 if (fa->fa_slen != slen)
1611 continue;
1612 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1613 continue;
1614 if (fa->tb_id != tb->tb_id)
1615 continue;
1616 if (next_fi->fib_priority > last_prio &&
1617 fa->fa_tos == last_tos) {
1618 if (last_tos)
1619 continue;
1620 break;
1621 }
1622 if (next_fi->fib_flags & RTNH_F_DEAD)
1623 continue;
1624 last_tos = fa->fa_tos;
1625 last_prio = next_fi->fib_priority;
1626
1627 if (next_fi->fib_scope != res->scope ||
1628 fa->fa_type != RTN_UNICAST)
1629 continue;
1630 if (!next_fi->fib_nh[0].nh_gw ||
1631 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1632 continue;
1633
1634 fib_alias_accessed(fa);
1635
1636 if (!fi) {
1637 if (next_fi != res->fi)
1638 break;
1639 fa1 = fa;
1640 } else if (!fib_detect_death(fi, order, &last_resort,
1641 &last_idx, fa1->fa_default)) {
1642 fib_result_assign(res, fi);
1643 fa1->fa_default = order;
1644 goto out;
1645 }
1646 fi = next_fi;
1647 order++;
1648 }
1649
1650 if (order <= 0 || !fi) {
1651 if (fa1)
1652 fa1->fa_default = -1;
1653 goto out;
1654 }
1655
1656 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1657 fa1->fa_default)) {
1658 fib_result_assign(res, fi);
1659 fa1->fa_default = order;
1660 goto out;
1661 }
1662
1663 if (last_idx >= 0)
1664 fib_result_assign(res, last_resort);
1665 fa1->fa_default = last_idx;
1666out:
1667 return;
1668}
1669
1670
1671
1672
1673
1674int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1675{
1676 struct fib_info *prev_fi;
1677 unsigned int hash;
1678 struct hlist_head *head;
1679 struct fib_nh *nh;
1680 int ret;
1681
1682 if (!(dev->flags & IFF_UP))
1683 return 0;
1684
1685 if (nh_flags & RTNH_F_DEAD) {
1686 unsigned int flags = dev_get_flags(dev);
1687
1688 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1689 nh_flags |= RTNH_F_LINKDOWN;
1690 }
1691
1692 prev_fi = NULL;
1693 hash = fib_devindex_hashfn(dev->ifindex);
1694 head = &fib_info_devhash[hash];
1695 ret = 0;
1696
1697 hlist_for_each_entry(nh, head, nh_hash) {
1698 struct fib_info *fi = nh->nh_parent;
1699 int alive;
1700
1701 BUG_ON(!fi->fib_nhs);
1702 if (nh->nh_dev != dev || fi == prev_fi)
1703 continue;
1704
1705 prev_fi = fi;
1706 alive = 0;
1707 change_nexthops(fi) {
1708 if (!(nexthop_nh->nh_flags & nh_flags)) {
1709 alive++;
1710 continue;
1711 }
1712 if (!nexthop_nh->nh_dev ||
1713 !(nexthop_nh->nh_dev->flags & IFF_UP))
1714 continue;
1715 if (nexthop_nh->nh_dev != dev ||
1716 !__in_dev_get_rtnl(dev))
1717 continue;
1718 alive++;
1719 nexthop_nh->nh_flags &= ~nh_flags;
1720 call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
1721 } endfor_nexthops(fi)
1722
1723 if (alive > 0) {
1724 fi->fib_flags &= ~nh_flags;
1725 ret++;
1726 }
1727
1728 fib_rebalance(fi);
1729 }
1730
1731 return ret;
1732}
1733
1734#ifdef CONFIG_IP_ROUTE_MULTIPATH
1735static bool fib_good_nh(const struct fib_nh *nh)
1736{
1737 int state = NUD_REACHABLE;
1738
1739 if (nh->nh_scope == RT_SCOPE_LINK) {
1740 struct neighbour *n;
1741
1742 rcu_read_lock_bh();
1743
1744 n = __ipv4_neigh_lookup_noref(nh->nh_dev,
1745 (__force u32)nh->nh_gw);
1746 if (n)
1747 state = n->nud_state;
1748
1749 rcu_read_unlock_bh();
1750 }
1751
1752 return !!(state & NUD_VALID);
1753}
1754
1755void fib_select_multipath(struct fib_result *res, int hash)
1756{
1757 struct fib_info *fi = res->fi;
1758 struct net *net = fi->fib_net;
1759 bool first = false;
1760
1761 for_nexthops(fi) {
1762 if (net->ipv4.sysctl_fib_multipath_use_neigh) {
1763 if (!fib_good_nh(nh))
1764 continue;
1765 if (!first) {
1766 res->nh_sel = nhsel;
1767 first = true;
1768 }
1769 }
1770
1771 if (hash > atomic_read(&nh->nh_upper_bound))
1772 continue;
1773
1774 res->nh_sel = nhsel;
1775 return;
1776 } endfor_nexthops(fi);
1777}
1778#endif
1779
1780void fib_select_path(struct net *net, struct fib_result *res,
1781 struct flowi4 *fl4, const struct sk_buff *skb)
1782{
1783 if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
1784 goto check_saddr;
1785
1786#ifdef CONFIG_IP_ROUTE_MULTIPATH
1787 if (res->fi->fib_nhs > 1) {
1788 int h = fib_multipath_hash(net, fl4, skb, NULL);
1789
1790 fib_select_multipath(res, h);
1791 }
1792 else
1793#endif
1794 if (!res->prefixlen &&
1795 res->table->tb_num_default > 1 &&
1796 res->type == RTN_UNICAST)
1797 fib_select_default(fl4, res);
1798
1799check_saddr:
1800 if (!fl4->saddr)
1801 fl4->saddr = FIB_RES_PREFSRC(net, *res);
1802}
1803