1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <linux/uaccess.h>
17#include <linux/bitops.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/jiffies.h>
21#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/errno.h>
26#include <linux/in.h>
27#include <linux/inet.h>
28#include <linux/inetdevice.h>
29#include <linux/netdevice.h>
30#include <linux/if_arp.h>
31#include <linux/proc_fs.h>
32#include <linux/skbuff.h>
33#include <linux/init.h>
34#include <linux/slab.h>
35#include <linux/netlink.h>
36
37#include <net/arp.h>
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/ip_fib.h>
44#include <net/netlink.h>
45#include <net/nexthop.h>
46#include <net/lwtunnel.h>
47#include <net/fib_notifier.h>
48
49#include "fib_lookup.h"
50
/*
 * fib_info_lock is held (BH-safe variant on the update paths) while the
 * hash lists below are modified or walked in this file.
 */
static DEFINE_SPINLOCK(fib_info_lock);
/* fib_info objects, bucket chosen by fib_info_hashfn(). */
static struct hlist_head *fib_info_hash;
/* fib_info objects with a preferred source, bucket chosen by fib_laddr_hashfn(). */
static struct hlist_head *fib_info_laddrhash;
/* Number of buckets in each of the two tables above (used as "size - 1" mask). */
static unsigned int fib_info_hash_size;
/* Incremented in fib_create_info(), decremented in free_fib_info(). */
static unsigned int fib_info_cnt;

/* Fixed-size table of nexthops, bucket chosen by fib_devindex_hashfn(ifindex). */
#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
 * Nexthop iteration helpers.  Each macro opens a brace scope and a for
 * loop header; the loop body follows the macro and the scope must be
 * closed with endfor_nexthops().
 *
 * for_nexthops(fi)    - declares "nhsel" (index) and "nh"
 *                       (const struct fib_nh *), walks fib_nhs entries.
 * change_nexthops(fi) - declares "nhsel" and "nexthop_nh"
 *                       (non-const struct fib_nh *), walks fib_nhs entries.
 */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0,	nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < (fi)->fib_nhs;					\
	     nexthop_nh++, nhsel++)

#else

/*
 * Non-multipath variants: same declared names, but the loop body runs
 * exactly once for (fi)->fib_nh and the pointer is never advanced.
 */

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif

/* Closes the brace scope opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
91
92
/*
 * Per-route-type properties, indexed by RTN_* value.
 *
 * .error - non-zero for route types for which fib_create_info() rejects a
 *          gateway, output device or multipath specification.
 * .scope - fib_create_info() rejects a request whose fc_scope is
 *          numerically smaller than this value.
 */
const struct fib_prop fib_props[RTN_MAX + 1] = {
	[RTN_UNSPEC] = {
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_UNICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_LOCAL] = {
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},
	[RTN_BROADCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_ANYCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_MULTICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_BLACKHOLE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_UNREACHABLE] = {
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_PROHIBIT] = {
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_THROW] = {
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_NAT] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_XRESOLVE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
};
143
144static void rt_fibinfo_free(struct rtable __rcu **rtp)
145{
146 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
147
148 if (!rt)
149 return;
150
151
152
153
154
155
156 dst_dev_put(&rt->dst);
157 dst_release_immediate(&rt->dst);
158}
159
160static void free_nh_exceptions(struct fib_nh *nh)
161{
162 struct fnhe_hash_bucket *hash;
163 int i;
164
165 hash = rcu_dereference_protected(nh->nh_exceptions, 1);
166 if (!hash)
167 return;
168 for (i = 0; i < FNHE_HASH_SIZE; i++) {
169 struct fib_nh_exception *fnhe;
170
171 fnhe = rcu_dereference_protected(hash[i].chain, 1);
172 while (fnhe) {
173 struct fib_nh_exception *next;
174
175 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
176
177 rt_fibinfo_free(&fnhe->fnhe_rth_input);
178 rt_fibinfo_free(&fnhe->fnhe_rth_output);
179
180 kfree(fnhe);
181
182 fnhe = next;
183 }
184 }
185 kfree(hash);
186}
187
188static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
189{
190 int cpu;
191
192 if (!rtp)
193 return;
194
195 for_each_possible_cpu(cpu) {
196 struct rtable *rt;
197
198 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
199 if (rt) {
200 dst_dev_put(&rt->dst);
201 dst_release_immediate(&rt->dst);
202 }
203 }
204 free_percpu(rtp);
205}
206
207
208static void free_fib_info_rcu(struct rcu_head *head)
209{
210 struct fib_info *fi = container_of(head, struct fib_info, rcu);
211
212 change_nexthops(fi) {
213 if (nexthop_nh->nh_dev)
214 dev_put(nexthop_nh->nh_dev);
215 lwtstate_put(nexthop_nh->nh_lwtstate);
216 free_nh_exceptions(nexthop_nh);
217 rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
218 rt_fibinfo_free(&nexthop_nh->nh_rth_input);
219 } endfor_nexthops(fi);
220
221 ip_fib_metrics_put(fi->fib_metrics);
222
223 kfree(fi);
224}
225
226void free_fib_info(struct fib_info *fi)
227{
228 if (fi->fib_dead == 0) {
229 pr_warn("Freeing alive fib_info %p\n", fi);
230 return;
231 }
232 fib_info_cnt--;
233#ifdef CONFIG_IP_ROUTE_CLASSID
234 change_nexthops(fi) {
235 if (nexthop_nh->nh_tclassid)
236 fi->fib_net->ipv4.fib_num_tclassid_users--;
237 } endfor_nexthops(fi);
238#endif
239 call_rcu(&fi->rcu, free_fib_info_rcu);
240}
241EXPORT_SYMBOL_GPL(free_fib_info);
242
243void fib_release_info(struct fib_info *fi)
244{
245 spin_lock_bh(&fib_info_lock);
246 if (fi && --fi->fib_treeref == 0) {
247 hlist_del(&fi->fib_hash);
248 if (fi->fib_prefsrc)
249 hlist_del(&fi->fib_lhash);
250 change_nexthops(fi) {
251 if (!nexthop_nh->nh_dev)
252 continue;
253 hlist_del(&nexthop_nh->nh_hash);
254 } endfor_nexthops(fi)
255 fi->fib_dead = 1;
256 fib_info_put(fi);
257 }
258 spin_unlock_bh(&fib_info_lock);
259}
260
261static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
262{
263 const struct fib_nh *onh = ofi->fib_nh;
264
265 for_nexthops(fi) {
266 if (nh->nh_oif != onh->nh_oif ||
267 nh->nh_gw != onh->nh_gw ||
268 nh->nh_scope != onh->nh_scope ||
269#ifdef CONFIG_IP_ROUTE_MULTIPATH
270 nh->nh_weight != onh->nh_weight ||
271#endif
272#ifdef CONFIG_IP_ROUTE_CLASSID
273 nh->nh_tclassid != onh->nh_tclassid ||
274#endif
275 lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
276 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
277 return -1;
278 onh++;
279 } endfor_nexthops(fi);
280 return 0;
281}
282
283static inline unsigned int fib_devindex_hashfn(unsigned int val)
284{
285 unsigned int mask = DEVINDEX_HASHSIZE - 1;
286
287 return (val ^
288 (val >> DEVINDEX_HASHBITS) ^
289 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
290}
291
292static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
293{
294 unsigned int mask = (fib_info_hash_size - 1);
295 unsigned int val = fi->fib_nhs;
296
297 val ^= (fi->fib_protocol << 8) | fi->fib_scope;
298 val ^= (__force u32)fi->fib_prefsrc;
299 val ^= fi->fib_priority;
300 for_nexthops(fi) {
301 val ^= fib_devindex_hashfn(nh->nh_oif);
302 } endfor_nexthops(fi)
303
304 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
305}
306
307static struct fib_info *fib_find_info(const struct fib_info *nfi)
308{
309 struct hlist_head *head;
310 struct fib_info *fi;
311 unsigned int hash;
312
313 hash = fib_info_hashfn(nfi);
314 head = &fib_info_hash[hash];
315
316 hlist_for_each_entry(fi, head, fib_hash) {
317 if (!net_eq(fi->fib_net, nfi->fib_net))
318 continue;
319 if (fi->fib_nhs != nfi->fib_nhs)
320 continue;
321 if (nfi->fib_protocol == fi->fib_protocol &&
322 nfi->fib_scope == fi->fib_scope &&
323 nfi->fib_prefsrc == fi->fib_prefsrc &&
324 nfi->fib_priority == fi->fib_priority &&
325 nfi->fib_type == fi->fib_type &&
326 memcmp(nfi->fib_metrics, fi->fib_metrics,
327 sizeof(u32) * RTAX_MAX) == 0 &&
328 !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
329 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
330 return fi;
331 }
332
333 return NULL;
334}
335
336
337
338
339int ip_fib_check_default(__be32 gw, struct net_device *dev)
340{
341 struct hlist_head *head;
342 struct fib_nh *nh;
343 unsigned int hash;
344
345 spin_lock(&fib_info_lock);
346
347 hash = fib_devindex_hashfn(dev->ifindex);
348 head = &fib_info_devhash[hash];
349 hlist_for_each_entry(nh, head, nh_hash) {
350 if (nh->nh_dev == dev &&
351 nh->nh_gw == gw &&
352 !(nh->nh_flags & RTNH_F_DEAD)) {
353 spin_unlock(&fib_info_lock);
354 return 0;
355 }
356 }
357
358 spin_unlock(&fib_info_lock);
359
360 return -1;
361}
362
363static inline size_t fib_nlmsg_size(struct fib_info *fi)
364{
365 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
366 + nla_total_size(4)
367 + nla_total_size(4)
368 + nla_total_size(4)
369 + nla_total_size(4)
370 + nla_total_size(TCP_CA_NAME_MAX);
371
372
373 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
374
375 if (fi->fib_nhs) {
376 size_t nh_encapsize = 0;
377
378
379
380 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
381
382
383 nhsize += 2 * nla_total_size(4);
384
385
386 for_nexthops(fi) {
387 if (nh->nh_lwtstate) {
388
389 nh_encapsize += lwtunnel_get_encap_size(
390 nh->nh_lwtstate);
391
392 nh_encapsize += nla_total_size(2);
393 }
394 } endfor_nexthops(fi);
395
396
397 payload += nla_total_size((fi->fib_nhs * nhsize) +
398 nh_encapsize);
399
400 }
401
402 return payload;
403}
404
405void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
406 int dst_len, u32 tb_id, const struct nl_info *info,
407 unsigned int nlm_flags)
408{
409 struct fib_rt_info fri;
410 struct sk_buff *skb;
411 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
412 int err = -ENOBUFS;
413
414 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
415 if (!skb)
416 goto errout;
417
418 fri.fi = fa->fa_info;
419 fri.tb_id = tb_id;
420 fri.dst = key;
421 fri.dst_len = dst_len;
422 fri.tos = fa->fa_tos;
423 fri.type = fa->fa_type;
424 fri.offload = fa->offload;
425 fri.trap = fa->trap;
426 err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
427 if (err < 0) {
428
429 WARN_ON(err == -EMSGSIZE);
430 kfree_skb(skb);
431 goto errout;
432 }
433 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
434 info->nlh, GFP_KERNEL);
435 return;
436errout:
437 if (err < 0)
438 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
439}
440
441static int fib_detect_death(struct fib_info *fi, int order,
442 struct fib_info **last_resort, int *last_idx,
443 int dflt)
444{
445 struct neighbour *n;
446 int state = NUD_NONE;
447
448 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
449 if (n) {
450 state = n->nud_state;
451 neigh_release(n);
452 } else {
453 return 0;
454 }
455 if (state == NUD_REACHABLE)
456 return 0;
457 if ((state & NUD_VALID) && order != dflt)
458 return 0;
459 if ((state & NUD_VALID) ||
460 (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
461 *last_resort = fi;
462 *last_idx = order;
463 }
464 return 1;
465}
466
467#ifdef CONFIG_IP_ROUTE_MULTIPATH
468
/*
 * Count the rtnexthop records in a buffer of @remaining bytes starting at
 * @rtnh.  Returns the count, or 0 (with an extack message) when bytes are
 * left over after the last well-formed record.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
			      struct netlink_ext_ack *extack)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	if (remaining <= 0)
		return count;

	NL_SET_ERR_MSG(extack,
		       "Invalid nexthop configuration - extra data after nexthops");
	return 0;
}
488
489static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
490 int remaining, struct fib_config *cfg,
491 struct netlink_ext_ack *extack)
492{
493 int ret;
494
495 change_nexthops(fi) {
496 int attrlen;
497
498 if (!rtnh_ok(rtnh, remaining)) {
499 NL_SET_ERR_MSG(extack,
500 "Invalid nexthop configuration - extra data after nexthop");
501 return -EINVAL;
502 }
503
504 if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
505 NL_SET_ERR_MSG(extack,
506 "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
507 return -EINVAL;
508 }
509
510 nexthop_nh->nh_flags =
511 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
512 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
513 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
514
515 attrlen = rtnh_attrlen(rtnh);
516 if (attrlen > 0) {
517 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
518
519 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
520 nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0;
521#ifdef CONFIG_IP_ROUTE_CLASSID
522 nla = nla_find(attrs, attrlen, RTA_FLOW);
523 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
524 if (nexthop_nh->nh_tclassid)
525 fi->fib_net->ipv4.fib_num_tclassid_users++;
526#endif
527 nla = nla_find(attrs, attrlen, RTA_ENCAP);
528 if (nla) {
529 struct lwtunnel_state *lwtstate;
530 struct nlattr *nla_entype;
531
532 nla_entype = nla_find(attrs, attrlen,
533 RTA_ENCAP_TYPE);
534 if (!nla_entype) {
535 NL_SET_BAD_ATTR(extack, nla);
536 NL_SET_ERR_MSG(extack,
537 "Encap type is missing");
538 goto err_inval;
539 }
540
541 ret = lwtunnel_build_state(nla_get_u16(
542 nla_entype),
543 nla, AF_INET, cfg,
544 &lwtstate, extack);
545 if (ret)
546 goto errout;
547 nexthop_nh->nh_lwtstate =
548 lwtstate_get(lwtstate);
549 }
550 }
551
552 rtnh = rtnh_next(rtnh, &remaining);
553 } endfor_nexthops(fi);
554
555 return 0;
556
557err_inval:
558 ret = -EINVAL;
559
560errout:
561 return ret;
562}
563
564static void fib_rebalance(struct fib_info *fi)
565{
566 int total;
567 int w;
568 struct in_device *in_dev;
569
570 if (fi->fib_nhs < 2)
571 return;
572
573 total = 0;
574 for_nexthops(fi) {
575 if (nh->nh_flags & RTNH_F_DEAD)
576 continue;
577
578 in_dev = __in_dev_get_rtnl(nh->nh_dev);
579
580 if (in_dev &&
581 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
582 nh->nh_flags & RTNH_F_LINKDOWN)
583 continue;
584
585 total += nh->nh_weight;
586 } endfor_nexthops(fi);
587
588 w = 0;
589 change_nexthops(fi) {
590 int upper_bound;
591
592 in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
593
594 if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
595 upper_bound = -1;
596 } else if (in_dev &&
597 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
598 nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
599 upper_bound = -1;
600 } else {
601 w += nexthop_nh->nh_weight;
602 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
603 total) - 1;
604 }
605
606 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
607 } endfor_nexthops(fi);
608}
609#else
610
/* Built without CONFIG_IP_ROUTE_MULTIPATH: rebalancing is a no-op. */
#define fib_rebalance(fi) do { } while (0)
612
613#endif
614
615static int fib_encap_match(u16 encap_type,
616 struct nlattr *encap,
617 const struct fib_nh *nh,
618 const struct fib_config *cfg,
619 struct netlink_ext_ack *extack)
620{
621 struct lwtunnel_state *lwtstate;
622 int ret, result = 0;
623
624 if (encap_type == LWTUNNEL_ENCAP_NONE)
625 return 0;
626
627 ret = lwtunnel_build_state(encap_type, encap, AF_INET,
628 cfg, &lwtstate, extack);
629 if (!ret) {
630 result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
631 lwtstate_free(lwtstate);
632 }
633
634 return result;
635}
636
637int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
638 struct netlink_ext_ack *extack)
639{
640#ifdef CONFIG_IP_ROUTE_MULTIPATH
641 struct rtnexthop *rtnh;
642 int remaining;
643#endif
644
645 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
646 return 1;
647
648 if (cfg->fc_oif || cfg->fc_gw) {
649 if (cfg->fc_encap) {
650 if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
651 fi->fib_nh, cfg, extack))
652 return 1;
653 }
654#ifdef CONFIG_IP_ROUTE_CLASSID
655 if (cfg->fc_flow &&
656 cfg->fc_flow != fi->fib_nh->nh_tclassid)
657 return 1;
658#endif
659 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
660 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
661 return 0;
662 return 1;
663 }
664
665#ifdef CONFIG_IP_ROUTE_MULTIPATH
666 if (!cfg->fc_mp)
667 return 0;
668
669 rtnh = cfg->fc_mp;
670 remaining = cfg->fc_mp_len;
671
672 for_nexthops(fi) {
673 int attrlen;
674
675 if (!rtnh_ok(rtnh, remaining))
676 return -EINVAL;
677
678 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
679 return 1;
680
681 attrlen = rtnh_attrlen(rtnh);
682 if (attrlen > 0) {
683 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
684
685 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
686 if (nla && nla_get_in_addr(nla) != nh->nh_gw)
687 return 1;
688#ifdef CONFIG_IP_ROUTE_CLASSID
689 nla = nla_find(attrs, attrlen, RTA_FLOW);
690 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
691 return 1;
692#endif
693 }
694
695 rtnh = rtnh_next(rtnh, &remaining);
696 } endfor_nexthops(fi);
697#endif
698 return 0;
699}
700
701bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
702{
703 struct nlattr *nla;
704 int remaining;
705
706 if (!cfg->fc_mx)
707 return true;
708
709 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
710 int type = nla_type(nla);
711 u32 fi_val, val;
712
713 if (!type)
714 continue;
715 if (type > RTAX_MAX)
716 return false;
717
718 if (type == RTAX_CC_ALGO) {
719 char tmp[TCP_CA_NAME_MAX];
720 bool ecn_ca = false;
721
722 nla_strscpy(tmp, nla, sizeof(tmp));
723 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
724 } else {
725 if (nla_len(nla) != sizeof(u32))
726 return false;
727 val = nla_get_u32(nla);
728 }
729
730 fi_val = fi->fib_metrics->metrics[type - 1];
731 if (type == RTAX_FEATURES)
732 fi_val &= ~DST_FEATURE_ECN_CA;
733
734 if (fi_val != val)
735 return false;
736 }
737
738 return true;
739}
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
/*
 * Validate one nexthop of a route being created and resolve its device.
 *
 * On success nh->nh_dev holds a reference to the egress device,
 * nh->nh_scope is set and RTNH_F_LINKDOWN is set when the device has no
 * carrier.  Three cases are handled:
 *  - gateway with RTNH_F_ONLINK: the device is taken from nh_oif and the
 *    gateway must be a unicast address for that device;
 *  - gateway without ONLINK: the gateway is looked up in the FIB and the
 *    device/oif/scope are taken from the lookup result;
 *  - no gateway: the device is taken from nh_oif and must be up.
 *
 * Returns 0 on success or a negative errno.
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
			struct netlink_ext_ack *extack)
{
	int err = 0;
	struct net *net;
	struct net_device *dev;

	net = cfg->fc_nlinfo.nl_net;
	if (nh->nh_gw) {
		struct fib_result res;

		if (nh->nh_flags & RTNH_F_ONLINK) {
			unsigned int addr_type;

			if (cfg->fc_scope >= RT_SCOPE_LINK) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid scope");
				return -EINVAL;
			}
			dev = __dev_get_by_index(net, nh->nh_oif);
			if (!dev)
				return -ENODEV;
			if (!(dev->flags & IFF_UP)) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop device is not up");
				return -ENETDOWN;
			}
			addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
			if (addr_type != RTN_UNICAST) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid gateway");
				return -EINVAL;
			}
			if (!netif_carrier_ok(dev))
				nh->nh_flags |= RTNH_F_LINKDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		/* From here to "out" the RCU read lock is held. */
		rcu_read_lock();
		{
			struct fib_table *tbl = NULL;
			struct flowi4 fl4 = {
				.daddr = nh->nh_gw,
				.flowi4_scope = cfg->fc_scope + 1,
				.flowi4_oif = nh->nh_oif,
				.flowi4_iif = LOOPBACK_IFINDEX,
			};

			/* Never look the gateway up with a scope value below RT_SCOPE_LINK. */
			if (fl4.flowi4_scope < RT_SCOPE_LINK)
				fl4.flowi4_scope = RT_SCOPE_LINK;

			if (cfg->fc_table)
				tbl = fib_get_table(net, cfg->fc_table);

			if (tbl)
				err = fib_table_lookup(tbl, &fl4, &res,
						       FIB_LOOKUP_IGNORE_LINKSTATE |
						       FIB_LOOKUP_NOREF);

			/*
			 * No table given, or the lookup in the given table
			 * failed: fall back to a full lookup.
			 */
			if (!tbl || err) {
				err = fib_lookup(net, &fl4, &res,
						 FIB_LOOKUP_IGNORE_LINKSTATE);
			}

			if (err) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop has invalid gateway");
				rcu_read_unlock();
				return err;
			}
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			goto out;
		}
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		nh->nh_dev = dev = FIB_RES_DEV(res);
		if (!dev) {
			NL_SET_ERR_MSG(extack,
				       "No egress device for nexthop gateway");
			goto out;
		}
		dev_hold(dev);
		if (!netif_carrier_ok(dev))
			nh->nh_flags |= RTNH_F_LINKDOWN;
		/* The device reference is kept even when -ENETDOWN is returned. */
		err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
	} else {
		struct in_device *in_dev;

		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
			return -EINVAL;
		}
		rcu_read_lock();
		err = -ENODEV;
		in_dev = inetdev_by_index(net, nh->nh_oif);
		if (!in_dev)
			goto out;
		err = -ENETDOWN;
		if (!(in_dev->dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
			goto out;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		if (!netif_carrier_ok(nh->nh_dev))
			nh->nh_flags |= RTNH_F_LINKDOWN;
		err = 0;
	}
out:
	rcu_read_unlock();
	return err;
}
909
910static inline unsigned int fib_laddr_hashfn(__be32 val)
911{
912 unsigned int mask = (fib_info_hash_size - 1);
913
914 return ((__force u32)val ^
915 ((__force u32)val >> 7) ^
916 ((__force u32)val >> 14)) & mask;
917}
918
919static struct hlist_head *fib_info_hash_alloc(int bytes)
920{
921 if (bytes <= PAGE_SIZE)
922 return kzalloc(bytes, GFP_KERNEL);
923 else
924 return (struct hlist_head *)
925 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
926 get_order(bytes));
927}
928
929static void fib_info_hash_free(struct hlist_head *hash, int bytes)
930{
931 if (!hash)
932 return;
933
934 if (bytes <= PAGE_SIZE)
935 kfree(hash);
936 else
937 free_pages((unsigned long) hash, get_order(bytes));
938}
939
940static void fib_info_hash_move(struct hlist_head *new_info_hash,
941 struct hlist_head *new_laddrhash,
942 unsigned int new_size)
943{
944 struct hlist_head *old_info_hash, *old_laddrhash;
945 unsigned int old_size = fib_info_hash_size;
946 unsigned int i, bytes;
947
948 spin_lock_bh(&fib_info_lock);
949 old_info_hash = fib_info_hash;
950 old_laddrhash = fib_info_laddrhash;
951 fib_info_hash_size = new_size;
952
953 for (i = 0; i < old_size; i++) {
954 struct hlist_head *head = &fib_info_hash[i];
955 struct hlist_node *n;
956 struct fib_info *fi;
957
958 hlist_for_each_entry_safe(fi, n, head, fib_hash) {
959 struct hlist_head *dest;
960 unsigned int new_hash;
961
962 new_hash = fib_info_hashfn(fi);
963 dest = &new_info_hash[new_hash];
964 hlist_add_head(&fi->fib_hash, dest);
965 }
966 }
967 fib_info_hash = new_info_hash;
968
969 for (i = 0; i < old_size; i++) {
970 struct hlist_head *lhead = &fib_info_laddrhash[i];
971 struct hlist_node *n;
972 struct fib_info *fi;
973
974 hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
975 struct hlist_head *ldest;
976 unsigned int new_hash;
977
978 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
979 ldest = &new_laddrhash[new_hash];
980 hlist_add_head(&fi->fib_lhash, ldest);
981 }
982 }
983 fib_info_laddrhash = new_laddrhash;
984
985 spin_unlock_bh(&fib_info_lock);
986
987 bytes = old_size * sizeof(struct hlist_head *);
988 fib_info_hash_free(old_info_hash, bytes);
989 fib_info_hash_free(old_laddrhash, bytes);
990}
991
992__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
993{
994 nh->nh_saddr = inet_select_addr(nh->nh_dev,
995 nh->nh_gw,
996 nh->nh_parent->fib_scope);
997 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
998
999 return nh->nh_saddr;
1000}
1001
1002static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
1003{
1004 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
1005 fib_prefsrc != cfg->fc_dst) {
1006 u32 tb_id = cfg->fc_table;
1007 int rc;
1008
1009 if (tb_id == RT_TABLE_MAIN)
1010 tb_id = RT_TABLE_LOCAL;
1011
1012 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1013 fib_prefsrc, tb_id);
1014
1015 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
1016 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1017 fib_prefsrc, RT_TABLE_LOCAL);
1018 }
1019
1020 if (rc != RTN_LOCAL)
1021 return false;
1022 }
1023 return true;
1024}
1025
1026struct fib_info *fib_create_info(struct fib_config *cfg,
1027 struct netlink_ext_ack *extack)
1028{
1029 int err;
1030 struct fib_info *fi = NULL;
1031 struct fib_info *ofi;
1032 int nhs = 1;
1033 struct net *net = cfg->fc_nlinfo.nl_net;
1034
1035 if (cfg->fc_type > RTN_MAX)
1036 goto err_inval;
1037
1038
1039 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
1040 NL_SET_ERR_MSG(extack, "Invalid scope");
1041 goto err_inval;
1042 }
1043
1044 if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
1045 NL_SET_ERR_MSG(extack,
1046 "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
1047 goto err_inval;
1048 }
1049
1050#ifdef CONFIG_IP_ROUTE_MULTIPATH
1051 if (cfg->fc_mp) {
1052 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
1053 if (nhs == 0)
1054 goto err_inval;
1055 }
1056#endif
1057
1058 err = -ENOBUFS;
1059 if (fib_info_cnt >= fib_info_hash_size) {
1060 unsigned int new_size = fib_info_hash_size << 1;
1061 struct hlist_head *new_info_hash;
1062 struct hlist_head *new_laddrhash;
1063 unsigned int bytes;
1064
1065 if (!new_size)
1066 new_size = 16;
1067 bytes = new_size * sizeof(struct hlist_head *);
1068 new_info_hash = fib_info_hash_alloc(bytes);
1069 new_laddrhash = fib_info_hash_alloc(bytes);
1070 if (!new_info_hash || !new_laddrhash) {
1071 fib_info_hash_free(new_info_hash, bytes);
1072 fib_info_hash_free(new_laddrhash, bytes);
1073 } else
1074 fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
1075
1076 if (!fib_info_hash_size)
1077 goto failure;
1078 }
1079
1080 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
1081 if (!fi)
1082 goto failure;
1083 fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
1084 cfg->fc_mx_len, extack);
1085 if (unlikely(IS_ERR(fi->fib_metrics))) {
1086 err = PTR_ERR(fi->fib_metrics);
1087 kfree(fi);
1088 return ERR_PTR(err);
1089 }
1090
1091 fib_info_cnt++;
1092 fi->fib_net = net;
1093 fi->fib_protocol = cfg->fc_protocol;
1094 fi->fib_scope = cfg->fc_scope;
1095 fi->fib_flags = cfg->fc_flags;
1096 fi->fib_priority = cfg->fc_priority;
1097 fi->fib_prefsrc = cfg->fc_prefsrc;
1098 fi->fib_type = cfg->fc_type;
1099 fi->fib_tb_id = cfg->fc_table;
1100
1101 fi->fib_nhs = nhs;
1102 change_nexthops(fi) {
1103 nexthop_nh->nh_parent = fi;
1104 nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
1105 if (!nexthop_nh->nh_pcpu_rth_output)
1106 goto failure;
1107 } endfor_nexthops(fi)
1108
1109 if (cfg->fc_mp) {
1110#ifdef CONFIG_IP_ROUTE_MULTIPATH
1111 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
1112 if (err != 0)
1113 goto failure;
1114 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
1115 NL_SET_ERR_MSG(extack,
1116 "Nexthop device index does not match RTA_OIF");
1117 goto err_inval;
1118 }
1119 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
1120 NL_SET_ERR_MSG(extack,
1121 "Nexthop gateway does not match RTA_GATEWAY");
1122 goto err_inval;
1123 }
1124#ifdef CONFIG_IP_ROUTE_CLASSID
1125 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
1126 NL_SET_ERR_MSG(extack,
1127 "Nexthop class id does not match RTA_FLOW");
1128 goto err_inval;
1129 }
1130#endif
1131#else
1132 NL_SET_ERR_MSG(extack,
1133 "Multipath support not enabled in kernel");
1134 goto err_inval;
1135#endif
1136 } else {
1137 struct fib_nh *nh = fi->fib_nh;
1138
1139 if (cfg->fc_encap) {
1140 struct lwtunnel_state *lwtstate;
1141
1142 if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
1143 NL_SET_ERR_MSG(extack,
1144 "LWT encap type not specified");
1145 goto err_inval;
1146 }
1147 err = lwtunnel_build_state(cfg->fc_encap_type,
1148 cfg->fc_encap, AF_INET, cfg,
1149 &lwtstate, extack);
1150 if (err)
1151 goto failure;
1152
1153 nh->nh_lwtstate = lwtstate_get(lwtstate);
1154 }
1155 nh->nh_oif = cfg->fc_oif;
1156 nh->nh_gw = cfg->fc_gw;
1157 nh->nh_flags = cfg->fc_flags;
1158#ifdef CONFIG_IP_ROUTE_CLASSID
1159 nh->nh_tclassid = cfg->fc_flow;
1160 if (nh->nh_tclassid)
1161 fi->fib_net->ipv4.fib_num_tclassid_users++;
1162#endif
1163#ifdef CONFIG_IP_ROUTE_MULTIPATH
1164 nh->nh_weight = 1;
1165#endif
1166 }
1167
1168 if (fib_props[cfg->fc_type].error) {
1169 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
1170 NL_SET_ERR_MSG(extack,
1171 "Gateway, device and multipath can not be specified for this route type");
1172 goto err_inval;
1173 }
1174 goto link_it;
1175 } else {
1176 switch (cfg->fc_type) {
1177 case RTN_UNICAST:
1178 case RTN_LOCAL:
1179 case RTN_BROADCAST:
1180 case RTN_ANYCAST:
1181 case RTN_MULTICAST:
1182 break;
1183 default:
1184 NL_SET_ERR_MSG(extack, "Invalid route type");
1185 goto err_inval;
1186 }
1187 }
1188
1189 if (cfg->fc_scope > RT_SCOPE_HOST) {
1190 NL_SET_ERR_MSG(extack, "Invalid scope");
1191 goto err_inval;
1192 }
1193
1194 if (cfg->fc_scope == RT_SCOPE_HOST) {
1195 struct fib_nh *nh = fi->fib_nh;
1196
1197
1198 if (nhs != 1) {
1199 NL_SET_ERR_MSG(extack,
1200 "Route with host scope can not have multiple nexthops");
1201 goto err_inval;
1202 }
1203 if (nh->nh_gw) {
1204 NL_SET_ERR_MSG(extack,
1205 "Route with host scope can not have a gateway");
1206 goto err_inval;
1207 }
1208 nh->nh_scope = RT_SCOPE_NOWHERE;
1209 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
1210 err = -ENODEV;
1211 if (!nh->nh_dev)
1212 goto failure;
1213 } else {
1214 int linkdown = 0;
1215
1216 change_nexthops(fi) {
1217 err = fib_check_nh(cfg, nexthop_nh, extack);
1218 if (err != 0)
1219 goto failure;
1220 if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
1221 linkdown++;
1222 } endfor_nexthops(fi)
1223 if (linkdown == fi->fib_nhs)
1224 fi->fib_flags |= RTNH_F_LINKDOWN;
1225 }
1226
1227 if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
1228 NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
1229 goto err_inval;
1230 }
1231
1232 change_nexthops(fi) {
1233 fib_info_update_nh_saddr(net, nexthop_nh);
1234 } endfor_nexthops(fi)
1235
1236 fib_rebalance(fi);
1237
1238link_it:
1239 ofi = fib_find_info(fi);
1240 if (ofi) {
1241 fi->fib_dead = 1;
1242 free_fib_info(fi);
1243 ofi->fib_treeref++;
1244 return ofi;
1245 }
1246
1247 fi->fib_treeref++;
1248 refcount_set(&fi->fib_clntref, 1);
1249 spin_lock_bh(&fib_info_lock);
1250 hlist_add_head(&fi->fib_hash,
1251 &fib_info_hash[fib_info_hashfn(fi)]);
1252 if (fi->fib_prefsrc) {
1253 struct hlist_head *head;
1254
1255 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
1256 hlist_add_head(&fi->fib_lhash, head);
1257 }
1258 change_nexthops(fi) {
1259 struct hlist_head *head;
1260 unsigned int hash;
1261
1262 if (!nexthop_nh->nh_dev)
1263 continue;
1264 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
1265 head = &fib_info_devhash[hash];
1266 hlist_add_head(&nexthop_nh->nh_hash, head);
1267 } endfor_nexthops(fi)
1268 spin_unlock_bh(&fib_info_lock);
1269 return fi;
1270
1271err_inval:
1272 err = -EINVAL;
1273
1274failure:
1275 if (fi) {
1276 fi->fib_dead = 1;
1277 free_fib_info(fi);
1278 }
1279
1280 return ERR_PTR(err);
1281}
1282
1283int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1284 struct fib_rt_info *fri, unsigned int flags)
1285{
1286 struct fib_info *fi = fri->fi;
1287 u32 tb_id = fri->tb_id;
1288 struct nlmsghdr *nlh;
1289 struct rtmsg *rtm;
1290
1291 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
1292 if (!nlh)
1293 return -EMSGSIZE;
1294
1295 rtm = nlmsg_data(nlh);
1296 rtm->rtm_family = AF_INET;
1297 rtm->rtm_dst_len = fri->dst_len;
1298 rtm->rtm_src_len = 0;
1299 rtm->rtm_tos = fri->tos;
1300 if (tb_id < 256)
1301 rtm->rtm_table = tb_id;
1302 else
1303 rtm->rtm_table = RT_TABLE_COMPAT;
1304 if (nla_put_u32(skb, RTA_TABLE, tb_id))
1305 goto nla_put_failure;
1306 rtm->rtm_type = fri->type;
1307 rtm->rtm_flags = fi->fib_flags;
1308 rtm->rtm_scope = fi->fib_scope;
1309 rtm->rtm_protocol = fi->fib_protocol;
1310
1311 if (rtm->rtm_dst_len &&
1312 nla_put_in_addr(skb, RTA_DST, fri->dst))
1313 goto nla_put_failure;
1314 if (fi->fib_priority &&
1315 nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
1316 goto nla_put_failure;
1317 if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
1318 goto nla_put_failure;
1319
1320 if (fi->fib_prefsrc &&
1321 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
1322 goto nla_put_failure;
1323 if (fi->fib_nhs == 1) {
1324 if (fi->fib_nh->nh_gw &&
1325 nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
1326 goto nla_put_failure;
1327 if (fi->fib_nh->nh_oif &&
1328 nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
1329 goto nla_put_failure;
1330 if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
1331 struct in_device *in_dev;
1332
1333 rcu_read_lock();
1334 in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
1335 if (in_dev &&
1336 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1337 rtm->rtm_flags |= RTNH_F_DEAD;
1338 rcu_read_unlock();
1339 }
1340 if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
1341 rtm->rtm_flags |= RTNH_F_OFFLOAD;
1342#ifdef CONFIG_IP_ROUTE_CLASSID
1343 if (fi->fib_nh[0].nh_tclassid &&
1344 nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
1345 goto nla_put_failure;
1346#endif
1347 if (fi->fib_nh->nh_lwtstate &&
1348 lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
1349 goto nla_put_failure;
1350 }
1351#ifdef CONFIG_IP_ROUTE_MULTIPATH
1352 if (fi->fib_nhs > 1) {
1353 struct rtnexthop *rtnh;
1354 struct nlattr *mp;
1355
1356 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
1357 if (!mp)
1358 goto nla_put_failure;
1359
1360 for_nexthops(fi) {
1361 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1362 if (!rtnh)
1363 goto nla_put_failure;
1364
1365 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1366 if (nh->nh_flags & RTNH_F_LINKDOWN) {
1367 struct in_device *in_dev;
1368
1369 rcu_read_lock();
1370 in_dev = __in_dev_get_rcu(nh->nh_dev);
1371 if (in_dev &&
1372 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1373 rtnh->rtnh_flags |= RTNH_F_DEAD;
1374 rcu_read_unlock();
1375 }
1376 rtnh->rtnh_hops = nh->nh_weight - 1;
1377 rtnh->rtnh_ifindex = nh->nh_oif;
1378
1379 if (nh->nh_gw &&
1380 nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
1381 goto nla_put_failure;
1382#ifdef CONFIG_IP_ROUTE_CLASSID
1383 if (nh->nh_tclassid &&
1384 nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
1385 goto nla_put_failure;
1386#endif
1387 if (nh->nh_lwtstate &&
1388 lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
1389 goto nla_put_failure;
1390
1391
1392 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1393 } endfor_nexthops(fi);
1394
1395 nla_nest_end(skb, mp);
1396 }
1397#endif
1398
1399 if (fri->offload)
1400 rtm->rtm_flags |= RTM_F_OFFLOAD;
1401 if (fri->trap)
1402 rtm->rtm_flags |= RTM_F_TRAP;
1403
1404 nlmsg_end(skb, nlh);
1405 return 0;
1406
1407nla_put_failure:
1408 nlmsg_cancel(skb, nlh);
1409 return -EMSGSIZE;
1410}
1411
1412
1413
1414
1415
1416
1417
1418int fib_sync_down_addr(struct net_device *dev, __be32 local)
1419{
1420 int ret = 0;
1421 unsigned int hash = fib_laddr_hashfn(local);
1422 struct hlist_head *head = &fib_info_laddrhash[hash];
1423 int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
1424 struct net *net = dev_net(dev);
1425 struct fib_info *fi;
1426
1427 if (!fib_info_laddrhash || local == 0)
1428 return 0;
1429
1430 hlist_for_each_entry(fi, head, fib_lhash) {
1431 if (!net_eq(fi->fib_net, net) ||
1432 fi->fib_tb_id != tb_id)
1433 continue;
1434 if (fi->fib_prefsrc == local) {
1435 fi->fib_flags |= RTNH_F_DEAD;
1436 ret++;
1437 }
1438 }
1439 return ret;
1440}
1441
1442static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
1443 enum fib_event_type event_type)
1444{
1445 struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev);
1446 struct fib_nh_notifier_info info = {
1447 .fib_nh = fib_nh,
1448 };
1449
1450 switch (event_type) {
1451 case FIB_EVENT_NH_ADD:
1452 if (fib_nh->nh_flags & RTNH_F_DEAD)
1453 break;
1454 if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1455 fib_nh->nh_flags & RTNH_F_LINKDOWN)
1456 break;
1457 return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
1458 &info.info);
1459 case FIB_EVENT_NH_DEL:
1460 if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1461 fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
1462 (fib_nh->nh_flags & RTNH_F_DEAD))
1463 return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
1464 event_type, &info.info);
1465 default:
1466 break;
1467 }
1468
1469 return NOTIFY_DONE;
1470}
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
1483{
1484 struct fnhe_hash_bucket *bucket;
1485 int i;
1486
1487 bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
1488 if (!bucket)
1489 return;
1490
1491 for (i = 0; i < FNHE_HASH_SIZE; i++) {
1492 struct fib_nh_exception *fnhe;
1493
1494 for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
1495 fnhe;
1496 fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
1497 if (fnhe->fnhe_mtu_locked) {
1498 if (new <= fnhe->fnhe_pmtu) {
1499 fnhe->fnhe_pmtu = new;
1500 fnhe->fnhe_mtu_locked = false;
1501 }
1502 } else if (new < fnhe->fnhe_pmtu ||
1503 orig == fnhe->fnhe_pmtu) {
1504 fnhe->fnhe_pmtu = new;
1505 }
1506 }
1507 }
1508}
1509
1510void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
1511{
1512 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1513 struct hlist_head *head = &fib_info_devhash[hash];
1514 struct fib_nh *nh;
1515
1516 hlist_for_each_entry(nh, head, nh_hash) {
1517 if (nh->nh_dev == dev)
1518 nh_update_mtu(nh, dev->mtu, orig_mtu);
1519 }
1520}
1521
1522
1523
1524
1525
1526
1527
1528int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
1529{
1530 int ret = 0;
1531 int scope = RT_SCOPE_NOWHERE;
1532 struct fib_info *prev_fi = NULL;
1533 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1534 struct hlist_head *head = &fib_info_devhash[hash];
1535 struct fib_nh *nh;
1536
1537 if (force)
1538 scope = -1;
1539
1540 hlist_for_each_entry(nh, head, nh_hash) {
1541 struct fib_info *fi = nh->nh_parent;
1542 int dead;
1543
1544 BUG_ON(!fi->fib_nhs);
1545 if (nh->nh_dev != dev || fi == prev_fi)
1546 continue;
1547 prev_fi = fi;
1548 dead = 0;
1549 change_nexthops(fi) {
1550 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1551 dead++;
1552 else if (nexthop_nh->nh_dev == dev &&
1553 nexthop_nh->nh_scope != scope) {
1554 switch (event) {
1555 case NETDEV_DOWN:
1556 case NETDEV_UNREGISTER:
1557 nexthop_nh->nh_flags |= RTNH_F_DEAD;
1558
1559 case NETDEV_CHANGE:
1560 nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
1561 break;
1562 }
1563 call_fib_nh_notifiers(nexthop_nh,
1564 FIB_EVENT_NH_DEL);
1565 dead++;
1566 }
1567#ifdef CONFIG_IP_ROUTE_MULTIPATH
1568 if (event == NETDEV_UNREGISTER &&
1569 nexthop_nh->nh_dev == dev) {
1570 dead = fi->fib_nhs;
1571 break;
1572 }
1573#endif
1574 } endfor_nexthops(fi)
1575 if (dead == fi->fib_nhs) {
1576 switch (event) {
1577 case NETDEV_DOWN:
1578 case NETDEV_UNREGISTER:
1579 fi->fib_flags |= RTNH_F_DEAD;
1580
1581 case NETDEV_CHANGE:
1582 fi->fib_flags |= RTNH_F_LINKDOWN;
1583 break;
1584 }
1585 ret++;
1586 }
1587
1588 fib_rebalance(fi);
1589 }
1590
1591 return ret;
1592}
1593
1594
1595static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
1596{
1597 struct fib_info *fi = NULL, *last_resort = NULL;
1598 struct hlist_head *fa_head = res->fa_head;
1599 struct fib_table *tb = res->table;
1600 u8 slen = 32 - res->prefixlen;
1601 int order = -1, last_idx = -1;
1602 struct fib_alias *fa, *fa1 = NULL;
1603 u32 last_prio = res->fi->fib_priority;
1604 u8 last_tos = 0;
1605
1606 hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
1607 struct fib_info *next_fi = fa->fa_info;
1608
1609 if (fa->fa_slen != slen)
1610 continue;
1611 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1612 continue;
1613 if (fa->tb_id != tb->tb_id)
1614 continue;
1615 if (next_fi->fib_priority > last_prio &&
1616 fa->fa_tos == last_tos) {
1617 if (last_tos)
1618 continue;
1619 break;
1620 }
1621 if (next_fi->fib_flags & RTNH_F_DEAD)
1622 continue;
1623 last_tos = fa->fa_tos;
1624 last_prio = next_fi->fib_priority;
1625
1626 if (next_fi->fib_scope != res->scope ||
1627 fa->fa_type != RTN_UNICAST)
1628 continue;
1629 if (!next_fi->fib_nh[0].nh_gw ||
1630 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1631 continue;
1632
1633 fib_alias_accessed(fa);
1634
1635 if (!fi) {
1636 if (next_fi != res->fi)
1637 break;
1638 fa1 = fa;
1639 } else if (!fib_detect_death(fi, order, &last_resort,
1640 &last_idx, fa1->fa_default)) {
1641 fib_result_assign(res, fi);
1642 fa1->fa_default = order;
1643 goto out;
1644 }
1645 fi = next_fi;
1646 order++;
1647 }
1648
1649 if (order <= 0 || !fi) {
1650 if (fa1)
1651 fa1->fa_default = -1;
1652 goto out;
1653 }
1654
1655 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1656 fa1->fa_default)) {
1657 fib_result_assign(res, fi);
1658 fa1->fa_default = order;
1659 goto out;
1660 }
1661
1662 if (last_idx >= 0)
1663 fib_result_assign(res, last_resort);
1664 fa1->fa_default = last_idx;
1665out:
1666 return;
1667}
1668
1669
1670
1671
1672
1673int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1674{
1675 struct fib_info *prev_fi;
1676 unsigned int hash;
1677 struct hlist_head *head;
1678 struct fib_nh *nh;
1679 int ret;
1680
1681 if (!(dev->flags & IFF_UP))
1682 return 0;
1683
1684 if (nh_flags & RTNH_F_DEAD) {
1685 unsigned int flags = dev_get_flags(dev);
1686
1687 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1688 nh_flags |= RTNH_F_LINKDOWN;
1689 }
1690
1691 prev_fi = NULL;
1692 hash = fib_devindex_hashfn(dev->ifindex);
1693 head = &fib_info_devhash[hash];
1694 ret = 0;
1695
1696 hlist_for_each_entry(nh, head, nh_hash) {
1697 struct fib_info *fi = nh->nh_parent;
1698 int alive;
1699
1700 BUG_ON(!fi->fib_nhs);
1701 if (nh->nh_dev != dev || fi == prev_fi)
1702 continue;
1703
1704 prev_fi = fi;
1705 alive = 0;
1706 change_nexthops(fi) {
1707 if (!(nexthop_nh->nh_flags & nh_flags)) {
1708 alive++;
1709 continue;
1710 }
1711 if (!nexthop_nh->nh_dev ||
1712 !(nexthop_nh->nh_dev->flags & IFF_UP))
1713 continue;
1714 if (nexthop_nh->nh_dev != dev ||
1715 !__in_dev_get_rtnl(dev))
1716 continue;
1717 alive++;
1718 nexthop_nh->nh_flags &= ~nh_flags;
1719 call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
1720 } endfor_nexthops(fi)
1721
1722 if (alive > 0) {
1723 fi->fib_flags &= ~nh_flags;
1724 ret++;
1725 }
1726
1727 fib_rebalance(fi);
1728 }
1729
1730 return ret;
1731}
1732
1733#ifdef CONFIG_IP_ROUTE_MULTIPATH
1734static bool fib_good_nh(const struct fib_nh *nh)
1735{
1736 int state = NUD_REACHABLE;
1737
1738 if (nh->nh_scope == RT_SCOPE_LINK) {
1739 struct neighbour *n;
1740
1741 rcu_read_lock_bh();
1742
1743 n = __ipv4_neigh_lookup_noref(nh->nh_dev,
1744 (__force u32)nh->nh_gw);
1745 if (n)
1746 state = n->nud_state;
1747
1748 rcu_read_unlock_bh();
1749 }
1750
1751 return !!(state & NUD_VALID);
1752}
1753
1754void fib_select_multipath(struct fib_result *res, int hash)
1755{
1756 struct fib_info *fi = res->fi;
1757 struct net *net = fi->fib_net;
1758 bool first = false;
1759
1760 for_nexthops(fi) {
1761 if (net->ipv4.sysctl_fib_multipath_use_neigh) {
1762 if (!fib_good_nh(nh))
1763 continue;
1764 if (!first) {
1765 res->nh_sel = nhsel;
1766 first = true;
1767 }
1768 }
1769
1770 if (hash > atomic_read(&nh->nh_upper_bound))
1771 continue;
1772
1773 res->nh_sel = nhsel;
1774 return;
1775 } endfor_nexthops(fi);
1776}
1777#endif
1778
1779void fib_select_path(struct net *net, struct fib_result *res,
1780 struct flowi4 *fl4, const struct sk_buff *skb)
1781{
1782 if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
1783 goto check_saddr;
1784
1785#ifdef CONFIG_IP_ROUTE_MULTIPATH
1786 if (res->fi->fib_nhs > 1) {
1787 int h = fib_multipath_hash(net, fl4, skb, NULL);
1788
1789 fib_select_multipath(res, h);
1790 }
1791 else
1792#endif
1793 if (!res->prefixlen &&
1794 res->table->tb_num_default > 1 &&
1795 res->type == RTN_UNICAST)
1796 fib_select_default(fl4, res);
1797
1798check_saddr:
1799 if (!fl4->saddr)
1800 fl4->saddr = FIB_RES_PREFSRC(net, *res);
1801}
1802