1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <linux/uaccess.h>
17#include <linux/bitops.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/jiffies.h>
21#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/errno.h>
26#include <linux/in.h>
27#include <linux/inet.h>
28#include <linux/inetdevice.h>
29#include <linux/netdevice.h>
30#include <linux/if_arp.h>
31#include <linux/proc_fs.h>
32#include <linux/skbuff.h>
33#include <linux/init.h>
34#include <linux/slab.h>
35#include <linux/netlink.h>
36
37#include <net/arp.h>
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/ip_fib.h>
44#include <net/netlink.h>
45#include <net/nexthop.h>
46#include <net/lwtunnel.h>
47
48#include "fib_lookup.h"
49
50static DEFINE_SPINLOCK(fib_info_lock);
51static struct hlist_head *fib_info_hash;
52static struct hlist_head *fib_info_laddrhash;
53static unsigned int fib_info_hash_size;
54static unsigned int fib_info_cnt;
55
56#define DEVINDEX_HASHBITS 8
57#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
58static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) opens a scope that declares an index `nhsel` and a
 * read-only cursor `nh`, then starts a loop over the nexthops of @fi.
 * change_nexthops(fi) is the same with a writable cursor `nexthop_nh`.
 * Either one must be closed with endfor_nexthops(fi), which supplies the
 * closing brace of that scope.
 *
 * Without CONFIG_IP_ROUTE_MULTIPATH the loop body runs exactly once, on
 * the first nexthop.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (; nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++)

#else /* !CONFIG_IP_ROUTE_MULTIPATH */

#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
90
91
92const struct fib_prop fib_props[RTN_MAX + 1] = {
93 [RTN_UNSPEC] = {
94 .error = 0,
95 .scope = RT_SCOPE_NOWHERE,
96 },
97 [RTN_UNICAST] = {
98 .error = 0,
99 .scope = RT_SCOPE_UNIVERSE,
100 },
101 [RTN_LOCAL] = {
102 .error = 0,
103 .scope = RT_SCOPE_HOST,
104 },
105 [RTN_BROADCAST] = {
106 .error = 0,
107 .scope = RT_SCOPE_LINK,
108 },
109 [RTN_ANYCAST] = {
110 .error = 0,
111 .scope = RT_SCOPE_LINK,
112 },
113 [RTN_MULTICAST] = {
114 .error = 0,
115 .scope = RT_SCOPE_UNIVERSE,
116 },
117 [RTN_BLACKHOLE] = {
118 .error = -EINVAL,
119 .scope = RT_SCOPE_UNIVERSE,
120 },
121 [RTN_UNREACHABLE] = {
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 },
125 [RTN_PROHIBIT] = {
126 .error = -EACCES,
127 .scope = RT_SCOPE_UNIVERSE,
128 },
129 [RTN_THROW] = {
130 .error = -EAGAIN,
131 .scope = RT_SCOPE_UNIVERSE,
132 },
133 [RTN_NAT] = {
134 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE,
136 },
137 [RTN_XRESOLVE] = {
138 .error = -EINVAL,
139 .scope = RT_SCOPE_NOWHERE,
140 },
141};
142
143static void rt_fibinfo_free(struct rtable __rcu **rtp)
144{
145 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
146
147 if (!rt)
148 return;
149
150
151
152
153
154
155 dst_dev_put(&rt->dst);
156 dst_release_immediate(&rt->dst);
157}
158
159static void free_nh_exceptions(struct fib_nh *nh)
160{
161 struct fnhe_hash_bucket *hash;
162 int i;
163
164 hash = rcu_dereference_protected(nh->nh_exceptions, 1);
165 if (!hash)
166 return;
167 for (i = 0; i < FNHE_HASH_SIZE; i++) {
168 struct fib_nh_exception *fnhe;
169
170 fnhe = rcu_dereference_protected(hash[i].chain, 1);
171 while (fnhe) {
172 struct fib_nh_exception *next;
173
174 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
175
176 rt_fibinfo_free(&fnhe->fnhe_rth_input);
177 rt_fibinfo_free(&fnhe->fnhe_rth_output);
178
179 kfree(fnhe);
180
181 fnhe = next;
182 }
183 }
184 kfree(hash);
185}
186
187static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
188{
189 int cpu;
190
191 if (!rtp)
192 return;
193
194 for_each_possible_cpu(cpu) {
195 struct rtable *rt;
196
197 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
198 if (rt) {
199 dst_dev_put(&rt->dst);
200 dst_release_immediate(&rt->dst);
201 }
202 }
203 free_percpu(rtp);
204}
205
206
207static void free_fib_info_rcu(struct rcu_head *head)
208{
209 struct fib_info *fi = container_of(head, struct fib_info, rcu);
210 struct dst_metrics *m;
211
212 change_nexthops(fi) {
213 if (nexthop_nh->nh_dev)
214 dev_put(nexthop_nh->nh_dev);
215 lwtstate_put(nexthop_nh->nh_lwtstate);
216 free_nh_exceptions(nexthop_nh);
217 rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
218 rt_fibinfo_free(&nexthop_nh->nh_rth_input);
219 } endfor_nexthops(fi);
220
221 m = fi->fib_metrics;
222 if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
223 kfree(m);
224 kfree(fi);
225}
226
227void free_fib_info(struct fib_info *fi)
228{
229 if (fi->fib_dead == 0) {
230 pr_warn("Freeing alive fib_info %p\n", fi);
231 return;
232 }
233 fib_info_cnt--;
234#ifdef CONFIG_IP_ROUTE_CLASSID
235 change_nexthops(fi) {
236 if (nexthop_nh->nh_tclassid)
237 fi->fib_net->ipv4.fib_num_tclassid_users--;
238 } endfor_nexthops(fi);
239#endif
240 call_rcu(&fi->rcu, free_fib_info_rcu);
241}
242EXPORT_SYMBOL_GPL(free_fib_info);
243
244void fib_release_info(struct fib_info *fi)
245{
246 spin_lock_bh(&fib_info_lock);
247 if (fi && --fi->fib_treeref == 0) {
248 hlist_del(&fi->fib_hash);
249 if (fi->fib_prefsrc)
250 hlist_del(&fi->fib_lhash);
251 change_nexthops(fi) {
252 if (!nexthop_nh->nh_dev)
253 continue;
254 hlist_del(&nexthop_nh->nh_hash);
255 } endfor_nexthops(fi)
256 fi->fib_dead = 1;
257 fib_info_put(fi);
258 }
259 spin_unlock_bh(&fib_info_lock);
260}
261
262static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
263{
264 const struct fib_nh *onh = ofi->fib_nh;
265
266 for_nexthops(fi) {
267 if (nh->nh_oif != onh->nh_oif ||
268 nh->nh_gw != onh->nh_gw ||
269 nh->nh_scope != onh->nh_scope ||
270#ifdef CONFIG_IP_ROUTE_MULTIPATH
271 nh->nh_weight != onh->nh_weight ||
272#endif
273#ifdef CONFIG_IP_ROUTE_CLASSID
274 nh->nh_tclassid != onh->nh_tclassid ||
275#endif
276 lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
277 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
278 return -1;
279 onh++;
280 } endfor_nexthops(fi);
281 return 0;
282}
283
284static inline unsigned int fib_devindex_hashfn(unsigned int val)
285{
286 unsigned int mask = DEVINDEX_HASHSIZE - 1;
287
288 return (val ^
289 (val >> DEVINDEX_HASHBITS) ^
290 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
291}
292
293static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
294{
295 unsigned int mask = (fib_info_hash_size - 1);
296 unsigned int val = fi->fib_nhs;
297
298 val ^= (fi->fib_protocol << 8) | fi->fib_scope;
299 val ^= (__force u32)fi->fib_prefsrc;
300 val ^= fi->fib_priority;
301 for_nexthops(fi) {
302 val ^= fib_devindex_hashfn(nh->nh_oif);
303 } endfor_nexthops(fi)
304
305 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
306}
307
308static struct fib_info *fib_find_info(const struct fib_info *nfi)
309{
310 struct hlist_head *head;
311 struct fib_info *fi;
312 unsigned int hash;
313
314 hash = fib_info_hashfn(nfi);
315 head = &fib_info_hash[hash];
316
317 hlist_for_each_entry(fi, head, fib_hash) {
318 if (!net_eq(fi->fib_net, nfi->fib_net))
319 continue;
320 if (fi->fib_nhs != nfi->fib_nhs)
321 continue;
322 if (nfi->fib_protocol == fi->fib_protocol &&
323 nfi->fib_scope == fi->fib_scope &&
324 nfi->fib_prefsrc == fi->fib_prefsrc &&
325 nfi->fib_priority == fi->fib_priority &&
326 nfi->fib_type == fi->fib_type &&
327 memcmp(nfi->fib_metrics, fi->fib_metrics,
328 sizeof(u32) * RTAX_MAX) == 0 &&
329 !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
330 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
331 return fi;
332 }
333
334 return NULL;
335}
336
337
338
339
340int ip_fib_check_default(__be32 gw, struct net_device *dev)
341{
342 struct hlist_head *head;
343 struct fib_nh *nh;
344 unsigned int hash;
345
346 spin_lock(&fib_info_lock);
347
348 hash = fib_devindex_hashfn(dev->ifindex);
349 head = &fib_info_devhash[hash];
350 hlist_for_each_entry(nh, head, nh_hash) {
351 if (nh->nh_dev == dev &&
352 nh->nh_gw == gw &&
353 !(nh->nh_flags & RTNH_F_DEAD)) {
354 spin_unlock(&fib_info_lock);
355 return 0;
356 }
357 }
358
359 spin_unlock(&fib_info_lock);
360
361 return -1;
362}
363
364static inline size_t fib_nlmsg_size(struct fib_info *fi)
365{
366 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
367 + nla_total_size(4)
368 + nla_total_size(4)
369 + nla_total_size(4)
370 + nla_total_size(4)
371 + nla_total_size(TCP_CA_NAME_MAX);
372
373
374 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
375
376 if (fi->fib_nhs) {
377 size_t nh_encapsize = 0;
378
379
380
381 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
382
383
384 nhsize += 2 * nla_total_size(4);
385
386
387 for_nexthops(fi) {
388 if (nh->nh_lwtstate) {
389
390 nh_encapsize += lwtunnel_get_encap_size(
391 nh->nh_lwtstate);
392
393 nh_encapsize += nla_total_size(2);
394 }
395 } endfor_nexthops(fi);
396
397
398 payload += nla_total_size((fi->fib_nhs * nhsize) +
399 nh_encapsize);
400
401 }
402
403 return payload;
404}
405
406void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
407 int dst_len, u32 tb_id, const struct nl_info *info,
408 unsigned int nlm_flags)
409{
410 struct sk_buff *skb;
411 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
412 int err = -ENOBUFS;
413
414 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
415 if (!skb)
416 goto errout;
417
418 err = fib_dump_info(skb, info->portid, seq, event, tb_id,
419 fa->fa_type, key, dst_len,
420 fa->fa_tos, fa->fa_info, nlm_flags);
421 if (err < 0) {
422
423 WARN_ON(err == -EMSGSIZE);
424 kfree_skb(skb);
425 goto errout;
426 }
427 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
428 info->nlh, GFP_KERNEL);
429 return;
430errout:
431 if (err < 0)
432 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
433}
434
435static int fib_detect_death(struct fib_info *fi, int order,
436 struct fib_info **last_resort, int *last_idx,
437 int dflt)
438{
439 struct neighbour *n;
440 int state = NUD_NONE;
441
442 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
443 if (n) {
444 state = n->nud_state;
445 neigh_release(n);
446 } else {
447 return 0;
448 }
449 if (state == NUD_REACHABLE)
450 return 0;
451 if ((state & NUD_VALID) && order != dflt)
452 return 0;
453 if ((state & NUD_VALID) ||
454 (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
455 *last_resort = fi;
456 *last_idx = order;
457 }
458 return 1;
459}
460
461#ifdef CONFIG_IP_ROUTE_MULTIPATH
462
/*
 * Count the rtnexthop records in a multipath payload of @remaining bytes
 * starting at @rtnh.
 *
 * Returns the number of records, or 0 (with an extack message) when bytes
 * are left over after the last well-formed record.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
			      struct netlink_ext_ack *extack)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	if (remaining <= 0)
		return count;

	NL_SET_ERR_MSG(extack,
		       "Invalid nexthop configuration - extra data after nexthops");
	return 0;
}
482
483static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
484 int remaining, struct fib_config *cfg,
485 struct netlink_ext_ack *extack)
486{
487 int ret;
488
489 change_nexthops(fi) {
490 int attrlen;
491
492 if (!rtnh_ok(rtnh, remaining)) {
493 NL_SET_ERR_MSG(extack,
494 "Invalid nexthop configuration - extra data after nexthop");
495 return -EINVAL;
496 }
497
498 if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
499 NL_SET_ERR_MSG(extack,
500 "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
501 return -EINVAL;
502 }
503
504 nexthop_nh->nh_flags =
505 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
506 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
507 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
508
509 attrlen = rtnh_attrlen(rtnh);
510 if (attrlen > 0) {
511 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
512
513 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
514 nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0;
515#ifdef CONFIG_IP_ROUTE_CLASSID
516 nla = nla_find(attrs, attrlen, RTA_FLOW);
517 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
518 if (nexthop_nh->nh_tclassid)
519 fi->fib_net->ipv4.fib_num_tclassid_users++;
520#endif
521 nla = nla_find(attrs, attrlen, RTA_ENCAP);
522 if (nla) {
523 struct lwtunnel_state *lwtstate;
524 struct nlattr *nla_entype;
525
526 nla_entype = nla_find(attrs, attrlen,
527 RTA_ENCAP_TYPE);
528 if (!nla_entype) {
529 NL_SET_BAD_ATTR(extack, nla);
530 NL_SET_ERR_MSG(extack,
531 "Encap type is missing");
532 goto err_inval;
533 }
534
535 ret = lwtunnel_build_state(nla_get_u16(
536 nla_entype),
537 nla, AF_INET, cfg,
538 &lwtstate, extack);
539 if (ret)
540 goto errout;
541 nexthop_nh->nh_lwtstate =
542 lwtstate_get(lwtstate);
543 }
544 }
545
546 rtnh = rtnh_next(rtnh, &remaining);
547 } endfor_nexthops(fi);
548
549 return 0;
550
551err_inval:
552 ret = -EINVAL;
553
554errout:
555 return ret;
556}
557
558static void fib_rebalance(struct fib_info *fi)
559{
560 int total;
561 int w;
562 struct in_device *in_dev;
563
564 if (fi->fib_nhs < 2)
565 return;
566
567 total = 0;
568 for_nexthops(fi) {
569 if (nh->nh_flags & RTNH_F_DEAD)
570 continue;
571
572 in_dev = __in_dev_get_rtnl(nh->nh_dev);
573
574 if (in_dev &&
575 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
576 nh->nh_flags & RTNH_F_LINKDOWN)
577 continue;
578
579 total += nh->nh_weight;
580 } endfor_nexthops(fi);
581
582 w = 0;
583 change_nexthops(fi) {
584 int upper_bound;
585
586 in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
587
588 if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
589 upper_bound = -1;
590 } else if (in_dev &&
591 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
592 nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
593 upper_bound = -1;
594 } else {
595 w += nexthop_nh->nh_weight;
596 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
597 total) - 1;
598 }
599
600 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
601 } endfor_nexthops(fi);
602}
603
604static inline void fib_add_weight(struct fib_info *fi,
605 const struct fib_nh *nh)
606{
607 fi->fib_weight += nh->nh_weight;
608}
609
610#else
611
612#define fib_rebalance(fi) do { } while (0)
613#define fib_add_weight(fi, nh) do { } while (0)
614
615#endif
616
617static int fib_encap_match(u16 encap_type,
618 struct nlattr *encap,
619 const struct fib_nh *nh,
620 const struct fib_config *cfg,
621 struct netlink_ext_ack *extack)
622{
623 struct lwtunnel_state *lwtstate;
624 int ret, result = 0;
625
626 if (encap_type == LWTUNNEL_ENCAP_NONE)
627 return 0;
628
629 ret = lwtunnel_build_state(encap_type, encap, AF_INET,
630 cfg, &lwtstate, extack);
631 if (!ret) {
632 result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
633 lwtstate_free(lwtstate);
634 }
635
636 return result;
637}
638
639int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
640 struct netlink_ext_ack *extack)
641{
642#ifdef CONFIG_IP_ROUTE_MULTIPATH
643 struct rtnexthop *rtnh;
644 int remaining;
645#endif
646
647 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
648 return 1;
649
650 if (cfg->fc_oif || cfg->fc_gw) {
651 if (cfg->fc_encap) {
652 if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
653 fi->fib_nh, cfg, extack))
654 return 1;
655 }
656 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
657 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
658 return 0;
659 return 1;
660 }
661
662#ifdef CONFIG_IP_ROUTE_MULTIPATH
663 if (!cfg->fc_mp)
664 return 0;
665
666 rtnh = cfg->fc_mp;
667 remaining = cfg->fc_mp_len;
668
669 for_nexthops(fi) {
670 int attrlen;
671
672 if (!rtnh_ok(rtnh, remaining))
673 return -EINVAL;
674
675 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
676 return 1;
677
678 attrlen = rtnh_attrlen(rtnh);
679 if (attrlen > 0) {
680 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
681
682 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
683 if (nla && nla_get_in_addr(nla) != nh->nh_gw)
684 return 1;
685#ifdef CONFIG_IP_ROUTE_CLASSID
686 nla = nla_find(attrs, attrlen, RTA_FLOW);
687 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
688 return 1;
689#endif
690 }
691
692 rtnh = rtnh_next(rtnh, &remaining);
693 } endfor_nexthops(fi);
694#endif
695 return 0;
696}
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
743 struct fib_nh *nh, struct netlink_ext_ack *extack)
744{
745 int err = 0;
746 struct net *net;
747 struct net_device *dev;
748
749 net = cfg->fc_nlinfo.nl_net;
750 if (nh->nh_gw) {
751 struct fib_result res;
752
753 if (nh->nh_flags & RTNH_F_ONLINK) {
754 unsigned int addr_type;
755
756 if (cfg->fc_scope >= RT_SCOPE_LINK) {
757 NL_SET_ERR_MSG(extack,
758 "Nexthop has invalid scope");
759 return -EINVAL;
760 }
761 dev = __dev_get_by_index(net, nh->nh_oif);
762 if (!dev)
763 return -ENODEV;
764 if (!(dev->flags & IFF_UP)) {
765 NL_SET_ERR_MSG(extack,
766 "Nexthop device is not up");
767 return -ENETDOWN;
768 }
769 addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
770 if (addr_type != RTN_UNICAST) {
771 NL_SET_ERR_MSG(extack,
772 "Nexthop has invalid gateway");
773 return -EINVAL;
774 }
775 if (!netif_carrier_ok(dev))
776 nh->nh_flags |= RTNH_F_LINKDOWN;
777 nh->nh_dev = dev;
778 dev_hold(dev);
779 nh->nh_scope = RT_SCOPE_LINK;
780 return 0;
781 }
782 rcu_read_lock();
783 {
784 struct fib_table *tbl = NULL;
785 struct flowi4 fl4 = {
786 .daddr = nh->nh_gw,
787 .flowi4_scope = cfg->fc_scope + 1,
788 .flowi4_oif = nh->nh_oif,
789 .flowi4_iif = LOOPBACK_IFINDEX,
790 };
791
792
793 if (fl4.flowi4_scope < RT_SCOPE_LINK)
794 fl4.flowi4_scope = RT_SCOPE_LINK;
795
796 if (cfg->fc_table)
797 tbl = fib_get_table(net, cfg->fc_table);
798
799 if (tbl)
800 err = fib_table_lookup(tbl, &fl4, &res,
801 FIB_LOOKUP_IGNORE_LINKSTATE |
802 FIB_LOOKUP_NOREF);
803
804
805
806
807
808 if (!tbl || err) {
809 err = fib_lookup(net, &fl4, &res,
810 FIB_LOOKUP_IGNORE_LINKSTATE);
811 }
812
813 if (err) {
814 NL_SET_ERR_MSG(extack,
815 "Nexthop has invalid gateway");
816 rcu_read_unlock();
817 return err;
818 }
819 }
820 err = -EINVAL;
821 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
822 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
823 goto out;
824 }
825 nh->nh_scope = res.scope;
826 nh->nh_oif = FIB_RES_OIF(res);
827 nh->nh_dev = dev = FIB_RES_DEV(res);
828 if (!dev) {
829 NL_SET_ERR_MSG(extack,
830 "No egress device for nexthop gateway");
831 goto out;
832 }
833 dev_hold(dev);
834 if (!netif_carrier_ok(dev))
835 nh->nh_flags |= RTNH_F_LINKDOWN;
836 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
837 } else {
838 struct in_device *in_dev;
839
840 if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
841 NL_SET_ERR_MSG(extack,
842 "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
843 return -EINVAL;
844 }
845 rcu_read_lock();
846 err = -ENODEV;
847 in_dev = inetdev_by_index(net, nh->nh_oif);
848 if (!in_dev)
849 goto out;
850 err = -ENETDOWN;
851 if (!(in_dev->dev->flags & IFF_UP)) {
852 NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
853 goto out;
854 }
855 nh->nh_dev = in_dev->dev;
856 dev_hold(nh->nh_dev);
857 nh->nh_scope = RT_SCOPE_HOST;
858 if (!netif_carrier_ok(nh->nh_dev))
859 nh->nh_flags |= RTNH_F_LINKDOWN;
860 err = 0;
861 }
862out:
863 rcu_read_unlock();
864 return err;
865}
866
867static inline unsigned int fib_laddr_hashfn(__be32 val)
868{
869 unsigned int mask = (fib_info_hash_size - 1);
870
871 return ((__force u32)val ^
872 ((__force u32)val >> 7) ^
873 ((__force u32)val >> 14)) & mask;
874}
875
876static struct hlist_head *fib_info_hash_alloc(int bytes)
877{
878 if (bytes <= PAGE_SIZE)
879 return kzalloc(bytes, GFP_KERNEL);
880 else
881 return (struct hlist_head *)
882 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
883 get_order(bytes));
884}
885
886static void fib_info_hash_free(struct hlist_head *hash, int bytes)
887{
888 if (!hash)
889 return;
890
891 if (bytes <= PAGE_SIZE)
892 kfree(hash);
893 else
894 free_pages((unsigned long) hash, get_order(bytes));
895}
896
897static void fib_info_hash_move(struct hlist_head *new_info_hash,
898 struct hlist_head *new_laddrhash,
899 unsigned int new_size)
900{
901 struct hlist_head *old_info_hash, *old_laddrhash;
902 unsigned int old_size = fib_info_hash_size;
903 unsigned int i, bytes;
904
905 spin_lock_bh(&fib_info_lock);
906 old_info_hash = fib_info_hash;
907 old_laddrhash = fib_info_laddrhash;
908 fib_info_hash_size = new_size;
909
910 for (i = 0; i < old_size; i++) {
911 struct hlist_head *head = &fib_info_hash[i];
912 struct hlist_node *n;
913 struct fib_info *fi;
914
915 hlist_for_each_entry_safe(fi, n, head, fib_hash) {
916 struct hlist_head *dest;
917 unsigned int new_hash;
918
919 new_hash = fib_info_hashfn(fi);
920 dest = &new_info_hash[new_hash];
921 hlist_add_head(&fi->fib_hash, dest);
922 }
923 }
924 fib_info_hash = new_info_hash;
925
926 for (i = 0; i < old_size; i++) {
927 struct hlist_head *lhead = &fib_info_laddrhash[i];
928 struct hlist_node *n;
929 struct fib_info *fi;
930
931 hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
932 struct hlist_head *ldest;
933 unsigned int new_hash;
934
935 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
936 ldest = &new_laddrhash[new_hash];
937 hlist_add_head(&fi->fib_lhash, ldest);
938 }
939 }
940 fib_info_laddrhash = new_laddrhash;
941
942 spin_unlock_bh(&fib_info_lock);
943
944 bytes = old_size * sizeof(struct hlist_head *);
945 fib_info_hash_free(old_info_hash, bytes);
946 fib_info_hash_free(old_laddrhash, bytes);
947}
948
949__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
950{
951 nh->nh_saddr = inet_select_addr(nh->nh_dev,
952 nh->nh_gw,
953 nh->nh_parent->fib_scope);
954 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
955
956 return nh->nh_saddr;
957}
958
959static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
960{
961 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
962 fib_prefsrc != cfg->fc_dst) {
963 u32 tb_id = cfg->fc_table;
964 int rc;
965
966 if (tb_id == RT_TABLE_MAIN)
967 tb_id = RT_TABLE_LOCAL;
968
969 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
970 fib_prefsrc, tb_id);
971
972 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
973 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
974 fib_prefsrc, RT_TABLE_LOCAL);
975 }
976
977 if (rc != RTN_LOCAL)
978 return false;
979 }
980 return true;
981}
982
983static int
984fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
985{
986 bool ecn_ca = false;
987 struct nlattr *nla;
988 int remaining;
989
990 if (!cfg->fc_mx)
991 return 0;
992
993 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
994 int type = nla_type(nla);
995 u32 val;
996
997 if (!type)
998 continue;
999 if (type > RTAX_MAX)
1000 return -EINVAL;
1001
1002 if (type == RTAX_CC_ALGO) {
1003 char tmp[TCP_CA_NAME_MAX];
1004
1005 nla_strlcpy(tmp, nla, sizeof(tmp));
1006 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1007 if (val == TCP_CA_UNSPEC)
1008 return -EINVAL;
1009 } else {
1010 val = nla_get_u32(nla);
1011 }
1012 if (type == RTAX_ADVMSS && val > 65535 - 40)
1013 val = 65535 - 40;
1014 if (type == RTAX_MTU && val > 65535 - 15)
1015 val = 65535 - 15;
1016 if (type == RTAX_HOPLIMIT && val > 255)
1017 val = 255;
1018 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1019 return -EINVAL;
1020 fi->fib_metrics->metrics[type - 1] = val;
1021 }
1022
1023 if (ecn_ca)
1024 fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1025
1026 return 0;
1027}
1028
1029struct fib_info *fib_create_info(struct fib_config *cfg,
1030 struct netlink_ext_ack *extack)
1031{
1032 int err;
1033 struct fib_info *fi = NULL;
1034 struct fib_info *ofi;
1035 int nhs = 1;
1036 struct net *net = cfg->fc_nlinfo.nl_net;
1037
1038 if (cfg->fc_type > RTN_MAX)
1039 goto err_inval;
1040
1041
1042 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
1043 NL_SET_ERR_MSG(extack, "Invalid scope");
1044 goto err_inval;
1045 }
1046
1047 if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
1048 NL_SET_ERR_MSG(extack,
1049 "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
1050 goto err_inval;
1051 }
1052
1053#ifdef CONFIG_IP_ROUTE_MULTIPATH
1054 if (cfg->fc_mp) {
1055 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
1056 if (nhs == 0)
1057 goto err_inval;
1058 }
1059#endif
1060
1061 err = -ENOBUFS;
1062 if (fib_info_cnt >= fib_info_hash_size) {
1063 unsigned int new_size = fib_info_hash_size << 1;
1064 struct hlist_head *new_info_hash;
1065 struct hlist_head *new_laddrhash;
1066 unsigned int bytes;
1067
1068 if (!new_size)
1069 new_size = 16;
1070 bytes = new_size * sizeof(struct hlist_head *);
1071 new_info_hash = fib_info_hash_alloc(bytes);
1072 new_laddrhash = fib_info_hash_alloc(bytes);
1073 if (!new_info_hash || !new_laddrhash) {
1074 fib_info_hash_free(new_info_hash, bytes);
1075 fib_info_hash_free(new_laddrhash, bytes);
1076 } else
1077 fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
1078
1079 if (!fib_info_hash_size)
1080 goto failure;
1081 }
1082
1083 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
1084 if (!fi)
1085 goto failure;
1086 if (cfg->fc_mx) {
1087 fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
1088 if (unlikely(!fi->fib_metrics)) {
1089 kfree(fi);
1090 return ERR_PTR(err);
1091 }
1092 atomic_set(&fi->fib_metrics->refcnt, 1);
1093 } else {
1094 fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
1095 }
1096 fib_info_cnt++;
1097 fi->fib_net = net;
1098 fi->fib_protocol = cfg->fc_protocol;
1099 fi->fib_scope = cfg->fc_scope;
1100 fi->fib_flags = cfg->fc_flags;
1101 fi->fib_priority = cfg->fc_priority;
1102 fi->fib_prefsrc = cfg->fc_prefsrc;
1103 fi->fib_type = cfg->fc_type;
1104 fi->fib_tb_id = cfg->fc_table;
1105
1106 fi->fib_nhs = nhs;
1107 change_nexthops(fi) {
1108 nexthop_nh->nh_parent = fi;
1109 nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
1110 if (!nexthop_nh->nh_pcpu_rth_output)
1111 goto failure;
1112 } endfor_nexthops(fi)
1113
1114 err = fib_convert_metrics(fi, cfg);
1115 if (err)
1116 goto failure;
1117
1118 if (cfg->fc_mp) {
1119#ifdef CONFIG_IP_ROUTE_MULTIPATH
1120 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
1121 if (err != 0)
1122 goto failure;
1123 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
1124 NL_SET_ERR_MSG(extack,
1125 "Nexthop device index does not match RTA_OIF");
1126 goto err_inval;
1127 }
1128 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
1129 NL_SET_ERR_MSG(extack,
1130 "Nexthop gateway does not match RTA_GATEWAY");
1131 goto err_inval;
1132 }
1133#ifdef CONFIG_IP_ROUTE_CLASSID
1134 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
1135 NL_SET_ERR_MSG(extack,
1136 "Nexthop class id does not match RTA_FLOW");
1137 goto err_inval;
1138 }
1139#endif
1140#else
1141 NL_SET_ERR_MSG(extack,
1142 "Multipath support not enabled in kernel");
1143 goto err_inval;
1144#endif
1145 } else {
1146 struct fib_nh *nh = fi->fib_nh;
1147
1148 if (cfg->fc_encap) {
1149 struct lwtunnel_state *lwtstate;
1150
1151 if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
1152 NL_SET_ERR_MSG(extack,
1153 "LWT encap type not specified");
1154 goto err_inval;
1155 }
1156 err = lwtunnel_build_state(cfg->fc_encap_type,
1157 cfg->fc_encap, AF_INET, cfg,
1158 &lwtstate, extack);
1159 if (err)
1160 goto failure;
1161
1162 nh->nh_lwtstate = lwtstate_get(lwtstate);
1163 }
1164 nh->nh_oif = cfg->fc_oif;
1165 nh->nh_gw = cfg->fc_gw;
1166 nh->nh_flags = cfg->fc_flags;
1167#ifdef CONFIG_IP_ROUTE_CLASSID
1168 nh->nh_tclassid = cfg->fc_flow;
1169 if (nh->nh_tclassid)
1170 fi->fib_net->ipv4.fib_num_tclassid_users++;
1171#endif
1172#ifdef CONFIG_IP_ROUTE_MULTIPATH
1173 nh->nh_weight = 1;
1174#endif
1175 }
1176
1177 if (fib_props[cfg->fc_type].error) {
1178 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
1179 NL_SET_ERR_MSG(extack,
1180 "Gateway, device and multipath can not be specified for this route type");
1181 goto err_inval;
1182 }
1183 goto link_it;
1184 } else {
1185 switch (cfg->fc_type) {
1186 case RTN_UNICAST:
1187 case RTN_LOCAL:
1188 case RTN_BROADCAST:
1189 case RTN_ANYCAST:
1190 case RTN_MULTICAST:
1191 break;
1192 default:
1193 NL_SET_ERR_MSG(extack, "Invalid route type");
1194 goto err_inval;
1195 }
1196 }
1197
1198 if (cfg->fc_scope > RT_SCOPE_HOST) {
1199 NL_SET_ERR_MSG(extack, "Invalid scope");
1200 goto err_inval;
1201 }
1202
1203 if (cfg->fc_scope == RT_SCOPE_HOST) {
1204 struct fib_nh *nh = fi->fib_nh;
1205
1206
1207 if (nhs != 1) {
1208 NL_SET_ERR_MSG(extack,
1209 "Route with host scope can not have multiple nexthops");
1210 goto err_inval;
1211 }
1212 if (nh->nh_gw) {
1213 NL_SET_ERR_MSG(extack,
1214 "Route with host scope can not have a gateway");
1215 goto err_inval;
1216 }
1217 nh->nh_scope = RT_SCOPE_NOWHERE;
1218 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
1219 err = -ENODEV;
1220 if (!nh->nh_dev)
1221 goto failure;
1222 } else {
1223 int linkdown = 0;
1224
1225 change_nexthops(fi) {
1226 err = fib_check_nh(cfg, fi, nexthop_nh, extack);
1227 if (err != 0)
1228 goto failure;
1229 if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
1230 linkdown++;
1231 } endfor_nexthops(fi)
1232 if (linkdown == fi->fib_nhs)
1233 fi->fib_flags |= RTNH_F_LINKDOWN;
1234 }
1235
1236 if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
1237 NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
1238 goto err_inval;
1239 }
1240
1241 change_nexthops(fi) {
1242 fib_info_update_nh_saddr(net, nexthop_nh);
1243 fib_add_weight(fi, nexthop_nh);
1244 } endfor_nexthops(fi)
1245
1246 fib_rebalance(fi);
1247
1248link_it:
1249 ofi = fib_find_info(fi);
1250 if (ofi) {
1251 fi->fib_dead = 1;
1252 free_fib_info(fi);
1253 ofi->fib_treeref++;
1254 return ofi;
1255 }
1256
1257 fi->fib_treeref++;
1258 refcount_set(&fi->fib_clntref, 1);
1259 spin_lock_bh(&fib_info_lock);
1260 hlist_add_head(&fi->fib_hash,
1261 &fib_info_hash[fib_info_hashfn(fi)]);
1262 if (fi->fib_prefsrc) {
1263 struct hlist_head *head;
1264
1265 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
1266 hlist_add_head(&fi->fib_lhash, head);
1267 }
1268 change_nexthops(fi) {
1269 struct hlist_head *head;
1270 unsigned int hash;
1271
1272 if (!nexthop_nh->nh_dev)
1273 continue;
1274 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
1275 head = &fib_info_devhash[hash];
1276 hlist_add_head(&nexthop_nh->nh_hash, head);
1277 } endfor_nexthops(fi)
1278 spin_unlock_bh(&fib_info_lock);
1279 return fi;
1280
1281err_inval:
1282 err = -EINVAL;
1283
1284failure:
1285 if (fi) {
1286 fi->fib_dead = 1;
1287 free_fib_info(fi);
1288 }
1289
1290 return ERR_PTR(err);
1291}
1292
1293int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1294 u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
1295 struct fib_info *fi, unsigned int flags)
1296{
1297 struct nlmsghdr *nlh;
1298 struct rtmsg *rtm;
1299
1300 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
1301 if (!nlh)
1302 return -EMSGSIZE;
1303
1304 rtm = nlmsg_data(nlh);
1305 rtm->rtm_family = AF_INET;
1306 rtm->rtm_dst_len = dst_len;
1307 rtm->rtm_src_len = 0;
1308 rtm->rtm_tos = tos;
1309 if (tb_id < 256)
1310 rtm->rtm_table = tb_id;
1311 else
1312 rtm->rtm_table = RT_TABLE_COMPAT;
1313 if (nla_put_u32(skb, RTA_TABLE, tb_id))
1314 goto nla_put_failure;
1315 rtm->rtm_type = type;
1316 rtm->rtm_flags = fi->fib_flags;
1317 rtm->rtm_scope = fi->fib_scope;
1318 rtm->rtm_protocol = fi->fib_protocol;
1319
1320 if (rtm->rtm_dst_len &&
1321 nla_put_in_addr(skb, RTA_DST, dst))
1322 goto nla_put_failure;
1323 if (fi->fib_priority &&
1324 nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
1325 goto nla_put_failure;
1326 if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
1327 goto nla_put_failure;
1328
1329 if (fi->fib_prefsrc &&
1330 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
1331 goto nla_put_failure;
1332 if (fi->fib_nhs == 1) {
1333 struct in_device *in_dev;
1334
1335 if (fi->fib_nh->nh_gw &&
1336 nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
1337 goto nla_put_failure;
1338 if (fi->fib_nh->nh_oif &&
1339 nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
1340 goto nla_put_failure;
1341 if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
1342 in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
1343 if (in_dev &&
1344 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1345 rtm->rtm_flags |= RTNH_F_DEAD;
1346 }
1347#ifdef CONFIG_IP_ROUTE_CLASSID
1348 if (fi->fib_nh[0].nh_tclassid &&
1349 nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
1350 goto nla_put_failure;
1351#endif
1352 if (fi->fib_nh->nh_lwtstate &&
1353 lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
1354 goto nla_put_failure;
1355 }
1356#ifdef CONFIG_IP_ROUTE_MULTIPATH
1357 if (fi->fib_nhs > 1) {
1358 struct rtnexthop *rtnh;
1359 struct nlattr *mp;
1360
1361 mp = nla_nest_start(skb, RTA_MULTIPATH);
1362 if (!mp)
1363 goto nla_put_failure;
1364
1365 for_nexthops(fi) {
1366 struct in_device *in_dev;
1367
1368 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1369 if (!rtnh)
1370 goto nla_put_failure;
1371
1372 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1373 if (nh->nh_flags & RTNH_F_LINKDOWN) {
1374 in_dev = __in_dev_get_rtnl(nh->nh_dev);
1375 if (in_dev &&
1376 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1377 rtnh->rtnh_flags |= RTNH_F_DEAD;
1378 }
1379 rtnh->rtnh_hops = nh->nh_weight - 1;
1380 rtnh->rtnh_ifindex = nh->nh_oif;
1381
1382 if (nh->nh_gw &&
1383 nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
1384 goto nla_put_failure;
1385#ifdef CONFIG_IP_ROUTE_CLASSID
1386 if (nh->nh_tclassid &&
1387 nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
1388 goto nla_put_failure;
1389#endif
1390 if (nh->nh_lwtstate &&
1391 lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
1392 goto nla_put_failure;
1393
1394
1395 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1396 } endfor_nexthops(fi);
1397
1398 nla_nest_end(skb, mp);
1399 }
1400#endif
1401 nlmsg_end(skb, nlh);
1402 return 0;
1403
1404nla_put_failure:
1405 nlmsg_cancel(skb, nlh);
1406 return -EMSGSIZE;
1407}
1408
1409
1410
1411
1412
1413
1414
1415int fib_sync_down_addr(struct net_device *dev, __be32 local)
1416{
1417 int ret = 0;
1418 unsigned int hash = fib_laddr_hashfn(local);
1419 struct hlist_head *head = &fib_info_laddrhash[hash];
1420 struct net *net = dev_net(dev);
1421 int tb_id = l3mdev_fib_table(dev);
1422 struct fib_info *fi;
1423
1424 if (!fib_info_laddrhash || local == 0)
1425 return 0;
1426
1427 hlist_for_each_entry(fi, head, fib_lhash) {
1428 if (!net_eq(fi->fib_net, net) ||
1429 fi->fib_tb_id != tb_id)
1430 continue;
1431 if (fi->fib_prefsrc == local) {
1432 fi->fib_flags |= RTNH_F_DEAD;
1433 ret++;
1434 }
1435 }
1436 return ret;
1437}
1438
1439static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
1440 enum fib_event_type event_type)
1441{
1442 struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev);
1443 struct fib_nh_notifier_info info = {
1444 .fib_nh = fib_nh,
1445 };
1446
1447 switch (event_type) {
1448 case FIB_EVENT_NH_ADD:
1449 if (fib_nh->nh_flags & RTNH_F_DEAD)
1450 break;
1451 if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1452 fib_nh->nh_flags & RTNH_F_LINKDOWN)
1453 break;
1454 return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
1455 &info.info);
1456 case FIB_EVENT_NH_DEL:
1457 if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1458 fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
1459 (fib_nh->nh_flags & RTNH_F_DEAD))
1460 return call_fib_notifiers(dev_net(fib_nh->nh_dev),
1461 event_type, &info.info);
1462 default:
1463 break;
1464 }
1465
1466 return NOTIFY_DONE;
1467}
1468
1469
1470
1471
1472
1473
1474
1475int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
1476{
1477 int ret = 0;
1478 int scope = RT_SCOPE_NOWHERE;
1479 struct fib_info *prev_fi = NULL;
1480 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1481 struct hlist_head *head = &fib_info_devhash[hash];
1482 struct fib_nh *nh;
1483
1484 if (force)
1485 scope = -1;
1486
1487 hlist_for_each_entry(nh, head, nh_hash) {
1488 struct fib_info *fi = nh->nh_parent;
1489 int dead;
1490
1491 BUG_ON(!fi->fib_nhs);
1492 if (nh->nh_dev != dev || fi == prev_fi)
1493 continue;
1494 prev_fi = fi;
1495 dead = 0;
1496 change_nexthops(fi) {
1497 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1498 dead++;
1499 else if (nexthop_nh->nh_dev == dev &&
1500 nexthop_nh->nh_scope != scope) {
1501 switch (event) {
1502 case NETDEV_DOWN:
1503 case NETDEV_UNREGISTER:
1504 nexthop_nh->nh_flags |= RTNH_F_DEAD;
1505
1506 case NETDEV_CHANGE:
1507 nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
1508 break;
1509 }
1510 call_fib_nh_notifiers(nexthop_nh,
1511 FIB_EVENT_NH_DEL);
1512 dead++;
1513 }
1514#ifdef CONFIG_IP_ROUTE_MULTIPATH
1515 if (event == NETDEV_UNREGISTER &&
1516 nexthop_nh->nh_dev == dev) {
1517 dead = fi->fib_nhs;
1518 break;
1519 }
1520#endif
1521 } endfor_nexthops(fi)
1522 if (dead == fi->fib_nhs) {
1523 switch (event) {
1524 case NETDEV_DOWN:
1525 case NETDEV_UNREGISTER:
1526 fi->fib_flags |= RTNH_F_DEAD;
1527
1528 case NETDEV_CHANGE:
1529 fi->fib_flags |= RTNH_F_LINKDOWN;
1530 break;
1531 }
1532 ret++;
1533 }
1534
1535 fib_rebalance(fi);
1536 }
1537
1538 return ret;
1539}
1540
1541
1542static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
1543{
1544 struct fib_info *fi = NULL, *last_resort = NULL;
1545 struct hlist_head *fa_head = res->fa_head;
1546 struct fib_table *tb = res->table;
1547 u8 slen = 32 - res->prefixlen;
1548 int order = -1, last_idx = -1;
1549 struct fib_alias *fa, *fa1 = NULL;
1550 u32 last_prio = res->fi->fib_priority;
1551 u8 last_tos = 0;
1552
1553 hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
1554 struct fib_info *next_fi = fa->fa_info;
1555
1556 if (fa->fa_slen != slen)
1557 continue;
1558 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1559 continue;
1560 if (fa->tb_id != tb->tb_id)
1561 continue;
1562 if (next_fi->fib_priority > last_prio &&
1563 fa->fa_tos == last_tos) {
1564 if (last_tos)
1565 continue;
1566 break;
1567 }
1568 if (next_fi->fib_flags & RTNH_F_DEAD)
1569 continue;
1570 last_tos = fa->fa_tos;
1571 last_prio = next_fi->fib_priority;
1572
1573 if (next_fi->fib_scope != res->scope ||
1574 fa->fa_type != RTN_UNICAST)
1575 continue;
1576 if (!next_fi->fib_nh[0].nh_gw ||
1577 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1578 continue;
1579
1580 fib_alias_accessed(fa);
1581
1582 if (!fi) {
1583 if (next_fi != res->fi)
1584 break;
1585 fa1 = fa;
1586 } else if (!fib_detect_death(fi, order, &last_resort,
1587 &last_idx, fa1->fa_default)) {
1588 fib_result_assign(res, fi);
1589 fa1->fa_default = order;
1590 goto out;
1591 }
1592 fi = next_fi;
1593 order++;
1594 }
1595
1596 if (order <= 0 || !fi) {
1597 if (fa1)
1598 fa1->fa_default = -1;
1599 goto out;
1600 }
1601
1602 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1603 fa1->fa_default)) {
1604 fib_result_assign(res, fi);
1605 fa1->fa_default = order;
1606 goto out;
1607 }
1608
1609 if (last_idx >= 0)
1610 fib_result_assign(res, last_resort);
1611 fa1->fa_default = last_idx;
1612out:
1613 return;
1614}
1615
1616
1617
1618
1619
1620int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1621{
1622 struct fib_info *prev_fi;
1623 unsigned int hash;
1624 struct hlist_head *head;
1625 struct fib_nh *nh;
1626 int ret;
1627
1628 if (!(dev->flags & IFF_UP))
1629 return 0;
1630
1631 if (nh_flags & RTNH_F_DEAD) {
1632 unsigned int flags = dev_get_flags(dev);
1633
1634 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1635 nh_flags |= RTNH_F_LINKDOWN;
1636 }
1637
1638 prev_fi = NULL;
1639 hash = fib_devindex_hashfn(dev->ifindex);
1640 head = &fib_info_devhash[hash];
1641 ret = 0;
1642
1643 hlist_for_each_entry(nh, head, nh_hash) {
1644 struct fib_info *fi = nh->nh_parent;
1645 int alive;
1646
1647 BUG_ON(!fi->fib_nhs);
1648 if (nh->nh_dev != dev || fi == prev_fi)
1649 continue;
1650
1651 prev_fi = fi;
1652 alive = 0;
1653 change_nexthops(fi) {
1654 if (!(nexthop_nh->nh_flags & nh_flags)) {
1655 alive++;
1656 continue;
1657 }
1658 if (!nexthop_nh->nh_dev ||
1659 !(nexthop_nh->nh_dev->flags & IFF_UP))
1660 continue;
1661 if (nexthop_nh->nh_dev != dev ||
1662 !__in_dev_get_rtnl(dev))
1663 continue;
1664 alive++;
1665 nexthop_nh->nh_flags &= ~nh_flags;
1666 call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
1667 } endfor_nexthops(fi)
1668
1669 if (alive > 0) {
1670 fi->fib_flags &= ~nh_flags;
1671 ret++;
1672 }
1673
1674 fib_rebalance(fi);
1675 }
1676
1677 return ret;
1678}
1679
1680#ifdef CONFIG_IP_ROUTE_MULTIPATH
1681static bool fib_good_nh(const struct fib_nh *nh)
1682{
1683 int state = NUD_REACHABLE;
1684
1685 if (nh->nh_scope == RT_SCOPE_LINK) {
1686 struct neighbour *n;
1687
1688 rcu_read_lock_bh();
1689
1690 n = __ipv4_neigh_lookup_noref(nh->nh_dev,
1691 (__force u32)nh->nh_gw);
1692 if (n)
1693 state = n->nud_state;
1694
1695 rcu_read_unlock_bh();
1696 }
1697
1698 return !!(state & NUD_VALID);
1699}
1700
1701void fib_select_multipath(struct fib_result *res, int hash)
1702{
1703 struct fib_info *fi = res->fi;
1704 struct net *net = fi->fib_net;
1705 bool first = false;
1706
1707 for_nexthops(fi) {
1708 if (hash > atomic_read(&nh->nh_upper_bound))
1709 continue;
1710
1711 if (!net->ipv4.sysctl_fib_multipath_use_neigh ||
1712 fib_good_nh(nh)) {
1713 res->nh_sel = nhsel;
1714 return;
1715 }
1716 if (!first) {
1717 res->nh_sel = nhsel;
1718 first = true;
1719 }
1720 } endfor_nexthops(fi);
1721}
1722#endif
1723
1724void fib_select_path(struct net *net, struct fib_result *res,
1725 struct flowi4 *fl4, const struct sk_buff *skb)
1726{
1727 bool oif_check;
1728
1729 oif_check = (fl4->flowi4_oif == 0 ||
1730 fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
1731
1732#ifdef CONFIG_IP_ROUTE_MULTIPATH
1733 if (res->fi->fib_nhs > 1 && oif_check) {
1734 int h = fib_multipath_hash(res->fi, fl4, skb);
1735
1736 fib_select_multipath(res, h);
1737 }
1738 else
1739#endif
1740 if (!res->prefixlen &&
1741 res->table->tb_num_default > 1 &&
1742 res->type == RTN_UNICAST && oif_check)
1743 fib_select_default(fl4, res);
1744
1745 if (!fl4->saddr)
1746 fl4->saddr = FIB_RES_PREFSRC(net, *res);
1747}
1748EXPORT_SYMBOL_GPL(fib_select_path);
1749