1
2
3
4
5
6
7
8
9
10
11
12#include <linux/uaccess.h>
13#include <linux/bitops.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/jiffies.h>
17#include <linux/mm.h>
18#include <linux/string.h>
19#include <linux/socket.h>
20#include <linux/sockios.h>
21#include <linux/errno.h>
22#include <linux/in.h>
23#include <linux/inet.h>
24#include <linux/inetdevice.h>
25#include <linux/netdevice.h>
26#include <linux/if_arp.h>
27#include <linux/proc_fs.h>
28#include <linux/skbuff.h>
29#include <linux/init.h>
30#include <linux/slab.h>
31#include <linux/netlink.h>
32
33#include <net/arp.h>
34#include <net/ip.h>
35#include <net/protocol.h>
36#include <net/route.h>
37#include <net/tcp.h>
38#include <net/sock.h>
39#include <net/ip_fib.h>
40#include <net/ip6_fib.h>
41#include <net/netlink.h>
42#include <net/rtnh.h>
43#include <net/lwtunnel.h>
44#include <net/fib_notifier.h>
45#include <net/addrconf.h>
46
47#include "fib_lookup.h"
48
49static DEFINE_SPINLOCK(fib_info_lock);
50static struct hlist_head *fib_info_hash;
51static struct hlist_head *fib_info_laddrhash;
52static unsigned int fib_info_hash_size;
53static unsigned int fib_info_cnt;
54
55#define DEVINDEX_HASHBITS 8
56#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
57static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
58
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope, declare the
 * loop index "nhsel" plus a cursor ("nh", const, for for_nexthops;
 * "nexthop_nh", writable, for change_nexthops) and emit the for-header.
 * The caller supplies the loop body and must close the scope with
 * endfor_nexthops(fi).
 *
 * With CONFIG_IP_ROUTE_MULTIPATH the loop visits fi->fib_nhs entries;
 * without it the loop runs exactly once on the first nexthop.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh *nh;					\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh;					\
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < (fi)->fib_nhs;					\
	     nexthop_nh++, nhsel++)

#else /* !CONFIG_IP_ROUTE_MULTIPATH */

#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
89
90
91const struct fib_prop fib_props[RTN_MAX + 1] = {
92 [RTN_UNSPEC] = {
93 .error = 0,
94 .scope = RT_SCOPE_NOWHERE,
95 },
96 [RTN_UNICAST] = {
97 .error = 0,
98 .scope = RT_SCOPE_UNIVERSE,
99 },
100 [RTN_LOCAL] = {
101 .error = 0,
102 .scope = RT_SCOPE_HOST,
103 },
104 [RTN_BROADCAST] = {
105 .error = 0,
106 .scope = RT_SCOPE_LINK,
107 },
108 [RTN_ANYCAST] = {
109 .error = 0,
110 .scope = RT_SCOPE_LINK,
111 },
112 [RTN_MULTICAST] = {
113 .error = 0,
114 .scope = RT_SCOPE_UNIVERSE,
115 },
116 [RTN_BLACKHOLE] = {
117 .error = -EINVAL,
118 .scope = RT_SCOPE_UNIVERSE,
119 },
120 [RTN_UNREACHABLE] = {
121 .error = -EHOSTUNREACH,
122 .scope = RT_SCOPE_UNIVERSE,
123 },
124 [RTN_PROHIBIT] = {
125 .error = -EACCES,
126 .scope = RT_SCOPE_UNIVERSE,
127 },
128 [RTN_THROW] = {
129 .error = -EAGAIN,
130 .scope = RT_SCOPE_UNIVERSE,
131 },
132 [RTN_NAT] = {
133 .error = -EINVAL,
134 .scope = RT_SCOPE_NOWHERE,
135 },
136 [RTN_XRESOLVE] = {
137 .error = -EINVAL,
138 .scope = RT_SCOPE_NOWHERE,
139 },
140};
141
142static void rt_fibinfo_free(struct rtable __rcu **rtp)
143{
144 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
145
146 if (!rt)
147 return;
148
149
150
151
152
153
154 dst_dev_put(&rt->dst);
155 dst_release_immediate(&rt->dst);
156}
157
158static void free_nh_exceptions(struct fib_nh_common *nhc)
159{
160 struct fnhe_hash_bucket *hash;
161 int i;
162
163 hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
164 if (!hash)
165 return;
166 for (i = 0; i < FNHE_HASH_SIZE; i++) {
167 struct fib_nh_exception *fnhe;
168
169 fnhe = rcu_dereference_protected(hash[i].chain, 1);
170 while (fnhe) {
171 struct fib_nh_exception *next;
172
173 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
174
175 rt_fibinfo_free(&fnhe->fnhe_rth_input);
176 rt_fibinfo_free(&fnhe->fnhe_rth_output);
177
178 kfree(fnhe);
179
180 fnhe = next;
181 }
182 }
183 kfree(hash);
184}
185
186static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
187{
188 int cpu;
189
190 if (!rtp)
191 return;
192
193 for_each_possible_cpu(cpu) {
194 struct rtable *rt;
195
196 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
197 if (rt) {
198 dst_dev_put(&rt->dst);
199 dst_release_immediate(&rt->dst);
200 }
201 }
202 free_percpu(rtp);
203}
204
205void fib_nh_common_release(struct fib_nh_common *nhc)
206{
207 if (nhc->nhc_dev)
208 dev_put(nhc->nhc_dev);
209
210 lwtstate_put(nhc->nhc_lwtstate);
211 rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
212 rt_fibinfo_free(&nhc->nhc_rth_input);
213 free_nh_exceptions(nhc);
214}
215EXPORT_SYMBOL_GPL(fib_nh_common_release);
216
217void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
218{
219#ifdef CONFIG_IP_ROUTE_CLASSID
220 if (fib_nh->nh_tclassid)
221 net->ipv4.fib_num_tclassid_users--;
222#endif
223 fib_nh_common_release(&fib_nh->nh_common);
224}
225
226
227static void free_fib_info_rcu(struct rcu_head *head)
228{
229 struct fib_info *fi = container_of(head, struct fib_info, rcu);
230
231 change_nexthops(fi) {
232 fib_nh_release(fi->fib_net, nexthop_nh);
233 } endfor_nexthops(fi);
234
235 ip_fib_metrics_put(fi->fib_metrics);
236
237 kfree(fi);
238}
239
240void free_fib_info(struct fib_info *fi)
241{
242 if (fi->fib_dead == 0) {
243 pr_warn("Freeing alive fib_info %p\n", fi);
244 return;
245 }
246 fib_info_cnt--;
247
248 call_rcu(&fi->rcu, free_fib_info_rcu);
249}
250EXPORT_SYMBOL_GPL(free_fib_info);
251
252void fib_release_info(struct fib_info *fi)
253{
254 spin_lock_bh(&fib_info_lock);
255 if (fi && --fi->fib_treeref == 0) {
256 hlist_del(&fi->fib_hash);
257 if (fi->fib_prefsrc)
258 hlist_del(&fi->fib_lhash);
259 change_nexthops(fi) {
260 if (!nexthop_nh->fib_nh_dev)
261 continue;
262 hlist_del(&nexthop_nh->nh_hash);
263 } endfor_nexthops(fi)
264 fi->fib_dead = 1;
265 fib_info_put(fi);
266 }
267 spin_unlock_bh(&fib_info_lock);
268}
269
270static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
271{
272 const struct fib_nh *onh = ofi->fib_nh;
273
274 for_nexthops(fi) {
275 if (nh->fib_nh_oif != onh->fib_nh_oif ||
276 nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
277 nh->fib_nh_scope != onh->fib_nh_scope ||
278#ifdef CONFIG_IP_ROUTE_MULTIPATH
279 nh->fib_nh_weight != onh->fib_nh_weight ||
280#endif
281#ifdef CONFIG_IP_ROUTE_CLASSID
282 nh->nh_tclassid != onh->nh_tclassid ||
283#endif
284 lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
285 ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
286 return -1;
287
288 if (nh->fib_nh_gw_family == AF_INET &&
289 nh->fib_nh_gw4 != onh->fib_nh_gw4)
290 return -1;
291
292 if (nh->fib_nh_gw_family == AF_INET6 &&
293 ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
294 return -1;
295
296 onh++;
297 } endfor_nexthops(fi);
298 return 0;
299}
300
301static inline unsigned int fib_devindex_hashfn(unsigned int val)
302{
303 unsigned int mask = DEVINDEX_HASHSIZE - 1;
304
305 return (val ^
306 (val >> DEVINDEX_HASHBITS) ^
307 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
308}
309
310static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
311{
312 unsigned int mask = (fib_info_hash_size - 1);
313 unsigned int val = fi->fib_nhs;
314
315 val ^= (fi->fib_protocol << 8) | fi->fib_scope;
316 val ^= (__force u32)fi->fib_prefsrc;
317 val ^= fi->fib_priority;
318 for_nexthops(fi) {
319 val ^= fib_devindex_hashfn(nh->fib_nh_oif);
320 } endfor_nexthops(fi)
321
322 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
323}
324
325static struct fib_info *fib_find_info(const struct fib_info *nfi)
326{
327 struct hlist_head *head;
328 struct fib_info *fi;
329 unsigned int hash;
330
331 hash = fib_info_hashfn(nfi);
332 head = &fib_info_hash[hash];
333
334 hlist_for_each_entry(fi, head, fib_hash) {
335 if (!net_eq(fi->fib_net, nfi->fib_net))
336 continue;
337 if (fi->fib_nhs != nfi->fib_nhs)
338 continue;
339 if (nfi->fib_protocol == fi->fib_protocol &&
340 nfi->fib_scope == fi->fib_scope &&
341 nfi->fib_prefsrc == fi->fib_prefsrc &&
342 nfi->fib_priority == fi->fib_priority &&
343 nfi->fib_type == fi->fib_type &&
344 memcmp(nfi->fib_metrics, fi->fib_metrics,
345 sizeof(u32) * RTAX_MAX) == 0 &&
346 !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
347 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
348 return fi;
349 }
350
351 return NULL;
352}
353
354
355
356
357int ip_fib_check_default(__be32 gw, struct net_device *dev)
358{
359 struct hlist_head *head;
360 struct fib_nh *nh;
361 unsigned int hash;
362
363 spin_lock(&fib_info_lock);
364
365 hash = fib_devindex_hashfn(dev->ifindex);
366 head = &fib_info_devhash[hash];
367 hlist_for_each_entry(nh, head, nh_hash) {
368 if (nh->fib_nh_dev == dev &&
369 nh->fib_nh_gw4 == gw &&
370 !(nh->fib_nh_flags & RTNH_F_DEAD)) {
371 spin_unlock(&fib_info_lock);
372 return 0;
373 }
374 }
375
376 spin_unlock(&fib_info_lock);
377
378 return -1;
379}
380
381static inline size_t fib_nlmsg_size(struct fib_info *fi)
382{
383 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
384 + nla_total_size(4)
385 + nla_total_size(4)
386 + nla_total_size(4)
387 + nla_total_size(4)
388 + nla_total_size(TCP_CA_NAME_MAX);
389
390
391 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
392
393 if (fi->fib_nhs) {
394 size_t nh_encapsize = 0;
395
396
397
398 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
399
400
401 nhsize += 2 * nla_total_size(4);
402
403
404 for_nexthops(fi) {
405 if (nh->fib_nh_lws) {
406
407 nh_encapsize += lwtunnel_get_encap_size(
408 nh->fib_nh_lws);
409
410 nh_encapsize += nla_total_size(2);
411 }
412 } endfor_nexthops(fi);
413
414
415 payload += nla_total_size((fi->fib_nhs * nhsize) +
416 nh_encapsize);
417
418 }
419
420 return payload;
421}
422
423void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
424 int dst_len, u32 tb_id, const struct nl_info *info,
425 unsigned int nlm_flags)
426{
427 struct sk_buff *skb;
428 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
429 int err = -ENOBUFS;
430
431 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
432 if (!skb)
433 goto errout;
434
435 err = fib_dump_info(skb, info->portid, seq, event, tb_id,
436 fa->fa_type, key, dst_len,
437 fa->fa_tos, fa->fa_info, nlm_flags);
438 if (err < 0) {
439
440 WARN_ON(err == -EMSGSIZE);
441 kfree_skb(skb);
442 goto errout;
443 }
444 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
445 info->nlh, GFP_KERNEL);
446 return;
447errout:
448 if (err < 0)
449 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
450}
451
452static int fib_detect_death(struct fib_info *fi, int order,
453 struct fib_info **last_resort, int *last_idx,
454 int dflt)
455{
456 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
457 struct neighbour *n;
458 int state = NUD_NONE;
459
460 if (likely(nhc->nhc_gw_family == AF_INET))
461 n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
462 else if (nhc->nhc_gw_family == AF_INET6)
463 n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
464 nhc->nhc_dev);
465 else
466 n = NULL;
467
468 if (n) {
469 state = n->nud_state;
470 neigh_release(n);
471 } else {
472 return 0;
473 }
474 if (state == NUD_REACHABLE)
475 return 0;
476 if ((state & NUD_VALID) && order != dflt)
477 return 0;
478 if ((state & NUD_VALID) ||
479 (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
480 *last_resort = fi;
481 *last_idx = order;
482 }
483 return 1;
484}
485
486int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
487 u16 encap_type, void *cfg, gfp_t gfp_flags,
488 struct netlink_ext_ack *extack)
489{
490 int err;
491
492 nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
493 gfp_flags);
494 if (!nhc->nhc_pcpu_rth_output)
495 return -ENOMEM;
496
497 if (encap) {
498 struct lwtunnel_state *lwtstate;
499
500 if (encap_type == LWTUNNEL_ENCAP_NONE) {
501 NL_SET_ERR_MSG(extack, "LWT encap type not specified");
502 err = -EINVAL;
503 goto lwt_failure;
504 }
505 err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
506 cfg, &lwtstate, extack);
507 if (err)
508 goto lwt_failure;
509
510 nhc->nhc_lwtstate = lwtstate_get(lwtstate);
511 }
512
513 return 0;
514
515lwt_failure:
516 rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
517 nhc->nhc_pcpu_rth_output = NULL;
518 return err;
519}
520EXPORT_SYMBOL_GPL(fib_nh_common_init);
521
522int fib_nh_init(struct net *net, struct fib_nh *nh,
523 struct fib_config *cfg, int nh_weight,
524 struct netlink_ext_ack *extack)
525{
526 int err;
527
528 nh->fib_nh_family = AF_INET;
529
530 err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
531 cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
532 if (err)
533 return err;
534
535 nh->fib_nh_oif = cfg->fc_oif;
536 nh->fib_nh_gw_family = cfg->fc_gw_family;
537 if (cfg->fc_gw_family == AF_INET)
538 nh->fib_nh_gw4 = cfg->fc_gw4;
539 else if (cfg->fc_gw_family == AF_INET6)
540 nh->fib_nh_gw6 = cfg->fc_gw6;
541
542 nh->fib_nh_flags = cfg->fc_flags;
543
544#ifdef CONFIG_IP_ROUTE_CLASSID
545 nh->nh_tclassid = cfg->fc_flow;
546 if (nh->nh_tclassid)
547 net->ipv4.fib_num_tclassid_users++;
548#endif
549#ifdef CONFIG_IP_ROUTE_MULTIPATH
550 nh->fib_nh_weight = nh_weight;
551#endif
552 return 0;
553}
554
555#ifdef CONFIG_IP_ROUTE_MULTIPATH
556
/*
 * Count the rtnexthop records in the @remaining bytes starting at @rtnh.
 *
 * Returns the count, or 0 (with an extack message) when bytes are left
 * over after the last well-formed record.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
			      struct netlink_ext_ack *extack)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* trailing bytes mean the nexthop list is malformed */
	if (remaining > 0) {
		NL_SET_ERR_MSG(extack,
			       "Invalid nexthop configuration - extra data after nexthops");
		return 0;
	}

	return count;
}
576
577static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
578 int remaining, struct fib_config *cfg,
579 struct netlink_ext_ack *extack)
580{
581 struct net *net = fi->fib_net;
582 struct fib_config fib_cfg;
583 int ret;
584
585 change_nexthops(fi) {
586 int attrlen;
587
588 memset(&fib_cfg, 0, sizeof(fib_cfg));
589
590 if (!rtnh_ok(rtnh, remaining)) {
591 NL_SET_ERR_MSG(extack,
592 "Invalid nexthop configuration - extra data after nexthop");
593 return -EINVAL;
594 }
595
596 if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
597 NL_SET_ERR_MSG(extack,
598 "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
599 return -EINVAL;
600 }
601
602 fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
603 fib_cfg.fc_oif = rtnh->rtnh_ifindex;
604
605 attrlen = rtnh_attrlen(rtnh);
606 if (attrlen > 0) {
607 struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
608
609 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
610 nlav = nla_find(attrs, attrlen, RTA_VIA);
611 if (nla && nlav) {
612 NL_SET_ERR_MSG(extack,
613 "Nexthop configuration can not contain both GATEWAY and VIA");
614 return -EINVAL;
615 }
616 if (nla) {
617 fib_cfg.fc_gw4 = nla_get_in_addr(nla);
618 if (fib_cfg.fc_gw4)
619 fib_cfg.fc_gw_family = AF_INET;
620 } else if (nlav) {
621 ret = fib_gw_from_via(&fib_cfg, nlav, extack);
622 if (ret)
623 goto errout;
624 }
625
626 nla = nla_find(attrs, attrlen, RTA_FLOW);
627 if (nla)
628 fib_cfg.fc_flow = nla_get_u32(nla);
629
630 fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
631 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
632 if (nla)
633 fib_cfg.fc_encap_type = nla_get_u16(nla);
634 }
635
636 ret = fib_nh_init(net, nexthop_nh, &fib_cfg,
637 rtnh->rtnh_hops + 1, extack);
638 if (ret)
639 goto errout;
640
641 rtnh = rtnh_next(rtnh, &remaining);
642 } endfor_nexthops(fi);
643
644 ret = -EINVAL;
645 if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) {
646 NL_SET_ERR_MSG(extack,
647 "Nexthop device index does not match RTA_OIF");
648 goto errout;
649 }
650 if (cfg->fc_gw_family) {
651 if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family ||
652 (cfg->fc_gw_family == AF_INET &&
653 fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) ||
654 (cfg->fc_gw_family == AF_INET6 &&
655 ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) {
656 NL_SET_ERR_MSG(extack,
657 "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
658 goto errout;
659 }
660 }
661#ifdef CONFIG_IP_ROUTE_CLASSID
662 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
663 NL_SET_ERR_MSG(extack,
664 "Nexthop class id does not match RTA_FLOW");
665 goto errout;
666 }
667#endif
668 ret = 0;
669errout:
670 return ret;
671}
672
673static void fib_rebalance(struct fib_info *fi)
674{
675 int total;
676 int w;
677
678 if (fi->fib_nhs < 2)
679 return;
680
681 total = 0;
682 for_nexthops(fi) {
683 if (nh->fib_nh_flags & RTNH_F_DEAD)
684 continue;
685
686 if (ip_ignore_linkdown(nh->fib_nh_dev) &&
687 nh->fib_nh_flags & RTNH_F_LINKDOWN)
688 continue;
689
690 total += nh->fib_nh_weight;
691 } endfor_nexthops(fi);
692
693 w = 0;
694 change_nexthops(fi) {
695 int upper_bound;
696
697 if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) {
698 upper_bound = -1;
699 } else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) &&
700 nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
701 upper_bound = -1;
702 } else {
703 w += nexthop_nh->fib_nh_weight;
704 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
705 total) - 1;
706 }
707
708 atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound);
709 } endfor_nexthops(fi);
710}
711#else
712
713static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
714 int remaining, struct fib_config *cfg,
715 struct netlink_ext_ack *extack)
716{
717 NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel");
718
719 return -EINVAL;
720}
721
722#define fib_rebalance(fi) do { } while (0)
723
724#endif
725
726static int fib_encap_match(u16 encap_type,
727 struct nlattr *encap,
728 const struct fib_nh *nh,
729 const struct fib_config *cfg,
730 struct netlink_ext_ack *extack)
731{
732 struct lwtunnel_state *lwtstate;
733 int ret, result = 0;
734
735 if (encap_type == LWTUNNEL_ENCAP_NONE)
736 return 0;
737
738 ret = lwtunnel_build_state(encap_type, encap, AF_INET,
739 cfg, &lwtstate, extack);
740 if (!ret) {
741 result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws);
742 lwtstate_free(lwtstate);
743 }
744
745 return result;
746}
747
748int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
749 struct netlink_ext_ack *extack)
750{
751#ifdef CONFIG_IP_ROUTE_MULTIPATH
752 struct rtnexthop *rtnh;
753 int remaining;
754#endif
755
756 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
757 return 1;
758
759 if (cfg->fc_oif || cfg->fc_gw_family) {
760 if (cfg->fc_encap) {
761 if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
762 fi->fib_nh, cfg, extack))
763 return 1;
764 }
765#ifdef CONFIG_IP_ROUTE_CLASSID
766 if (cfg->fc_flow &&
767 cfg->fc_flow != fi->fib_nh->nh_tclassid)
768 return 1;
769#endif
770 if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) ||
771 (cfg->fc_gw_family &&
772 cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family))
773 return 1;
774
775 if (cfg->fc_gw_family == AF_INET &&
776 cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4)
777 return 1;
778
779 if (cfg->fc_gw_family == AF_INET6 &&
780 ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6))
781 return 1;
782
783 return 0;
784 }
785
786#ifdef CONFIG_IP_ROUTE_MULTIPATH
787 if (!cfg->fc_mp)
788 return 0;
789
790 rtnh = cfg->fc_mp;
791 remaining = cfg->fc_mp_len;
792
793 for_nexthops(fi) {
794 int attrlen;
795
796 if (!rtnh_ok(rtnh, remaining))
797 return -EINVAL;
798
799 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif)
800 return 1;
801
802 attrlen = rtnh_attrlen(rtnh);
803 if (attrlen > 0) {
804 struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
805
806 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
807 nlav = nla_find(attrs, attrlen, RTA_VIA);
808 if (nla && nlav) {
809 NL_SET_ERR_MSG(extack,
810 "Nexthop configuration can not contain both GATEWAY and VIA");
811 return -EINVAL;
812 }
813
814 if (nla) {
815 if (nh->fib_nh_gw_family != AF_INET ||
816 nla_get_in_addr(nla) != nh->fib_nh_gw4)
817 return 1;
818 } else if (nlav) {
819 struct fib_config cfg2;
820 int err;
821
822 err = fib_gw_from_via(&cfg2, nlav, extack);
823 if (err)
824 return err;
825
826 switch (nh->fib_nh_gw_family) {
827 case AF_INET:
828 if (cfg2.fc_gw_family != AF_INET ||
829 cfg2.fc_gw4 != nh->fib_nh_gw4)
830 return 1;
831 break;
832 case AF_INET6:
833 if (cfg2.fc_gw_family != AF_INET6 ||
834 ipv6_addr_cmp(&cfg2.fc_gw6,
835 &nh->fib_nh_gw6))
836 return 1;
837 break;
838 }
839 }
840
841#ifdef CONFIG_IP_ROUTE_CLASSID
842 nla = nla_find(attrs, attrlen, RTA_FLOW);
843 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
844 return 1;
845#endif
846 }
847
848 rtnh = rtnh_next(rtnh, &remaining);
849 } endfor_nexthops(fi);
850#endif
851 return 0;
852}
853
854bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
855{
856 struct nlattr *nla;
857 int remaining;
858
859 if (!cfg->fc_mx)
860 return true;
861
862 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
863 int type = nla_type(nla);
864 u32 fi_val, val;
865
866 if (!type)
867 continue;
868 if (type > RTAX_MAX)
869 return false;
870
871 if (type == RTAX_CC_ALGO) {
872 char tmp[TCP_CA_NAME_MAX];
873 bool ecn_ca = false;
874
875 nla_strlcpy(tmp, nla, sizeof(tmp));
876 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
877 } else {
878 if (nla_len(nla) != sizeof(u32))
879 return false;
880 val = nla_get_u32(nla);
881 }
882
883 fi_val = fi->fib_metrics->metrics[type - 1];
884 if (type == RTAX_FEATURES)
885 fi_val &= ~DST_FEATURE_ECN_CA;
886
887 if (fi_val != val)
888 return false;
889 }
890
891 return true;
892}
893
894static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
895 u32 table, struct netlink_ext_ack *extack)
896{
897 struct fib6_config cfg = {
898 .fc_table = table,
899 .fc_flags = nh->fib_nh_flags | RTF_GATEWAY,
900 .fc_ifindex = nh->fib_nh_oif,
901 .fc_gateway = nh->fib_nh_gw6,
902 };
903 struct fib6_nh fib6_nh = {};
904 int err;
905
906 err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
907 if (!err) {
908 nh->fib_nh_dev = fib6_nh.fib_nh_dev;
909 dev_hold(nh->fib_nh_dev);
910 nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
911 nh->fib_nh_scope = RT_SCOPE_LINK;
912
913 ipv6_stub->fib6_nh_release(&fib6_nh);
914 }
915
916 return err;
917}
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
963 u8 scope, struct netlink_ext_ack *extack)
964{
965 struct net_device *dev;
966 struct fib_result res;
967 int err = 0;
968
969 if (nh->fib_nh_flags & RTNH_F_ONLINK) {
970 unsigned int addr_type;
971
972 if (scope >= RT_SCOPE_LINK) {
973 NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
974 return -EINVAL;
975 }
976 dev = __dev_get_by_index(net, nh->fib_nh_oif);
977 if (!dev) {
978 NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
979 return -ENODEV;
980 }
981 if (!(dev->flags & IFF_UP)) {
982 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
983 return -ENETDOWN;
984 }
985 addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
986 if (addr_type != RTN_UNICAST) {
987 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
988 return -EINVAL;
989 }
990 if (!netif_carrier_ok(dev))
991 nh->fib_nh_flags |= RTNH_F_LINKDOWN;
992 nh->fib_nh_dev = dev;
993 dev_hold(dev);
994 nh->fib_nh_scope = RT_SCOPE_LINK;
995 return 0;
996 }
997 rcu_read_lock();
998 {
999 struct fib_table *tbl = NULL;
1000 struct flowi4 fl4 = {
1001 .daddr = nh->fib_nh_gw4,
1002 .flowi4_scope = scope + 1,
1003 .flowi4_oif = nh->fib_nh_oif,
1004 .flowi4_iif = LOOPBACK_IFINDEX,
1005 };
1006
1007
1008 if (fl4.flowi4_scope < RT_SCOPE_LINK)
1009 fl4.flowi4_scope = RT_SCOPE_LINK;
1010
1011 if (table)
1012 tbl = fib_get_table(net, table);
1013
1014 if (tbl)
1015 err = fib_table_lookup(tbl, &fl4, &res,
1016 FIB_LOOKUP_IGNORE_LINKSTATE |
1017 FIB_LOOKUP_NOREF);
1018
1019
1020
1021
1022
1023 if (!tbl || err) {
1024 err = fib_lookup(net, &fl4, &res,
1025 FIB_LOOKUP_IGNORE_LINKSTATE);
1026 }
1027
1028 if (err) {
1029 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
1030 goto out;
1031 }
1032 }
1033
1034 err = -EINVAL;
1035 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
1036 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
1037 goto out;
1038 }
1039 nh->fib_nh_scope = res.scope;
1040 nh->fib_nh_oif = FIB_RES_OIF(res);
1041 nh->fib_nh_dev = dev = FIB_RES_DEV(res);
1042 if (!dev) {
1043 NL_SET_ERR_MSG(extack,
1044 "No egress device for nexthop gateway");
1045 goto out;
1046 }
1047 dev_hold(dev);
1048 if (!netif_carrier_ok(dev))
1049 nh->fib_nh_flags |= RTNH_F_LINKDOWN;
1050 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
1051out:
1052 rcu_read_unlock();
1053 return err;
1054}
1055
1056static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
1057 struct netlink_ext_ack *extack)
1058{
1059 struct in_device *in_dev;
1060 int err;
1061
1062 if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
1063 NL_SET_ERR_MSG(extack,
1064 "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
1065 return -EINVAL;
1066 }
1067
1068 rcu_read_lock();
1069
1070 err = -ENODEV;
1071 in_dev = inetdev_by_index(net, nh->fib_nh_oif);
1072 if (!in_dev)
1073 goto out;
1074 err = -ENETDOWN;
1075 if (!(in_dev->dev->flags & IFF_UP)) {
1076 NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
1077 goto out;
1078 }
1079
1080 nh->fib_nh_dev = in_dev->dev;
1081 dev_hold(nh->fib_nh_dev);
1082 nh->fib_nh_scope = RT_SCOPE_HOST;
1083 if (!netif_carrier_ok(nh->fib_nh_dev))
1084 nh->fib_nh_flags |= RTNH_F_LINKDOWN;
1085 err = 0;
1086out:
1087 rcu_read_unlock();
1088 return err;
1089}
1090
1091static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
1092 struct netlink_ext_ack *extack)
1093{
1094 struct net *net = cfg->fc_nlinfo.nl_net;
1095 u32 table = cfg->fc_table;
1096 int err;
1097
1098 if (nh->fib_nh_gw_family == AF_INET)
1099 err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack);
1100 else if (nh->fib_nh_gw_family == AF_INET6)
1101 err = fib_check_nh_v6_gw(net, nh, table, extack);
1102 else
1103 err = fib_check_nh_nongw(net, nh, extack);
1104
1105 return err;
1106}
1107
1108static inline unsigned int fib_laddr_hashfn(__be32 val)
1109{
1110 unsigned int mask = (fib_info_hash_size - 1);
1111
1112 return ((__force u32)val ^
1113 ((__force u32)val >> 7) ^
1114 ((__force u32)val >> 14)) & mask;
1115}
1116
1117static struct hlist_head *fib_info_hash_alloc(int bytes)
1118{
1119 if (bytes <= PAGE_SIZE)
1120 return kzalloc(bytes, GFP_KERNEL);
1121 else
1122 return (struct hlist_head *)
1123 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
1124 get_order(bytes));
1125}
1126
1127static void fib_info_hash_free(struct hlist_head *hash, int bytes)
1128{
1129 if (!hash)
1130 return;
1131
1132 if (bytes <= PAGE_SIZE)
1133 kfree(hash);
1134 else
1135 free_pages((unsigned long) hash, get_order(bytes));
1136}
1137
1138static void fib_info_hash_move(struct hlist_head *new_info_hash,
1139 struct hlist_head *new_laddrhash,
1140 unsigned int new_size)
1141{
1142 struct hlist_head *old_info_hash, *old_laddrhash;
1143 unsigned int old_size = fib_info_hash_size;
1144 unsigned int i, bytes;
1145
1146 spin_lock_bh(&fib_info_lock);
1147 old_info_hash = fib_info_hash;
1148 old_laddrhash = fib_info_laddrhash;
1149 fib_info_hash_size = new_size;
1150
1151 for (i = 0; i < old_size; i++) {
1152 struct hlist_head *head = &fib_info_hash[i];
1153 struct hlist_node *n;
1154 struct fib_info *fi;
1155
1156 hlist_for_each_entry_safe(fi, n, head, fib_hash) {
1157 struct hlist_head *dest;
1158 unsigned int new_hash;
1159
1160 new_hash = fib_info_hashfn(fi);
1161 dest = &new_info_hash[new_hash];
1162 hlist_add_head(&fi->fib_hash, dest);
1163 }
1164 }
1165 fib_info_hash = new_info_hash;
1166
1167 for (i = 0; i < old_size; i++) {
1168 struct hlist_head *lhead = &fib_info_laddrhash[i];
1169 struct hlist_node *n;
1170 struct fib_info *fi;
1171
1172 hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
1173 struct hlist_head *ldest;
1174 unsigned int new_hash;
1175
1176 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
1177 ldest = &new_laddrhash[new_hash];
1178 hlist_add_head(&fi->fib_lhash, ldest);
1179 }
1180 }
1181 fib_info_laddrhash = new_laddrhash;
1182
1183 spin_unlock_bh(&fib_info_lock);
1184
1185 bytes = old_size * sizeof(struct hlist_head *);
1186 fib_info_hash_free(old_info_hash, bytes);
1187 fib_info_hash_free(old_laddrhash, bytes);
1188}
1189
1190__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
1191{
1192 nh->nh_saddr = inet_select_addr(nh->fib_nh_dev,
1193 nh->fib_nh_gw4,
1194 nh->nh_parent->fib_scope);
1195 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
1196
1197 return nh->nh_saddr;
1198}
1199
1200__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
1201{
1202 struct fib_nh_common *nhc = res->nhc;
1203 struct fib_nh *nh;
1204
1205 if (res->fi->fib_prefsrc)
1206 return res->fi->fib_prefsrc;
1207
1208 nh = container_of(nhc, struct fib_nh, nh_common);
1209 if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
1210 return nh->nh_saddr;
1211
1212 return fib_info_update_nh_saddr(net, nh);
1213}
1214
1215static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
1216{
1217 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
1218 fib_prefsrc != cfg->fc_dst) {
1219 u32 tb_id = cfg->fc_table;
1220 int rc;
1221
1222 if (tb_id == RT_TABLE_MAIN)
1223 tb_id = RT_TABLE_LOCAL;
1224
1225 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1226 fib_prefsrc, tb_id);
1227
1228 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
1229 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1230 fib_prefsrc, RT_TABLE_LOCAL);
1231 }
1232
1233 if (rc != RTN_LOCAL)
1234 return false;
1235 }
1236 return true;
1237}
1238
1239struct fib_info *fib_create_info(struct fib_config *cfg,
1240 struct netlink_ext_ack *extack)
1241{
1242 int err;
1243 struct fib_info *fi = NULL;
1244 struct fib_info *ofi;
1245 int nhs = 1;
1246 struct net *net = cfg->fc_nlinfo.nl_net;
1247
1248 if (cfg->fc_type > RTN_MAX)
1249 goto err_inval;
1250
1251
1252 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
1253 NL_SET_ERR_MSG(extack, "Invalid scope");
1254 goto err_inval;
1255 }
1256
1257 if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
1258 NL_SET_ERR_MSG(extack,
1259 "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
1260 goto err_inval;
1261 }
1262
1263#ifdef CONFIG_IP_ROUTE_MULTIPATH
1264 if (cfg->fc_mp) {
1265 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
1266 if (nhs == 0)
1267 goto err_inval;
1268 }
1269#endif
1270
1271 err = -ENOBUFS;
1272 if (fib_info_cnt >= fib_info_hash_size) {
1273 unsigned int new_size = fib_info_hash_size << 1;
1274 struct hlist_head *new_info_hash;
1275 struct hlist_head *new_laddrhash;
1276 unsigned int bytes;
1277
1278 if (!new_size)
1279 new_size = 16;
1280 bytes = new_size * sizeof(struct hlist_head *);
1281 new_info_hash = fib_info_hash_alloc(bytes);
1282 new_laddrhash = fib_info_hash_alloc(bytes);
1283 if (!new_info_hash || !new_laddrhash) {
1284 fib_info_hash_free(new_info_hash, bytes);
1285 fib_info_hash_free(new_laddrhash, bytes);
1286 } else
1287 fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
1288
1289 if (!fib_info_hash_size)
1290 goto failure;
1291 }
1292
1293 fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
1294 if (!fi)
1295 goto failure;
1296 fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
1297 cfg->fc_mx_len, extack);
1298 if (unlikely(IS_ERR(fi->fib_metrics))) {
1299 err = PTR_ERR(fi->fib_metrics);
1300 kfree(fi);
1301 return ERR_PTR(err);
1302 }
1303
1304 fib_info_cnt++;
1305 fi->fib_net = net;
1306 fi->fib_protocol = cfg->fc_protocol;
1307 fi->fib_scope = cfg->fc_scope;
1308 fi->fib_flags = cfg->fc_flags;
1309 fi->fib_priority = cfg->fc_priority;
1310 fi->fib_prefsrc = cfg->fc_prefsrc;
1311 fi->fib_type = cfg->fc_type;
1312 fi->fib_tb_id = cfg->fc_table;
1313
1314 fi->fib_nhs = nhs;
1315 change_nexthops(fi) {
1316 nexthop_nh->nh_parent = fi;
1317 } endfor_nexthops(fi)
1318
1319 if (cfg->fc_mp)
1320 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
1321 else
1322 err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
1323
1324 if (err != 0)
1325 goto failure;
1326
1327 if (fib_props[cfg->fc_type].error) {
1328 if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
1329 NL_SET_ERR_MSG(extack,
1330 "Gateway, device and multipath can not be specified for this route type");
1331 goto err_inval;
1332 }
1333 goto link_it;
1334 } else {
1335 switch (cfg->fc_type) {
1336 case RTN_UNICAST:
1337 case RTN_LOCAL:
1338 case RTN_BROADCAST:
1339 case RTN_ANYCAST:
1340 case RTN_MULTICAST:
1341 break;
1342 default:
1343 NL_SET_ERR_MSG(extack, "Invalid route type");
1344 goto err_inval;
1345 }
1346 }
1347
1348 if (cfg->fc_scope > RT_SCOPE_HOST) {
1349 NL_SET_ERR_MSG(extack, "Invalid scope");
1350 goto err_inval;
1351 }
1352
1353 if (cfg->fc_scope == RT_SCOPE_HOST) {
1354 struct fib_nh *nh = fi->fib_nh;
1355
1356
1357 if (nhs != 1) {
1358 NL_SET_ERR_MSG(extack,
1359 "Route with host scope can not have multiple nexthops");
1360 goto err_inval;
1361 }
1362 if (nh->fib_nh_gw_family) {
1363 NL_SET_ERR_MSG(extack,
1364 "Route with host scope can not have a gateway");
1365 goto err_inval;
1366 }
1367 nh->fib_nh_scope = RT_SCOPE_NOWHERE;
1368 nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif);
1369 err = -ENODEV;
1370 if (!nh->fib_nh_dev)
1371 goto failure;
1372 } else {
1373 int linkdown = 0;
1374
1375 change_nexthops(fi) {
1376 err = fib_check_nh(cfg, nexthop_nh, extack);
1377 if (err != 0)
1378 goto failure;
1379 if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
1380 linkdown++;
1381 } endfor_nexthops(fi)
1382 if (linkdown == fi->fib_nhs)
1383 fi->fib_flags |= RTNH_F_LINKDOWN;
1384 }
1385
1386 if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
1387 NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
1388 goto err_inval;
1389 }
1390
1391 change_nexthops(fi) {
1392 fib_info_update_nh_saddr(net, nexthop_nh);
1393 if (nexthop_nh->fib_nh_gw_family == AF_INET6)
1394 fi->fib_nh_is_v6 = true;
1395 } endfor_nexthops(fi)
1396
1397 fib_rebalance(fi);
1398
1399link_it:
1400 ofi = fib_find_info(fi);
1401 if (ofi) {
1402 fi->fib_dead = 1;
1403 free_fib_info(fi);
1404 ofi->fib_treeref++;
1405 return ofi;
1406 }
1407
1408 fi->fib_treeref++;
1409 refcount_set(&fi->fib_clntref, 1);
1410 spin_lock_bh(&fib_info_lock);
1411 hlist_add_head(&fi->fib_hash,
1412 &fib_info_hash[fib_info_hashfn(fi)]);
1413 if (fi->fib_prefsrc) {
1414 struct hlist_head *head;
1415
1416 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
1417 hlist_add_head(&fi->fib_lhash, head);
1418 }
1419 change_nexthops(fi) {
1420 struct hlist_head *head;
1421 unsigned int hash;
1422
1423 if (!nexthop_nh->fib_nh_dev)
1424 continue;
1425 hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
1426 head = &fib_info_devhash[hash];
1427 hlist_add_head(&nexthop_nh->nh_hash, head);
1428 } endfor_nexthops(fi)
1429 spin_unlock_bh(&fib_info_lock);
1430 return fi;
1431
1432err_inval:
1433 err = -EINVAL;
1434
1435failure:
1436 if (fi) {
1437 fi->fib_dead = 1;
1438 free_fib_info(fi);
1439 }
1440
1441 return ERR_PTR(err);
1442}
1443
1444int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
1445 unsigned char *flags, bool skip_oif)
1446{
1447 if (nhc->nhc_flags & RTNH_F_DEAD)
1448 *flags |= RTNH_F_DEAD;
1449
1450 if (nhc->nhc_flags & RTNH_F_LINKDOWN) {
1451 *flags |= RTNH_F_LINKDOWN;
1452
1453 rcu_read_lock();
1454 switch (nhc->nhc_family) {
1455 case AF_INET:
1456 if (ip_ignore_linkdown(nhc->nhc_dev))
1457 *flags |= RTNH_F_DEAD;
1458 break;
1459 case AF_INET6:
1460 if (ip6_ignore_linkdown(nhc->nhc_dev))
1461 *flags |= RTNH_F_DEAD;
1462 break;
1463 }
1464 rcu_read_unlock();
1465 }
1466
1467 switch (nhc->nhc_gw_family) {
1468 case AF_INET:
1469 if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
1470 goto nla_put_failure;
1471 break;
1472 case AF_INET6:
1473
1474
1475
1476 if (nhc->nhc_gw_family != nhc->nhc_family) {
1477 int alen = sizeof(struct in6_addr);
1478 struct nlattr *nla;
1479 struct rtvia *via;
1480
1481 nla = nla_reserve(skb, RTA_VIA, alen + 2);
1482 if (!nla)
1483 goto nla_put_failure;
1484
1485 via = nla_data(nla);
1486 via->rtvia_family = AF_INET6;
1487 memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
1488 } else if (nla_put_in6_addr(skb, RTA_GATEWAY,
1489 &nhc->nhc_gw.ipv6) < 0) {
1490 goto nla_put_failure;
1491 }
1492 break;
1493 }
1494
1495 *flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
1496 if (nhc->nhc_flags & RTNH_F_OFFLOAD)
1497 *flags |= RTNH_F_OFFLOAD;
1498
1499 if (!skip_oif && nhc->nhc_dev &&
1500 nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
1501 goto nla_put_failure;
1502
1503 if (nhc->nhc_lwtstate &&
1504 lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
1505 RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
1506 goto nla_put_failure;
1507
1508 return 0;
1509
1510nla_put_failure:
1511 return -EMSGSIZE;
1512}
1513EXPORT_SYMBOL_GPL(fib_nexthop_info);
1514
1515#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
1516int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
1517 int nh_weight)
1518{
1519 const struct net_device *dev = nhc->nhc_dev;
1520 struct rtnexthop *rtnh;
1521 unsigned char flags = 0;
1522
1523 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1524 if (!rtnh)
1525 goto nla_put_failure;
1526
1527 rtnh->rtnh_hops = nh_weight - 1;
1528 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
1529
1530 if (fib_nexthop_info(skb, nhc, &flags, true) < 0)
1531 goto nla_put_failure;
1532
1533 rtnh->rtnh_flags = flags;
1534
1535
1536 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
1537
1538 return 0;
1539
1540nla_put_failure:
1541 return -EMSGSIZE;
1542}
1543EXPORT_SYMBOL_GPL(fib_add_nexthop);
1544#endif
1545
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
 * Encode all nexthops of @fi as a nested RTA_MULTIPATH attribute in @skb.
 *
 * Each nexthop becomes one struct rtnexthop record (see fib_add_nexthop()),
 * optionally followed by an RTA_FLOW attribute carrying its classid.
 *
 * Returns 0 on success or -EMSGSIZE when @skb runs out of room. On failure
 * the partially written nest is left in @skb for the caller to discard.
 */
static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
{
	struct nlattr *mp;

	mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
	if (!mp)
		goto nla_put_failure;

	for_nexthops(fi) {
#ifdef CONFIG_IP_ROUTE_CLASSID
		/* fib_add_nexthop() reserves its rtnexthop header at the
		 * current tail of the message, so the tail position taken
		 * here is where that header will live.
		 */
		struct rtnexthop *rtnh = nlmsg_get_pos(skb);
#endif

		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0)
			goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (nh->nh_tclassid) {
			if (nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
				goto nla_put_failure;

			/* fib_add_nexthop() computed rtnh_len before RTA_FLOW
			 * was appended. Extend it so the attribute is part of
			 * this nexthop record; otherwise a parser stepping by
			 * rtnh_len would land on the RTA_FLOW attribute and
			 * misread it as the next rtnexthop.
			 */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
		}
#endif
	} endfor_nexthops(fi);

	nla_nest_end(skb, mp);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
#else
/* Without multipath support there is nothing to encode. */
static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
{
	return 0;
}
#endif
1578
1579int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1580 u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
1581 struct fib_info *fi, unsigned int flags)
1582{
1583 struct nlmsghdr *nlh;
1584 struct rtmsg *rtm;
1585
1586 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
1587 if (!nlh)
1588 return -EMSGSIZE;
1589
1590 rtm = nlmsg_data(nlh);
1591 rtm->rtm_family = AF_INET;
1592 rtm->rtm_dst_len = dst_len;
1593 rtm->rtm_src_len = 0;
1594 rtm->rtm_tos = tos;
1595 if (tb_id < 256)
1596 rtm->rtm_table = tb_id;
1597 else
1598 rtm->rtm_table = RT_TABLE_COMPAT;
1599 if (nla_put_u32(skb, RTA_TABLE, tb_id))
1600 goto nla_put_failure;
1601 rtm->rtm_type = type;
1602 rtm->rtm_flags = fi->fib_flags;
1603 rtm->rtm_scope = fi->fib_scope;
1604 rtm->rtm_protocol = fi->fib_protocol;
1605
1606 if (rtm->rtm_dst_len &&
1607 nla_put_in_addr(skb, RTA_DST, dst))
1608 goto nla_put_failure;
1609 if (fi->fib_priority &&
1610 nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
1611 goto nla_put_failure;
1612 if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
1613 goto nla_put_failure;
1614
1615 if (fi->fib_prefsrc &&
1616 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
1617 goto nla_put_failure;
1618 if (fi->fib_nhs == 1) {
1619 struct fib_nh *nh = &fi->fib_nh[0];
1620 unsigned char flags = 0;
1621
1622 if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0)
1623 goto nla_put_failure;
1624
1625 rtm->rtm_flags = flags;
1626#ifdef CONFIG_IP_ROUTE_CLASSID
1627 if (nh->nh_tclassid &&
1628 nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
1629 goto nla_put_failure;
1630#endif
1631 } else {
1632 if (fib_add_multipath(skb, fi) < 0)
1633 goto nla_put_failure;
1634 }
1635
1636 nlmsg_end(skb, nlh);
1637 return 0;
1638
1639nla_put_failure:
1640 nlmsg_cancel(skb, nlh);
1641 return -EMSGSIZE;
1642}
1643
1644
1645
1646
1647
1648
1649
1650int fib_sync_down_addr(struct net_device *dev, __be32 local)
1651{
1652 int ret = 0;
1653 unsigned int hash = fib_laddr_hashfn(local);
1654 struct hlist_head *head = &fib_info_laddrhash[hash];
1655 struct net *net = dev_net(dev);
1656 int tb_id = l3mdev_fib_table(dev);
1657 struct fib_info *fi;
1658
1659 if (!fib_info_laddrhash || local == 0)
1660 return 0;
1661
1662 hlist_for_each_entry(fi, head, fib_lhash) {
1663 if (!net_eq(fi->fib_net, net) ||
1664 fi->fib_tb_id != tb_id)
1665 continue;
1666 if (fi->fib_prefsrc == local) {
1667 fi->fib_flags |= RTNH_F_DEAD;
1668 ret++;
1669 }
1670 }
1671 return ret;
1672}
1673
1674static int call_fib_nh_notifiers(struct fib_nh *nh,
1675 enum fib_event_type event_type)
1676{
1677 bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev);
1678 struct fib_nh_notifier_info info = {
1679 .fib_nh = nh,
1680 };
1681
1682 switch (event_type) {
1683 case FIB_EVENT_NH_ADD:
1684 if (nh->fib_nh_flags & RTNH_F_DEAD)
1685 break;
1686 if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN)
1687 break;
1688 return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type,
1689 &info.info);
1690 case FIB_EVENT_NH_DEL:
1691 if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) ||
1692 (nh->fib_nh_flags & RTNH_F_DEAD))
1693 return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
1694 event_type, &info.info);
1695 default:
1696 break;
1697 }
1698
1699 return NOTIFY_DONE;
1700}
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
1713{
1714 struct fnhe_hash_bucket *bucket;
1715 int i;
1716
1717 bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
1718 if (!bucket)
1719 return;
1720
1721 for (i = 0; i < FNHE_HASH_SIZE; i++) {
1722 struct fib_nh_exception *fnhe;
1723
1724 for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
1725 fnhe;
1726 fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
1727 if (fnhe->fnhe_mtu_locked) {
1728 if (new <= fnhe->fnhe_pmtu) {
1729 fnhe->fnhe_pmtu = new;
1730 fnhe->fnhe_mtu_locked = false;
1731 }
1732 } else if (new < fnhe->fnhe_pmtu ||
1733 orig == fnhe->fnhe_pmtu) {
1734 fnhe->fnhe_pmtu = new;
1735 }
1736 }
1737 }
1738}
1739
1740void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
1741{
1742 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1743 struct hlist_head *head = &fib_info_devhash[hash];
1744 struct fib_nh *nh;
1745
1746 hlist_for_each_entry(nh, head, nh_hash) {
1747 if (nh->fib_nh_dev == dev)
1748 nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
1749 }
1750}
1751
1752
1753
1754
1755
1756
1757
/*
 * Walk the nexthops hashed under @dev's ifindex and flag those that use @dev
 * in response to @event.
 *
 * For NETDEV_DOWN and NETDEV_UNREGISTER a matching nexthop gets both
 * RTNH_F_DEAD and RTNH_F_LINKDOWN; for NETDEV_CHANGE it only gets
 * RTNH_F_LINKDOWN. Nexthops whose scope is RT_SCOPE_NOWHERE are left alone
 * unless @force is set. When every nexthop of a fib_info is counted as dead,
 * the same flags are applied to the fib_info itself.
 *
 * Returns the number of fib_info entries whose nexthops were all counted as
 * dead.
 */
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;
	struct fib_info *prev_fi = NULL;
	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
	struct hlist_head *head = &fib_info_devhash[hash];
	struct fib_nh *nh;

	/* -1 makes the "fib_nh_scope != scope" test below always pass for
	 * the non-negative scope values used in this file.
	 */
	if (force)
		scope = -1;

	hlist_for_each_entry(nh, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int dead;

		BUG_ON(!fi->fib_nhs);
		/* Skip other devices sharing the bucket, and a fib_info that
		 * was handled on the immediately preceding iteration.
		 */
		if (nh->fib_nh_dev != dev || fi == prev_fi)
			continue;
		prev_fi = fi;
		dead = 0;
		change_nexthops(fi) {
			if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD)
				dead++;
			else if (nexthop_nh->fib_nh_dev == dev &&
				 nexthop_nh->fib_nh_scope != scope) {
				switch (event) {
				case NETDEV_DOWN:
				case NETDEV_UNREGISTER:
					nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;
					/* fall through */
				case NETDEV_CHANGE:
					nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
					break;
				}
				call_fib_nh_notifiers(nexthop_nh,
						      FIB_EVENT_NH_DEL);
				dead++;
			}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
			/* On unregister, one nexthop on @dev is enough to
			 * treat the whole fib_info as dead; stop scanning.
			 */
			if (event == NETDEV_UNREGISTER &&
			    nexthop_nh->fib_nh_dev == dev) {
				dead = fi->fib_nhs;
				break;
			}
#endif
		} endfor_nexthops(fi)
		if (dead == fi->fib_nhs) {
			switch (event) {
			case NETDEV_DOWN:
			case NETDEV_UNREGISTER:
				fi->fib_flags |= RTNH_F_DEAD;
				/* fall through */
			case NETDEV_CHANGE:
				fi->fib_flags |= RTNH_F_LINKDOWN;
				break;
			}
			ret++;
		}

		fib_rebalance(fi);
	}

	return ret;
}
1823
1824
/*
 * Walk the aliases on res->fa_head and, where a suitable candidate is found,
 * replace the fib_info in @res with it. The position of the chosen candidate
 * is remembered in fa_default of the alias that matched res->fi.
 *
 * @flp: flow being routed; only its TOS is consulted here
 * @res: lookup result, updated in place via fib_result_assign()
 *
 * NOTE(review): fib_detect_death() is defined outside this chunk; a zero
 * return is treated below as "use this candidate" - confirm its exact
 * contract against its definition.
 */
static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
{
	struct fib_info *fi = NULL, *last_resort = NULL;
	struct hlist_head *fa_head = res->fa_head;
	struct fib_table *tb = res->table;
	u8 slen = 32 - res->prefixlen;
	int order = -1, last_idx = -1;
	struct fib_alias *fa, *fa1 = NULL;
	u32 last_prio = res->fi->fib_priority;
	u8 last_tos = 0;

	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
		struct fib_info *next_fi = fa->fa_info;

		/* Only aliases for the same prefix length, a compatible TOS
		 * and the same table are considered.
		 */
		if (fa->fa_slen != slen)
			continue;
		if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
			continue;
		if (fa->tb_id != tb->tb_id)
			continue;
		/* A higher priority value than the last accepted one, at the
		 * same TOS: skip it when that TOS is non-zero, otherwise stop
		 * the walk.
		 */
		if (next_fi->fib_priority > last_prio &&
		    fa->fa_tos == last_tos) {
			if (last_tos)
				continue;
			break;
		}
		if (next_fi->fib_flags & RTNH_F_DEAD)
			continue;
		last_tos = fa->fa_tos;
		last_prio = next_fi->fib_priority;

		/* Candidates must be unicast routes of the result's scope
		 * whose first nexthop has an IPv4 gateway with link scope.
		 */
		if (next_fi->fib_scope != res->scope ||
		    fa->fa_type != RTN_UNICAST)
			continue;
		if (!next_fi->fib_nh[0].fib_nh_gw4 ||
		    next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK)
			continue;

		fib_alias_accessed(fa);

		if (!fi) {
			/* The first candidate must be the route the lookup
			 * returned; otherwise give up.
			 */
			if (next_fi != res->fi)
				break;
			fa1 = fa;
		} else if (!fib_detect_death(fi, order, &last_resort,
					     &last_idx, fa1->fa_default)) {
			fib_result_assign(res, fi);
			fa1->fa_default = order;
			goto out;
		}
		fi = next_fi;
		order++;
	}

	/* Fewer than two candidates were seen: nothing to choose between. */
	if (order <= 0 || !fi) {
		if (fa1)
			fa1->fa_default = -1;
		goto out;
	}

	/* The last candidate was not yet checked inside the loop. */
	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
			      fa1->fa_default)) {
		fib_result_assign(res, fi);
		fa1->fa_default = order;
		goto out;
	}

	/* No candidate was accepted; use the recorded last resort, if any. */
	if (last_idx >= 0)
		fib_result_assign(res, last_resort);
	fa1->fa_default = last_idx;
out:
	return;
}
1898
1899
1900
1901
1902
1903int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
1904{
1905 struct fib_info *prev_fi;
1906 unsigned int hash;
1907 struct hlist_head *head;
1908 struct fib_nh *nh;
1909 int ret;
1910
1911 if (!(dev->flags & IFF_UP))
1912 return 0;
1913
1914 if (nh_flags & RTNH_F_DEAD) {
1915 unsigned int flags = dev_get_flags(dev);
1916
1917 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1918 nh_flags |= RTNH_F_LINKDOWN;
1919 }
1920
1921 prev_fi = NULL;
1922 hash = fib_devindex_hashfn(dev->ifindex);
1923 head = &fib_info_devhash[hash];
1924 ret = 0;
1925
1926 hlist_for_each_entry(nh, head, nh_hash) {
1927 struct fib_info *fi = nh->nh_parent;
1928 int alive;
1929
1930 BUG_ON(!fi->fib_nhs);
1931 if (nh->fib_nh_dev != dev || fi == prev_fi)
1932 continue;
1933
1934 prev_fi = fi;
1935 alive = 0;
1936 change_nexthops(fi) {
1937 if (!(nexthop_nh->fib_nh_flags & nh_flags)) {
1938 alive++;
1939 continue;
1940 }
1941 if (!nexthop_nh->fib_nh_dev ||
1942 !(nexthop_nh->fib_nh_dev->flags & IFF_UP))
1943 continue;
1944 if (nexthop_nh->fib_nh_dev != dev ||
1945 !__in_dev_get_rtnl(dev))
1946 continue;
1947 alive++;
1948 nexthop_nh->fib_nh_flags &= ~nh_flags;
1949 call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
1950 } endfor_nexthops(fi)
1951
1952 if (alive > 0) {
1953 fi->fib_flags &= ~nh_flags;
1954 ret++;
1955 }
1956
1957 fib_rebalance(fi);
1958 }
1959
1960 return ret;
1961}
1962
1963#ifdef CONFIG_IP_ROUTE_MULTIPATH
1964static bool fib_good_nh(const struct fib_nh *nh)
1965{
1966 int state = NUD_REACHABLE;
1967
1968 if (nh->fib_nh_scope == RT_SCOPE_LINK) {
1969 struct neighbour *n;
1970
1971 rcu_read_lock_bh();
1972
1973 if (likely(nh->fib_nh_gw_family == AF_INET))
1974 n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
1975 (__force u32)nh->fib_nh_gw4);
1976 else if (nh->fib_nh_gw_family == AF_INET6)
1977 n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
1978 &nh->fib_nh_gw6);
1979 else
1980 n = NULL;
1981 if (n)
1982 state = n->nud_state;
1983
1984 rcu_read_unlock_bh();
1985 }
1986
1987 return !!(state & NUD_VALID);
1988}
1989
1990void fib_select_multipath(struct fib_result *res, int hash)
1991{
1992 struct fib_info *fi = res->fi;
1993 struct net *net = fi->fib_net;
1994 bool first = false;
1995
1996 change_nexthops(fi) {
1997 if (net->ipv4.sysctl_fib_multipath_use_neigh) {
1998 if (!fib_good_nh(nexthop_nh))
1999 continue;
2000 if (!first) {
2001 res->nh_sel = nhsel;
2002 res->nhc = &nexthop_nh->nh_common;
2003 first = true;
2004 }
2005 }
2006
2007 if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound))
2008 continue;
2009
2010 res->nh_sel = nhsel;
2011 res->nhc = &nexthop_nh->nh_common;
2012 return;
2013 } endfor_nexthops(fi);
2014}
2015#endif
2016
2017void fib_select_path(struct net *net, struct fib_result *res,
2018 struct flowi4 *fl4, const struct sk_buff *skb)
2019{
2020 if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
2021 goto check_saddr;
2022
2023#ifdef CONFIG_IP_ROUTE_MULTIPATH
2024 if (res->fi->fib_nhs > 1) {
2025 int h = fib_multipath_hash(net, fl4, skb, NULL);
2026
2027 fib_select_multipath(res, h);
2028 }
2029 else
2030#endif
2031 if (!res->prefixlen &&
2032 res->table->tb_num_default > 1 &&
2033 res->type == RTN_UNICAST)
2034 fib_select_default(fl4, res);
2035
2036check_saddr:
2037 if (!fl4->saddr)
2038 fl4->saddr = fib_result_prefsrc(net, res);
2039}
2040