1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#define pr_fmt(fmt) "IPv6: " fmt
28
#include <linux/capability.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/ip.h>
#include <linux/uaccess.h>
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/* Declared ahead of the tracepoint header include that follows. */
static int ip6_rt_type_to_error(u8 fib6_type);

/*
 * CREATE_TRACE_POINTS is defined only around this one include and undefined
 * again right after the fib6_table_lookup tracepoint symbol is exported.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/fib6.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
#undef CREATE_TRACE_POINTS
79
/*
 * Result codes used while scoring a route.  All failure values are negative
 * so that callers can test "< 0"; rt6_score_route() returns either one of
 * these failures or a non-negative score.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the route outright */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() scores it 0 and requests round-robin */
	RT6_NUD_SUCCEED = 1
};
86
/*
 * Forward declarations of file-local functions that are referenced before
 * their definitions (for instance by the dst_ops tables and template
 * objects in this file).
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct fib6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr);
115
/* Route Information option helpers; only built with CONFIG_IPV6_ROUTE_INFO. */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct fib6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct fib6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
127
/*
 * A list of rt6_info entries (linked through rt6i_uncached) together with
 * the spinlock that protects it.
 */
struct uncached_list {
	spinlock_t		lock;	/* taken with _bh around every list update */
	struct list_head	head;
};

/* One uncached list per CPU. */
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
135void rt6_uncached_list_add(struct rt6_info *rt)
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
146void rt6_uncached_list_del(struct rt6_info *rt)
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
150 struct net *net = dev_net(rt->dst.dev);
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
164 if (dev == loopback_dev)
165 return;
166
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
176 if (rt_idev->dev == dev) {
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
181 if (rt_dev == dev) {
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
192 struct sk_buff *skb,
193 const void *daddr)
194{
195 if (!ipv6_addr_any(p))
196 return (const void *) p;
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
199 return daddr;
200}
201
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
206{
207 struct neighbour *n;
208
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
211 if (n)
212 return n;
213 return neigh_create(&nd_tbl, daddr, dev);
214}
215
216static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
217 struct sk_buff *skb,
218 const void *daddr)
219{
220 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
221
222 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
223}
224
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
230 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
/*
 * dst_ops callbacks for IPv6 routes.  ip6_dst_alloc() allocates from the
 * per-namespace net->ipv6.ip6_dst_ops rather than from this object directly;
 * NOTE(review): presumably this template is copied into it at netns init -
 * confirm against the init code.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	dst_cow_metrics_generic,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
258
259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
260{
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
264}
265
/* update_pmtu callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
270
/* redirect callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
275
/*
 * dst_ops used for blackhole dsts.  PMTU updates and redirects are wired to
 * no-op handlers, and the MTU comes from ip6_blackhole_mtu().
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
};
287
/* Metrics array with every slot zero; the hop-limit slot is spelled out. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
291
/*
 * Template for the FIB "null" entry: an unreachable reject route with the
 * largest possible metric and the default dst metrics.
 * NOTE(review): presumably copied into net->ipv6.fib6_null_entry at netns
 * init - confirm against the init code.
 */
static const struct fib6_info fib6_null_entry_template = {
	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.fib6_protocol  = RTPROT_KERNEL,
	.fib6_metric	= ~(u32)0,
	.fib6_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
};
300
/*
 * Template for the dst-level "null" route: packets are handed to the
 * ip6_pkt_discard handlers and the dst carries -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
312
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for a "prohibit" route: packets go to the ip6_pkt_prohibit
 * handlers and the dst carries -EACCES.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/*
 * Template for a "blackhole" route: packets go to the generic dst_discard
 * handlers and the dst carries -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
340
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
346 INIT_LIST_HEAD(&rt->rt6i_uncached);
347}
348
349
350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
351 int flags)
352{
353 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
354 1, DST_OBSOLETE_FORCE_CHK, flags);
355
356 if (rt) {
357 rt6_info_init(rt);
358 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
359 }
360
361 return rt;
362}
363EXPORT_SYMBOL(ip6_dst_alloc);
364
365static void ip6_dst_destroy(struct dst_entry *dst)
366{
367 struct rt6_info *rt = (struct rt6_info *)dst;
368 struct fib6_info *from;
369 struct inet6_dev *idev;
370
371 ip_dst_metrics_put(dst);
372 rt6_uncached_list_del(rt);
373
374 idev = rt->rt6i_idev;
375 if (idev) {
376 rt->rt6i_idev = NULL;
377 in6_dev_put(idev);
378 }
379
380 rcu_read_lock();
381 from = rcu_dereference(rt->from);
382 rcu_assign_pointer(rt->from, NULL);
383 fib6_info_release(from);
384 rcu_read_unlock();
385}
386
387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 int how)
389{
390 struct rt6_info *rt = (struct rt6_info *)dst;
391 struct inet6_dev *idev = rt->rt6i_idev;
392 struct net_device *loopback_dev =
393 dev_net(dev)->loopback_dev;
394
395 if (idev && idev->dev != loopback_dev) {
396 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
397 if (loopback_idev) {
398 rt->rt6i_idev = loopback_idev;
399 in6_dev_put(idev);
400 }
401 }
402}
403
404static bool __rt6_check_expired(const struct rt6_info *rt)
405{
406 if (rt->rt6i_flags & RTF_EXPIRES)
407 return time_after(jiffies, rt->dst.expires);
408 else
409 return false;
410}
411
412static bool rt6_check_expired(const struct rt6_info *rt)
413{
414 struct fib6_info *from;
415
416 from = rcu_dereference(rt->from);
417
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
420 return true;
421 } else if (from) {
422 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
423 fib6_check_expired(from);
424 }
425 return false;
426}
427
428struct fib6_info *fib6_multipath_select(const struct net *net,
429 struct fib6_info *match,
430 struct flowi6 *fl6, int oif,
431 const struct sk_buff *skb,
432 int strict)
433{
434 struct fib6_info *sibling, *next_sibling;
435
436
437
438
439 if (!fl6->mp_hash)
440 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
441
442 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
443 return match;
444
445 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
446 fib6_siblings) {
447 int nh_upper_bound;
448
449 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
450 if (fl6->mp_hash > nh_upper_bound)
451 continue;
452 if (rt6_score_route(sibling, oif, strict) < 0)
453 break;
454 match = sibling;
455 break;
456 }
457
458 return match;
459}
460
461
462
463
464
465static inline struct fib6_info *rt6_device_match(struct net *net,
466 struct fib6_info *rt,
467 const struct in6_addr *saddr,
468 int oif,
469 int flags)
470{
471 struct fib6_info *sprt;
472
473 if (!oif && ipv6_addr_any(saddr) &&
474 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
475 return rt;
476
477 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
478 const struct net_device *dev = sprt->fib6_nh.nh_dev;
479
480 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
481 continue;
482
483 if (oif) {
484 if (dev->ifindex == oif)
485 return sprt;
486 } else {
487 if (ipv6_chk_addr(net, saddr, dev,
488 flags & RT6_LOOKUP_F_IFACE))
489 return sprt;
490 }
491 }
492
493 if (oif && flags & RT6_LOOKUP_F_IFACE)
494 return net->ipv6.fib6_null_entry;
495
496 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
497}
498
499#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation request queued by rt6_probe(). */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* address to solicit */
	struct net_device *dev;		/* held until the work item has run */
};
505
506static void rt6_probe_deferred(struct work_struct *w)
507{
508 struct in6_addr mcaddr;
509 struct __rt6_probe_work *work =
510 container_of(w, struct __rt6_probe_work, work);
511
512 addrconf_addr_solict_mult(&work->target, &mcaddr);
513 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
514 dev_put(work->dev);
515 kfree(work);
516}
517
518static void rt6_probe(struct fib6_info *rt)
519{
520 struct __rt6_probe_work *work = NULL;
521 const struct in6_addr *nh_gw;
522 struct neighbour *neigh;
523 struct net_device *dev;
524 struct inet6_dev *idev;
525
526
527
528
529
530
531
532
533
534 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
535 return;
536
537 nh_gw = &rt->fib6_nh.nh_gw;
538 dev = rt->fib6_nh.nh_dev;
539 rcu_read_lock_bh();
540 idev = __in6_dev_get(dev);
541 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
542 if (neigh) {
543 if (neigh->nud_state & NUD_VALID)
544 goto out;
545
546 write_lock(&neigh->lock);
547 if (!(neigh->nud_state & NUD_VALID) &&
548 time_after(jiffies,
549 neigh->updated + idev->cnf.rtr_probe_interval)) {
550 work = kmalloc(sizeof(*work), GFP_ATOMIC);
551 if (work)
552 __neigh_set_probe_once(neigh);
553 }
554 write_unlock(&neigh->lock);
555 } else if (time_after(jiffies, rt->last_probe +
556 idev->cnf.rtr_probe_interval)) {
557 work = kmalloc(sizeof(*work), GFP_ATOMIC);
558 }
559
560 if (work) {
561 rt->last_probe = jiffies;
562 INIT_WORK(&work->work, rt6_probe_deferred);
563 work->target = *nh_gw;
564 dev_hold(dev);
565 work->dev = dev;
566 schedule_work(&work->work);
567 }
568
569out:
570 rcu_read_unlock_bh();
571}
572#else
/* Without CONFIG_IPV6_ROUTER_PREF routers are never probed. */
static inline void rt6_probe(struct fib6_info *rt)
{
}
576#endif
577
578
579
580
581static inline int rt6_check_dev(struct fib6_info *rt, int oif)
582{
583 const struct net_device *dev = rt->fib6_nh.nh_dev;
584
585 if (!oif || dev->ifindex == oif)
586 return 2;
587 return 0;
588}
589
590static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
591{
592 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
593 struct neighbour *neigh;
594
595 if (rt->fib6_flags & RTF_NONEXTHOP ||
596 !(rt->fib6_flags & RTF_GATEWAY))
597 return RT6_NUD_SUCCEED;
598
599 rcu_read_lock_bh();
600 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
601 &rt->fib6_nh.nh_gw);
602 if (neigh) {
603 read_lock(&neigh->lock);
604 if (neigh->nud_state & NUD_VALID)
605 ret = RT6_NUD_SUCCEED;
606#ifdef CONFIG_IPV6_ROUTER_PREF
607 else if (!(neigh->nud_state & NUD_FAILED))
608 ret = RT6_NUD_SUCCEED;
609 else
610 ret = RT6_NUD_FAIL_PROBE;
611#endif
612 read_unlock(&neigh->lock);
613 } else {
614 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
615 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
616 }
617 rcu_read_unlock_bh();
618
619 return ret;
620}
621
622static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
623{
624 int m;
625
626 m = rt6_check_dev(rt, oif);
627 if (!m && (strict & RT6_LOOKUP_F_IFACE))
628 return RT6_NUD_FAIL_HARD;
629#ifdef CONFIG_IPV6_ROUTER_PREF
630 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
631#endif
632 if (strict & RT6_LOOKUP_F_REACHABLE) {
633 int n = rt6_check_neigh(rt);
634 if (n < 0)
635 return n;
636 }
637 return m;
638}
639
640
641static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
642{
643 const struct net_device *dev = fib6_info_nh_dev(f6i);
644 bool rc = false;
645
646 if (dev) {
647 const struct inet6_dev *idev = __in6_dev_get(dev);
648
649 rc = !!idev->cnf.ignore_routes_with_linkdown;
650 }
651
652 return rc;
653}
654
655static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
656 int *mpri, struct fib6_info *match,
657 bool *do_rr)
658{
659 int m;
660 bool match_do_rr = false;
661
662 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
663 goto out;
664
665 if (fib6_ignore_linkdown(rt) &&
666 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
667 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
668 goto out;
669
670 if (fib6_check_expired(rt))
671 goto out;
672
673 m = rt6_score_route(rt, oif, strict);
674 if (m == RT6_NUD_FAIL_DO_RR) {
675 match_do_rr = true;
676 m = 0;
677 } else if (m == RT6_NUD_FAIL_HARD) {
678 goto out;
679 }
680
681 if (strict & RT6_LOOKUP_F_REACHABLE)
682 rt6_probe(rt);
683
684
685 if (m > *mpri) {
686 *do_rr = match_do_rr;
687 *mpri = m;
688 match = rt;
689 }
690out:
691 return match;
692}
693
694static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
695 struct fib6_info *leaf,
696 struct fib6_info *rr_head,
697 u32 metric, int oif, int strict,
698 bool *do_rr)
699{
700 struct fib6_info *rt, *match, *cont;
701 int mpri = -1;
702
703 match = NULL;
704 cont = NULL;
705 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
706 if (rt->fib6_metric != metric) {
707 cont = rt;
708 break;
709 }
710
711 match = find_match(rt, oif, strict, &mpri, match, do_rr);
712 }
713
714 for (rt = leaf; rt && rt != rr_head;
715 rt = rcu_dereference(rt->fib6_next)) {
716 if (rt->fib6_metric != metric) {
717 cont = rt;
718 break;
719 }
720
721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
722 }
723
724 if (match || !cont)
725 return match;
726
727 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
728 match = find_match(rt, oif, strict, &mpri, match, do_rr);
729
730 return match;
731}
732
733static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
734 int oif, int strict)
735{
736 struct fib6_info *leaf = rcu_dereference(fn->leaf);
737 struct fib6_info *match, *rt0;
738 bool do_rr = false;
739 int key_plen;
740
741 if (!leaf || leaf == net->ipv6.fib6_null_entry)
742 return net->ipv6.fib6_null_entry;
743
744 rt0 = rcu_dereference(fn->rr_ptr);
745 if (!rt0)
746 rt0 = leaf;
747
748
749
750
751
752
753 key_plen = rt0->fib6_dst.plen;
754#ifdef CONFIG_IPV6_SUBTREES
755 if (rt0->fib6_src.plen)
756 key_plen = rt0->fib6_src.plen;
757#endif
758 if (fn->fn_bit != key_plen)
759 return net->ipv6.fib6_null_entry;
760
761 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
762 &do_rr);
763
764 if (do_rr) {
765 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
766
767
768 if (!next || next->fib6_metric != rt0->fib6_metric)
769 next = leaf;
770
771 if (next != rt0) {
772 spin_lock_bh(&leaf->fib6_table->tb6_lock);
773
774 if (next->fib6_node)
775 rcu_assign_pointer(fn->rr_ptr, next);
776 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
777 }
778 }
779
780 return match ? match : net->ipv6.fib6_null_entry;
781}
782
783static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
784{
785 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
786}
787
788#ifdef CONFIG_IPV6_ROUTE_INFO
789int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
790 const struct in6_addr *gwaddr)
791{
792 struct net *net = dev_net(dev);
793 struct route_info *rinfo = (struct route_info *) opt;
794 struct in6_addr prefix_buf, *prefix;
795 unsigned int pref;
796 unsigned long lifetime;
797 struct fib6_info *rt;
798
799 if (len < sizeof(struct route_info)) {
800 return -EINVAL;
801 }
802
803
804 if (rinfo->length > 3) {
805 return -EINVAL;
806 } else if (rinfo->prefix_len > 128) {
807 return -EINVAL;
808 } else if (rinfo->prefix_len > 64) {
809 if (rinfo->length < 2) {
810 return -EINVAL;
811 }
812 } else if (rinfo->prefix_len > 0) {
813 if (rinfo->length < 1) {
814 return -EINVAL;
815 }
816 }
817
818 pref = rinfo->route_pref;
819 if (pref == ICMPV6_ROUTER_PREF_INVALID)
820 return -EINVAL;
821
822 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
823
824 if (rinfo->length == 3)
825 prefix = (struct in6_addr *)rinfo->prefix;
826 else {
827
828 ipv6_addr_prefix(&prefix_buf,
829 (struct in6_addr *)rinfo->prefix,
830 rinfo->prefix_len);
831 prefix = &prefix_buf;
832 }
833
834 if (rinfo->prefix_len == 0)
835 rt = rt6_get_dflt_router(net, gwaddr, dev);
836 else
837 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
838 gwaddr, dev);
839
840 if (rt && !lifetime) {
841 ip6_del_rt(net, rt);
842 rt = NULL;
843 }
844
845 if (!rt && lifetime)
846 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
847 dev, pref);
848 else if (rt)
849 rt->fib6_flags = RTF_ROUTEINFO |
850 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
851
852 if (rt) {
853 if (!addrconf_finite_timeout(lifetime))
854 fib6_clean_expires(rt);
855 else
856 fib6_set_expires(rt, jiffies + HZ * lifetime);
857
858 fib6_info_release(rt);
859 }
860 return 0;
861}
862#endif
863
864
865
866
867
868
869static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
870{
871 struct net_device *dev = rt->fib6_nh.nh_dev;
872
873 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
874
875
876
877
878 if (netif_is_l3_slave(dev) &&
879 !rt6_need_strict(&rt->fib6_dst.addr))
880 dev = l3mdev_master_dev_rcu(dev);
881 else if (!netif_is_l3_master(dev))
882 dev = dev_net(dev)->loopback_dev;
883
884
885
886 }
887
888 return dev;
889}
890
/* dst error code for each route type, indexed by RTN_* value. */
static const int fib6_prop[RTN_MAX + 1] = {
	[RTN_UNSPEC]	= 0,
	[RTN_UNICAST]	= 0,
	[RTN_LOCAL]	= 0,
	[RTN_BROADCAST]	= 0,
	[RTN_ANYCAST]	= 0,
	[RTN_MULTICAST]	= 0,
	[RTN_BLACKHOLE]	= -EINVAL,
	[RTN_UNREACHABLE] = -EHOSTUNREACH,
	[RTN_PROHIBIT]	= -EACCES,
	[RTN_THROW]	= -EAGAIN,
	[RTN_NAT]	= -EINVAL,
	[RTN_XRESOLVE]	= -EINVAL,
};
905
906static int ip6_rt_type_to_error(u8 fib6_type)
907{
908 return fib6_prop[fib6_type];
909}
910
911static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
912{
913 unsigned short flags = 0;
914
915 if (rt->dst_nocount)
916 flags |= DST_NOCOUNT;
917 if (rt->dst_nopolicy)
918 flags |= DST_NOPOLICY;
919 if (rt->dst_host)
920 flags |= DST_HOST;
921
922 return flags;
923}
924
925static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
926{
927 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
928
929 switch (ort->fib6_type) {
930 case RTN_BLACKHOLE:
931 rt->dst.output = dst_discard_out;
932 rt->dst.input = dst_discard;
933 break;
934 case RTN_PROHIBIT:
935 rt->dst.output = ip6_pkt_prohibit_out;
936 rt->dst.input = ip6_pkt_prohibit;
937 break;
938 case RTN_THROW:
939 case RTN_UNREACHABLE:
940 default:
941 rt->dst.output = ip6_pkt_discard_out;
942 rt->dst.input = ip6_pkt_discard;
943 break;
944 }
945}
946
947static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
948{
949 if (ort->fib6_flags & RTF_REJECT) {
950 ip6_rt_init_dst_reject(rt, ort);
951 return;
952 }
953
954 rt->dst.error = 0;
955 rt->dst.output = ip6_output;
956
957 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
958 rt->dst.input = ip6_input;
959 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
960 rt->dst.input = ip6_mc_input;
961 } else {
962 rt->dst.input = ip6_forward;
963 }
964
965 if (ort->fib6_nh.nh_lwtstate) {
966 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
967 lwtunnel_set_redirect(&rt->dst);
968 }
969
970 rt->dst.lastuse = jiffies;
971}
972
973
974static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
975{
976 rt->rt6i_flags &= ~RTF_EXPIRES;
977 rcu_assign_pointer(rt->from, from);
978 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
979}
980
981
982static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
983{
984 struct net_device *dev = fib6_info_nh_dev(ort);
985
986 ip6_rt_init_dst(rt, ort);
987
988 rt->rt6i_dst = ort->fib6_dst;
989 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
990 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
991 rt->rt6i_flags = ort->fib6_flags;
992 rt6_set_from(rt, ort);
993#ifdef CONFIG_IPV6_SUBTREES
994 rt->rt6i_src = ort->fib6_src;
995#endif
996}
997
998static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
999 struct in6_addr *saddr)
1000{
1001 struct fib6_node *pn, *sn;
1002 while (1) {
1003 if (fn->fn_flags & RTN_TL_ROOT)
1004 return NULL;
1005 pn = rcu_dereference(fn->parent);
1006 sn = FIB6_SUBTREE(pn);
1007 if (sn && sn != fn)
1008 fn = fib6_node_lookup(sn, NULL, saddr);
1009 else
1010 fn = pn;
1011 if (fn->fn_flags & RTN_RTINFO)
1012 return fn;
1013 }
1014}
1015
1016static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1017 bool null_fallback)
1018{
1019 struct rt6_info *rt = *prt;
1020
1021 if (dst_hold_safe(&rt->dst))
1022 return true;
1023 if (null_fallback) {
1024 rt = net->ipv6.ip6_null_entry;
1025 dst_hold(&rt->dst);
1026 } else {
1027 rt = NULL;
1028 }
1029 *prt = rt;
1030 return false;
1031}
1032
1033
1034static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1035{
1036 unsigned short flags = fib6_info_dst_flags(rt);
1037 struct net_device *dev = rt->fib6_nh.nh_dev;
1038 struct rt6_info *nrt;
1039
1040 if (!fib6_info_hold_safe(rt))
1041 return NULL;
1042
1043 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1044 if (nrt)
1045 ip6_rt_copy_init(nrt, rt);
1046 else
1047 fib6_info_release(rt);
1048
1049 return nrt;
1050}
1051
1052static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1053 struct fib6_table *table,
1054 struct flowi6 *fl6,
1055 const struct sk_buff *skb,
1056 int flags)
1057{
1058 struct fib6_info *f6i;
1059 struct fib6_node *fn;
1060 struct rt6_info *rt;
1061
1062 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1063 flags &= ~RT6_LOOKUP_F_IFACE;
1064
1065 rcu_read_lock();
1066 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1067restart:
1068 f6i = rcu_dereference(fn->leaf);
1069 if (!f6i) {
1070 f6i = net->ipv6.fib6_null_entry;
1071 } else {
1072 f6i = rt6_device_match(net, f6i, &fl6->saddr,
1073 fl6->flowi6_oif, flags);
1074 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
1075 f6i = fib6_multipath_select(net, f6i, fl6,
1076 fl6->flowi6_oif, skb,
1077 flags);
1078 }
1079 if (f6i == net->ipv6.fib6_null_entry) {
1080 fn = fib6_backtrack(fn, &fl6->saddr);
1081 if (fn)
1082 goto restart;
1083 }
1084
1085 trace_fib6_table_lookup(net, f6i, table, fl6);
1086
1087
1088 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1089 if (rt) {
1090 if (ip6_hold_safe(net, &rt, true))
1091 dst_use_noref(&rt->dst, jiffies);
1092 } else if (f6i == net->ipv6.fib6_null_entry) {
1093 rt = net->ipv6.ip6_null_entry;
1094 dst_hold(&rt->dst);
1095 } else {
1096 rt = ip6_create_rt_rcu(f6i);
1097 if (!rt) {
1098 rt = net->ipv6.ip6_null_entry;
1099 dst_hold(&rt->dst);
1100 }
1101 }
1102
1103 rcu_read_unlock();
1104
1105 return rt;
1106}
1107
1108struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1109 const struct sk_buff *skb, int flags)
1110{
1111 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1112}
1113EXPORT_SYMBOL_GPL(ip6_route_lookup);
1114
1115struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1116 const struct in6_addr *saddr, int oif,
1117 const struct sk_buff *skb, int strict)
1118{
1119 struct flowi6 fl6 = {
1120 .flowi6_oif = oif,
1121 .daddr = *daddr,
1122 };
1123 struct dst_entry *dst;
1124 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1125
1126 if (saddr) {
1127 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1128 flags |= RT6_LOOKUP_F_HAS_SADDR;
1129 }
1130
1131 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1132 if (dst->error == 0)
1133 return (struct rt6_info *) dst;
1134
1135 dst_release(dst);
1136
1137 return NULL;
1138}
1139EXPORT_SYMBOL(rt6_lookup);
1140
1141
1142
1143
1144
1145
1146
1147static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1148 struct netlink_ext_ack *extack)
1149{
1150 int err;
1151 struct fib6_table *table;
1152
1153 table = rt->fib6_table;
1154 spin_lock_bh(&table->tb6_lock);
1155 err = fib6_add(&table->tb6_root, rt, info, extack);
1156 spin_unlock_bh(&table->tb6_lock);
1157
1158 return err;
1159}
1160
1161int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1162{
1163 struct nl_info info = { .nl_net = net, };
1164
1165 return __ip6_ins_rt(rt, &info, NULL);
1166}
1167
1168static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
1169 const struct in6_addr *daddr,
1170 const struct in6_addr *saddr)
1171{
1172 struct net_device *dev;
1173 struct rt6_info *rt;
1174
1175
1176
1177
1178
1179 if (!fib6_info_hold_safe(ort))
1180 return NULL;
1181
1182 dev = ip6_rt_get_dev_rcu(ort);
1183 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1184 if (!rt) {
1185 fib6_info_release(ort);
1186 return NULL;
1187 }
1188
1189 ip6_rt_copy_init(rt, ort);
1190 rt->rt6i_flags |= RTF_CACHE;
1191 rt->dst.flags |= DST_HOST;
1192 rt->rt6i_dst.addr = *daddr;
1193 rt->rt6i_dst.plen = 128;
1194
1195 if (!rt6_is_gw_or_nonexthop(ort)) {
1196 if (ort->fib6_dst.plen != 128 &&
1197 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
1198 rt->rt6i_flags |= RTF_ANYCAST;
1199#ifdef CONFIG_IPV6_SUBTREES
1200 if (rt->rt6i_src.plen && saddr) {
1201 rt->rt6i_src.addr = *saddr;
1202 rt->rt6i_src.plen = 128;
1203 }
1204#endif
1205 }
1206
1207 return rt;
1208}
1209
1210static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1211{
1212 unsigned short flags = fib6_info_dst_flags(rt);
1213 struct net_device *dev;
1214 struct rt6_info *pcpu_rt;
1215
1216 if (!fib6_info_hold_safe(rt))
1217 return NULL;
1218
1219 rcu_read_lock();
1220 dev = ip6_rt_get_dev_rcu(rt);
1221 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1222 rcu_read_unlock();
1223 if (!pcpu_rt) {
1224 fib6_info_release(rt);
1225 return NULL;
1226 }
1227 ip6_rt_copy_init(pcpu_rt, rt);
1228 pcpu_rt->rt6i_flags |= RTF_PCPU;
1229 return pcpu_rt;
1230}
1231
1232
1233static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1234{
1235 struct rt6_info *pcpu_rt, **p;
1236
1237 p = this_cpu_ptr(rt->rt6i_pcpu);
1238 pcpu_rt = *p;
1239
1240 if (pcpu_rt)
1241 ip6_hold_safe(NULL, &pcpu_rt, false);
1242
1243 return pcpu_rt;
1244}
1245
1246static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1247 struct fib6_info *rt)
1248{
1249 struct rt6_info *pcpu_rt, *prev, **p;
1250
1251 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1252 if (!pcpu_rt) {
1253 dst_hold(&net->ipv6.ip6_null_entry->dst);
1254 return net->ipv6.ip6_null_entry;
1255 }
1256
1257 dst_hold(&pcpu_rt->dst);
1258 p = this_cpu_ptr(rt->rt6i_pcpu);
1259 prev = cmpxchg(p, NULL, pcpu_rt);
1260 BUG_ON(prev);
1261
1262 return pcpu_rt;
1263}
1264
1265
1266
/*
 * NOTE(review): presumably serialises updates to the route exception
 * tables - the users of this lock are outside this excerpt; confirm.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1268
1269
1270
1271
1272static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1273 struct rt6_exception *rt6_ex)
1274{
1275 struct net *net;
1276
1277 if (!bucket || !rt6_ex)
1278 return;
1279
1280 net = dev_net(rt6_ex->rt6i->dst.dev);
1281 hlist_del_rcu(&rt6_ex->hlist);
1282 dst_release(&rt6_ex->rt6i->dst);
1283 kfree_rcu(rt6_ex, rcu);
1284 WARN_ON_ONCE(!bucket->depth);
1285 bucket->depth--;
1286 net->ipv6.rt6_stats->fib_rt_cache--;
1287}
1288
1289
1290
1291
1292static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1293{
1294 struct rt6_exception *rt6_ex, *oldest = NULL;
1295
1296 if (!bucket)
1297 return;
1298
1299 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1300 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1301 oldest = rt6_ex;
1302 }
1303 rt6_remove_exception(bucket, oldest);
1304}
1305
1306static u32 rt6_exception_hash(const struct in6_addr *dst,
1307 const struct in6_addr *src)
1308{
1309 static u32 seed __read_mostly;
1310 u32 val;
1311
1312 net_get_random_once(&seed, sizeof(seed));
1313 val = jhash(dst, sizeof(*dst), seed);
1314
1315#ifdef CONFIG_IPV6_SUBTREES
1316 if (src)
1317 val = jhash(src, sizeof(*src), val);
1318#endif
1319 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1320}
1321
1322
1323
1324
1325
1326
1327static struct rt6_exception *
1328__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1329 const struct in6_addr *daddr,
1330 const struct in6_addr *saddr)
1331{
1332 struct rt6_exception *rt6_ex;
1333 u32 hval;
1334
1335 if (!(*bucket) || !daddr)
1336 return NULL;
1337
1338 hval = rt6_exception_hash(daddr, saddr);
1339 *bucket += hval;
1340
1341 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1342 struct rt6_info *rt6 = rt6_ex->rt6i;
1343 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1344
1345#ifdef CONFIG_IPV6_SUBTREES
1346 if (matched && saddr)
1347 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1348#endif
1349 if (matched)
1350 return rt6_ex;
1351 }
1352 return NULL;
1353}
1354
1355
1356
1357
1358
1359
1360static struct rt6_exception *
1361__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1362 const struct in6_addr *daddr,
1363 const struct in6_addr *saddr)
1364{
1365 struct rt6_exception *rt6_ex;
1366 u32 hval;
1367
1368 WARN_ON_ONCE(!rcu_read_lock_held());
1369
1370 if (!(*bucket) || !daddr)
1371 return NULL;
1372
1373 hval = rt6_exception_hash(daddr, saddr);
1374 *bucket += hval;
1375
1376 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1377 struct rt6_info *rt6 = rt6_ex->rt6i;
1378 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1379
1380#ifdef CONFIG_IPV6_SUBTREES
1381 if (matched && saddr)
1382 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1383#endif
1384 if (matched)
1385 return rt6_ex;
1386 }
1387 return NULL;
1388}
1389
1390static unsigned int fib6_mtu(const struct fib6_info *rt)
1391{
1392 unsigned int mtu;
1393
1394 if (rt->fib6_pmtu) {
1395 mtu = rt->fib6_pmtu;
1396 } else {
1397 struct net_device *dev = fib6_info_nh_dev(rt);
1398 struct inet6_dev *idev;
1399
1400 rcu_read_lock();
1401 idev = __in6_dev_get(dev);
1402 mtu = idev->cnf.mtu6;
1403 rcu_read_unlock();
1404 }
1405
1406 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1407
1408 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1409}
1410
1411static int rt6_insert_exception(struct rt6_info *nrt,
1412 struct fib6_info *ort)
1413{
1414 struct net *net = dev_net(nrt->dst.dev);
1415 struct rt6_exception_bucket *bucket;
1416 struct in6_addr *src_key = NULL;
1417 struct rt6_exception *rt6_ex;
1418 int err = 0;
1419
1420 spin_lock_bh(&rt6_exception_lock);
1421
1422 if (ort->exception_bucket_flushed) {
1423 err = -EINVAL;
1424 goto out;
1425 }
1426
1427 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1428 lockdep_is_held(&rt6_exception_lock));
1429 if (!bucket) {
1430 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1431 GFP_ATOMIC);
1432 if (!bucket) {
1433 err = -ENOMEM;
1434 goto out;
1435 }
1436 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1437 }
1438
1439#ifdef CONFIG_IPV6_SUBTREES
1440
1441
1442
1443
1444
1445
1446 if (ort->fib6_src.plen)
1447 src_key = &nrt->rt6i_src.addr;
1448#endif
1449
1450
1451
1452
1453 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1454 err = -EINVAL;
1455 goto out;
1456 }
1457
1458 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1459 src_key);
1460 if (rt6_ex)
1461 rt6_remove_exception(bucket, rt6_ex);
1462
1463 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1464 if (!rt6_ex) {
1465 err = -ENOMEM;
1466 goto out;
1467 }
1468 rt6_ex->rt6i = nrt;
1469 rt6_ex->stamp = jiffies;
1470 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1471 bucket->depth++;
1472 net->ipv6.rt6_stats->fib_rt_cache++;
1473
1474 if (bucket->depth > FIB6_MAX_DEPTH)
1475 rt6_exception_remove_oldest(bucket);
1476
1477out:
1478 spin_unlock_bh(&rt6_exception_lock);
1479
1480
1481 if (!err) {
1482 spin_lock_bh(&ort->fib6_table->tb6_lock);
1483 fib6_update_sernum(net, ort);
1484 spin_unlock_bh(&ort->fib6_table->tb6_lock);
1485 fib6_force_start_gc(net);
1486 }
1487
1488 return err;
1489}
1490
1491void rt6_flush_exceptions(struct fib6_info *rt)
1492{
1493 struct rt6_exception_bucket *bucket;
1494 struct rt6_exception *rt6_ex;
1495 struct hlist_node *tmp;
1496 int i;
1497
1498 spin_lock_bh(&rt6_exception_lock);
1499
1500 rt->exception_bucket_flushed = 1;
1501
1502 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1503 lockdep_is_held(&rt6_exception_lock));
1504 if (!bucket)
1505 goto out;
1506
1507 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1508 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1509 rt6_remove_exception(bucket, rt6_ex);
1510 WARN_ON_ONCE(bucket->depth);
1511 bucket++;
1512 }
1513
1514out:
1515 spin_unlock_bh(&rt6_exception_lock);
1516}
1517
1518
1519
1520
1521static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
1522 struct in6_addr *daddr,
1523 struct in6_addr *saddr)
1524{
1525 struct rt6_exception_bucket *bucket;
1526 struct in6_addr *src_key = NULL;
1527 struct rt6_exception *rt6_ex;
1528 struct rt6_info *res = NULL;
1529
1530 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1531
1532#ifdef CONFIG_IPV6_SUBTREES
1533
1534
1535
1536
1537
1538
1539 if (rt->fib6_src.plen)
1540 src_key = saddr;
1541#endif
1542 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1543
1544 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1545 res = rt6_ex->rt6i;
1546
1547 return res;
1548}
1549
1550
1551static int rt6_remove_exception_rt(struct rt6_info *rt)
1552{
1553 struct rt6_exception_bucket *bucket;
1554 struct in6_addr *src_key = NULL;
1555 struct rt6_exception *rt6_ex;
1556 struct fib6_info *from;
1557 int err;
1558
1559 from = rcu_dereference(rt->from);
1560 if (!from ||
1561 !(rt->rt6i_flags & RTF_CACHE))
1562 return -EINVAL;
1563
1564 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1565 return -ENOENT;
1566
1567 spin_lock_bh(&rt6_exception_lock);
1568 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1569 lockdep_is_held(&rt6_exception_lock));
1570#ifdef CONFIG_IPV6_SUBTREES
1571
1572
1573
1574
1575
1576
1577 if (from->fib6_src.plen)
1578 src_key = &rt->rt6i_src.addr;
1579#endif
1580 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1581 &rt->rt6i_dst.addr,
1582 src_key);
1583 if (rt6_ex) {
1584 rt6_remove_exception(bucket, rt6_ex);
1585 err = 0;
1586 } else {
1587 err = -ENOENT;
1588 }
1589
1590 spin_unlock_bh(&rt6_exception_lock);
1591 return err;
1592}
1593
1594
1595
1596
1597static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1598{
1599 struct rt6_exception_bucket *bucket;
1600 struct fib6_info *from = rt->from;
1601 struct in6_addr *src_key = NULL;
1602 struct rt6_exception *rt6_ex;
1603
1604 if (!from ||
1605 !(rt->rt6i_flags & RTF_CACHE))
1606 return;
1607
1608 rcu_read_lock();
1609 bucket = rcu_dereference(from->rt6i_exception_bucket);
1610
1611#ifdef CONFIG_IPV6_SUBTREES
1612
1613
1614
1615
1616
1617
1618 if (from->fib6_src.plen)
1619 src_key = &rt->rt6i_src.addr;
1620#endif
1621 rt6_ex = __rt6_find_exception_rcu(&bucket,
1622 &rt->rt6i_dst.addr,
1623 src_key);
1624 if (rt6_ex)
1625 rt6_ex->stamp = jiffies;
1626
1627 rcu_read_unlock();
1628}
1629
1630static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1631 struct rt6_info *rt, int mtu)
1632{
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643 if (dst_mtu(&rt->dst) >= mtu)
1644 return true;
1645
1646 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1647 return true;
1648
1649 return false;
1650}
1651
1652static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1653 struct fib6_info *rt, int mtu)
1654{
1655 struct rt6_exception_bucket *bucket;
1656 struct rt6_exception *rt6_ex;
1657 int i;
1658
1659 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1660 lockdep_is_held(&rt6_exception_lock));
1661
1662 if (!bucket)
1663 return;
1664
1665 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1666 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1667 struct rt6_info *entry = rt6_ex->rt6i;
1668
1669
1670
1671
1672
1673 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1674 rt6_mtu_change_route_allowed(idev, entry, mtu))
1675 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1676 }
1677 bucket++;
1678 }
1679}
1680
/* Flag mask matching cached routes that go through a gateway. */
#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1682
1683static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1684 struct in6_addr *gateway)
1685{
1686 struct rt6_exception_bucket *bucket;
1687 struct rt6_exception *rt6_ex;
1688 struct hlist_node *tmp;
1689 int i;
1690
1691 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1692 return;
1693
1694 spin_lock_bh(&rt6_exception_lock);
1695 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1696 lockdep_is_held(&rt6_exception_lock));
1697
1698 if (bucket) {
1699 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1700 hlist_for_each_entry_safe(rt6_ex, tmp,
1701 &bucket->chain, hlist) {
1702 struct rt6_info *entry = rt6_ex->rt6i;
1703
1704 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1705 RTF_CACHE_GATEWAY &&
1706 ipv6_addr_equal(gateway,
1707 &entry->rt6i_gateway)) {
1708 rt6_remove_exception(bucket, rt6_ex);
1709 }
1710 }
1711 bucket++;
1712 }
1713 }
1714
1715 spin_unlock_bh(&rt6_exception_lock);
1716}
1717
1718static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1719 struct rt6_exception *rt6_ex,
1720 struct fib6_gc_args *gc_args,
1721 unsigned long now)
1722{
1723 struct rt6_info *rt = rt6_ex->rt6i;
1724
1725
1726
1727
1728
1729
1730
1731 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1732 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1733 RT6_TRACE("aging clone %p\n", rt);
1734 rt6_remove_exception(bucket, rt6_ex);
1735 return;
1736 }
1737 } else if (time_after(jiffies, rt->dst.expires)) {
1738 RT6_TRACE("purging expired route %p\n", rt);
1739 rt6_remove_exception(bucket, rt6_ex);
1740 return;
1741 }
1742
1743 if (rt->rt6i_flags & RTF_GATEWAY) {
1744 struct neighbour *neigh;
1745 __u8 neigh_flags = 0;
1746
1747 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1748 if (neigh)
1749 neigh_flags = neigh->flags;
1750
1751 if (!(neigh_flags & NTF_ROUTER)) {
1752 RT6_TRACE("purging route %p via non-router but gateway\n",
1753 rt);
1754 rt6_remove_exception(bucket, rt6_ex);
1755 return;
1756 }
1757 }
1758
1759 gc_args->more++;
1760}
1761
1762void rt6_age_exceptions(struct fib6_info *rt,
1763 struct fib6_gc_args *gc_args,
1764 unsigned long now)
1765{
1766 struct rt6_exception_bucket *bucket;
1767 struct rt6_exception *rt6_ex;
1768 struct hlist_node *tmp;
1769 int i;
1770
1771 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1772 return;
1773
1774 rcu_read_lock_bh();
1775 spin_lock(&rt6_exception_lock);
1776 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1777 lockdep_is_held(&rt6_exception_lock));
1778
1779 if (bucket) {
1780 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1781 hlist_for_each_entry_safe(rt6_ex, tmp,
1782 &bucket->chain, hlist) {
1783 rt6_age_examine_exception(bucket, rt6_ex,
1784 gc_args, now);
1785 }
1786 bucket++;
1787 }
1788 }
1789 spin_unlock(&rt6_exception_lock);
1790 rcu_read_unlock_bh();
1791}
1792
1793
1794struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1795 int oif, struct flowi6 *fl6, int strict)
1796{
1797 struct fib6_node *fn, *saved_fn;
1798 struct fib6_info *f6i;
1799
1800 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1801 saved_fn = fn;
1802
1803 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1804 oif = 0;
1805
1806redo_rt6_select:
1807 f6i = rt6_select(net, fn, oif, strict);
1808 if (f6i == net->ipv6.fib6_null_entry) {
1809 fn = fib6_backtrack(fn, &fl6->saddr);
1810 if (fn)
1811 goto redo_rt6_select;
1812 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1813
1814 strict &= ~RT6_LOOKUP_F_REACHABLE;
1815 fn = saved_fn;
1816 goto redo_rt6_select;
1817 }
1818 }
1819
1820 trace_fib6_table_lookup(net, f6i, table, fl6);
1821
1822 return f6i;
1823}
1824
1825struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1826 int oif, struct flowi6 *fl6,
1827 const struct sk_buff *skb, int flags)
1828{
1829 struct fib6_info *f6i;
1830 struct rt6_info *rt;
1831 int strict = 0;
1832
1833 strict |= flags & RT6_LOOKUP_F_IFACE;
1834 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1835 if (net->ipv6.devconf_all->forwarding == 0)
1836 strict |= RT6_LOOKUP_F_REACHABLE;
1837
1838 rcu_read_lock();
1839
1840 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1841 if (f6i->fib6_nsiblings)
1842 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1843
1844 if (f6i == net->ipv6.fib6_null_entry) {
1845 rt = net->ipv6.ip6_null_entry;
1846 rcu_read_unlock();
1847 dst_hold(&rt->dst);
1848 return rt;
1849 }
1850
1851
1852 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1853 if (rt) {
1854 if (ip6_hold_safe(net, &rt, true))
1855 dst_use_noref(&rt->dst, jiffies);
1856
1857 rcu_read_unlock();
1858 return rt;
1859 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1860 !(f6i->fib6_flags & RTF_GATEWAY))) {
1861
1862
1863
1864
1865
1866 struct rt6_info *uncached_rt;
1867
1868 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1869
1870 rcu_read_unlock();
1871
1872 if (uncached_rt) {
1873
1874
1875
1876 rt6_uncached_list_add(uncached_rt);
1877 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1878 } else {
1879 uncached_rt = net->ipv6.ip6_null_entry;
1880 dst_hold(&uncached_rt->dst);
1881 }
1882
1883 return uncached_rt;
1884 } else {
1885
1886
1887 struct rt6_info *pcpu_rt;
1888
1889 local_bh_disable();
1890 pcpu_rt = rt6_get_pcpu_route(f6i);
1891
1892 if (!pcpu_rt)
1893 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1894
1895 local_bh_enable();
1896 rcu_read_unlock();
1897
1898 return pcpu_rt;
1899 }
1900}
1901EXPORT_SYMBOL_GPL(ip6_pol_route);
1902
1903static struct rt6_info *ip6_pol_route_input(struct net *net,
1904 struct fib6_table *table,
1905 struct flowi6 *fl6,
1906 const struct sk_buff *skb,
1907 int flags)
1908{
1909 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1910}
1911
1912struct dst_entry *ip6_route_input_lookup(struct net *net,
1913 struct net_device *dev,
1914 struct flowi6 *fl6,
1915 const struct sk_buff *skb,
1916 int flags)
1917{
1918 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1919 flags |= RT6_LOOKUP_F_IFACE;
1920
1921 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1922}
1923EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1924
1925static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1926 struct flow_keys *keys,
1927 struct flow_keys *flkeys)
1928{
1929 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1930 const struct ipv6hdr *key_iph = outer_iph;
1931 struct flow_keys *_flkeys = flkeys;
1932 const struct ipv6hdr *inner_iph;
1933 const struct icmp6hdr *icmph;
1934 struct ipv6hdr _inner_iph;
1935 struct icmp6hdr _icmph;
1936
1937 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1938 goto out;
1939
1940 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1941 sizeof(_icmph), &_icmph);
1942 if (!icmph)
1943 goto out;
1944
1945 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1946 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1947 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1948 icmph->icmp6_type != ICMPV6_PARAMPROB)
1949 goto out;
1950
1951 inner_iph = skb_header_pointer(skb,
1952 skb_transport_offset(skb) + sizeof(*icmph),
1953 sizeof(_inner_iph), &_inner_iph);
1954 if (!inner_iph)
1955 goto out;
1956
1957 key_iph = inner_iph;
1958 _flkeys = NULL;
1959out:
1960 if (_flkeys) {
1961 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1962 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1963 keys->tags.flow_label = _flkeys->tags.flow_label;
1964 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1965 } else {
1966 keys->addrs.v6addrs.src = key_iph->saddr;
1967 keys->addrs.v6addrs.dst = key_iph->daddr;
1968 keys->tags.flow_label = ip6_flowlabel(key_iph);
1969 keys->basic.ip_proto = key_iph->nexthdr;
1970 }
1971}
1972
1973
1974u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1975 const struct sk_buff *skb, struct flow_keys *flkeys)
1976{
1977 struct flow_keys hash_keys;
1978 u32 mhash;
1979
1980 switch (ip6_multipath_hash_policy(net)) {
1981 case 0:
1982 memset(&hash_keys, 0, sizeof(hash_keys));
1983 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1984 if (skb) {
1985 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1986 } else {
1987 hash_keys.addrs.v6addrs.src = fl6->saddr;
1988 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1989 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
1990 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1991 }
1992 break;
1993 case 1:
1994 if (skb) {
1995 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1996 struct flow_keys keys;
1997
1998
1999 if (skb->l4_hash)
2000 return skb_get_hash_raw(skb) >> 1;
2001
2002 memset(&hash_keys, 0, sizeof(hash_keys));
2003
2004 if (!flkeys) {
2005 skb_flow_dissect_flow_keys(skb, &keys, flag);
2006 flkeys = &keys;
2007 }
2008 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2009 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2010 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2011 hash_keys.ports.src = flkeys->ports.src;
2012 hash_keys.ports.dst = flkeys->ports.dst;
2013 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2014 } else {
2015 memset(&hash_keys, 0, sizeof(hash_keys));
2016 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2017 hash_keys.addrs.v6addrs.src = fl6->saddr;
2018 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2019 hash_keys.ports.src = fl6->fl6_sport;
2020 hash_keys.ports.dst = fl6->fl6_dport;
2021 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2022 }
2023 break;
2024 }
2025 mhash = flow_hash_from_keys(&hash_keys);
2026
2027 return mhash >> 1;
2028}
2029
2030void ip6_route_input(struct sk_buff *skb)
2031{
2032 const struct ipv6hdr *iph = ipv6_hdr(skb);
2033 struct net *net = dev_net(skb->dev);
2034 int flags = RT6_LOOKUP_F_HAS_SADDR;
2035 struct ip_tunnel_info *tun_info;
2036 struct flowi6 fl6 = {
2037 .flowi6_iif = skb->dev->ifindex,
2038 .daddr = iph->daddr,
2039 .saddr = iph->saddr,
2040 .flowlabel = ip6_flowinfo(iph),
2041 .flowi6_mark = skb->mark,
2042 .flowi6_proto = iph->nexthdr,
2043 };
2044 struct flow_keys *flkeys = NULL, _flkeys;
2045
2046 tun_info = skb_tunnel_info(skb);
2047 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2048 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2049
2050 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2051 flkeys = &_flkeys;
2052
2053 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2054 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2055 skb_dst_drop(skb);
2056 skb_dst_set(skb,
2057 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2058}
2059
2060static struct rt6_info *ip6_pol_route_output(struct net *net,
2061 struct fib6_table *table,
2062 struct flowi6 *fl6,
2063 const struct sk_buff *skb,
2064 int flags)
2065{
2066 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2067}
2068
2069struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2070 struct flowi6 *fl6, int flags)
2071{
2072 bool any_src;
2073
2074 if (ipv6_addr_type(&fl6->daddr) &
2075 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2076 struct dst_entry *dst;
2077
2078 dst = l3mdev_link_scope_lookup(net, fl6);
2079 if (dst)
2080 return dst;
2081 }
2082
2083 fl6->flowi6_iif = LOOPBACK_IFINDEX;
2084
2085 any_src = ipv6_addr_any(&fl6->saddr);
2086 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2087 (fl6->flowi6_oif && any_src))
2088 flags |= RT6_LOOKUP_F_IFACE;
2089
2090 if (!any_src)
2091 flags |= RT6_LOOKUP_F_HAS_SADDR;
2092 else if (sk)
2093 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2094
2095 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2096}
2097EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2098
2099struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2100{
2101 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2102 struct net_device *loopback_dev = net->loopback_dev;
2103 struct dst_entry *new = NULL;
2104
2105 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2106 DST_OBSOLETE_DEAD, 0);
2107 if (rt) {
2108 rt6_info_init(rt);
2109 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2110
2111 new = &rt->dst;
2112 new->__use = 1;
2113 new->input = dst_discard;
2114 new->output = dst_discard_out;
2115
2116 dst_copy_metrics(new, &ort->dst);
2117
2118 rt->rt6i_idev = in6_dev_get(loopback_dev);
2119 rt->rt6i_gateway = ort->rt6i_gateway;
2120 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2121
2122 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2123#ifdef CONFIG_IPV6_SUBTREES
2124 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2125#endif
2126 }
2127
2128 dst_release(dst_orig);
2129 return new ? new : ERR_PTR(-ENOMEM);
2130}
2131
2132
2133
2134
2135
2136static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2137{
2138 u32 rt_cookie = 0;
2139
2140 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2141 return false;
2142
2143 if (fib6_check_expired(f6i))
2144 return false;
2145
2146 return true;
2147}
2148
2149static struct dst_entry *rt6_check(struct rt6_info *rt,
2150 struct fib6_info *from,
2151 u32 cookie)
2152{
2153 u32 rt_cookie = 0;
2154
2155 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2156 rt_cookie != cookie)
2157 return NULL;
2158
2159 if (rt6_check_expired(rt))
2160 return NULL;
2161
2162 return &rt->dst;
2163}
2164
2165static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2166 struct fib6_info *from,
2167 u32 cookie)
2168{
2169 if (!__rt6_check_expired(rt) &&
2170 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2171 fib6_check(from, cookie))
2172 return &rt->dst;
2173 else
2174 return NULL;
2175}
2176
2177static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2178{
2179 struct dst_entry *dst_ret;
2180 struct fib6_info *from;
2181 struct rt6_info *rt;
2182
2183 rt = container_of(dst, struct rt6_info, dst);
2184
2185 rcu_read_lock();
2186
2187
2188
2189
2190
2191
2192 from = rcu_dereference(rt->from);
2193
2194 if (from && (rt->rt6i_flags & RTF_PCPU ||
2195 unlikely(!list_empty(&rt->rt6i_uncached))))
2196 dst_ret = rt6_dst_from_check(rt, from, cookie);
2197 else
2198 dst_ret = rt6_check(rt, from, cookie);
2199
2200 rcu_read_unlock();
2201
2202 return dst_ret;
2203}
2204
2205static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2206{
2207 struct rt6_info *rt = (struct rt6_info *) dst;
2208
2209 if (rt) {
2210 if (rt->rt6i_flags & RTF_CACHE) {
2211 rcu_read_lock();
2212 if (rt6_check_expired(rt)) {
2213 rt6_remove_exception_rt(rt);
2214 dst = NULL;
2215 }
2216 rcu_read_unlock();
2217 } else {
2218 dst_release(dst);
2219 dst = NULL;
2220 }
2221 }
2222 return dst;
2223}
2224
2225static void ip6_link_failure(struct sk_buff *skb)
2226{
2227 struct rt6_info *rt;
2228
2229 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2230
2231 rt = (struct rt6_info *) skb_dst(skb);
2232 if (rt) {
2233 rcu_read_lock();
2234 if (rt->rt6i_flags & RTF_CACHE) {
2235 rt6_remove_exception_rt(rt);
2236 } else {
2237 struct fib6_info *from;
2238 struct fib6_node *fn;
2239
2240 from = rcu_dereference(rt->from);
2241 if (from) {
2242 fn = rcu_dereference(from->fib6_node);
2243 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2244 fn->fn_sernum = -1;
2245 }
2246 }
2247 rcu_read_unlock();
2248 }
2249}
2250
2251static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2252{
2253 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2254 struct fib6_info *from;
2255
2256 rcu_read_lock();
2257 from = rcu_dereference(rt0->from);
2258 if (from)
2259 rt0->dst.expires = from->expires;
2260 rcu_read_unlock();
2261 }
2262
2263 dst_set_expires(&rt0->dst, timeout);
2264 rt0->rt6i_flags |= RTF_EXPIRES;
2265}
2266
2267static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2268{
2269 struct net *net = dev_net(rt->dst.dev);
2270
2271 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2272 rt->rt6i_flags |= RTF_MODIFIED;
2273 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2274}
2275
2276static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2277{
2278 bool from_set;
2279
2280 rcu_read_lock();
2281 from_set = !!rcu_dereference(rt->from);
2282 rcu_read_unlock();
2283
2284 return !(rt->rt6i_flags & RTF_CACHE) &&
2285 (rt->rt6i_flags & RTF_PCPU || from_set);
2286}
2287
2288static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2289 const struct ipv6hdr *iph, u32 mtu)
2290{
2291 const struct in6_addr *daddr, *saddr;
2292 struct rt6_info *rt6 = (struct rt6_info *)dst;
2293
2294 if (dst_metric_locked(dst, RTAX_MTU))
2295 return;
2296
2297 if (iph) {
2298 daddr = &iph->daddr;
2299 saddr = &iph->saddr;
2300 } else if (sk) {
2301 daddr = &sk->sk_v6_daddr;
2302 saddr = &inet6_sk(sk)->saddr;
2303 } else {
2304 daddr = NULL;
2305 saddr = NULL;
2306 }
2307 dst_confirm_neigh(dst, daddr);
2308 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2309 if (mtu >= dst_mtu(dst))
2310 return;
2311
2312 if (!rt6_cache_allowed_for_pmtu(rt6)) {
2313 rt6_do_update_pmtu(rt6, mtu);
2314
2315 if (rt6->rt6i_flags & RTF_CACHE)
2316 rt6_update_exception_stamp_rt(rt6);
2317 } else if (daddr) {
2318 struct fib6_info *from;
2319 struct rt6_info *nrt6;
2320
2321 rcu_read_lock();
2322 from = rcu_dereference(rt6->from);
2323 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2324 if (nrt6) {
2325 rt6_do_update_pmtu(nrt6, mtu);
2326 if (rt6_insert_exception(nrt6, from))
2327 dst_release_immediate(&nrt6->dst);
2328 }
2329 rcu_read_unlock();
2330 }
2331}
2332
2333static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2334 struct sk_buff *skb, u32 mtu)
2335{
2336 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2337}
2338
2339void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2340 int oif, u32 mark, kuid_t uid)
2341{
2342 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2343 struct dst_entry *dst;
2344 struct flowi6 fl6 = {
2345 .flowi6_oif = oif,
2346 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2347 .daddr = iph->daddr,
2348 .saddr = iph->saddr,
2349 .flowlabel = ip6_flowinfo(iph),
2350 .flowi6_uid = uid,
2351 };
2352
2353 dst = ip6_route_output(net, NULL, &fl6);
2354 if (!dst->error)
2355 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2356 dst_release(dst);
2357}
2358EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2359
2360void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2361{
2362 int oif = sk->sk_bound_dev_if;
2363 struct dst_entry *dst;
2364
2365 if (!oif && skb->dev)
2366 oif = l3mdev_master_ifindex(skb->dev);
2367
2368 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2369
2370 dst = __sk_dst_get(sk);
2371 if (!dst || !dst->obsolete ||
2372 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2373 return;
2374
2375 bh_lock_sock(sk);
2376 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2377 ip6_datagram_dst_update(sk, false);
2378 bh_unlock_sock(sk);
2379}
2380EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2381
2382void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2383 const struct flowi6 *fl6)
2384{
2385#ifdef CONFIG_IPV6_SUBTREES
2386 struct ipv6_pinfo *np = inet6_sk(sk);
2387#endif
2388
2389 ip6_dst_store(sk, dst,
2390 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2391 &sk->sk_v6_daddr : NULL,
2392#ifdef CONFIG_IPV6_SUBTREES
2393 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2394 &np->saddr :
2395#endif
2396 NULL);
2397}
2398
2399
/* Lookup key for redirect handling: a flowi6 extended with the address of
 * the router that sent the redirect.
 *
 * fl6 must remain the first member: __ip6_route_redirect() casts the flowi6
 * pointer it receives back to this structure.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
2404
2405static struct rt6_info *__ip6_route_redirect(struct net *net,
2406 struct fib6_table *table,
2407 struct flowi6 *fl6,
2408 const struct sk_buff *skb,
2409 int flags)
2410{
2411 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2412 struct rt6_info *ret = NULL, *rt_cache;
2413 struct fib6_info *rt;
2414 struct fib6_node *fn;
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426 rcu_read_lock();
2427 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2428restart:
2429 for_each_fib6_node_rt_rcu(fn) {
2430 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2431 continue;
2432 if (fib6_check_expired(rt))
2433 continue;
2434 if (rt->fib6_flags & RTF_REJECT)
2435 break;
2436 if (!(rt->fib6_flags & RTF_GATEWAY))
2437 continue;
2438 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2439 continue;
2440
2441
2442
2443
2444
2445 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2446 rt_cache = rt6_find_cached_rt(rt,
2447 &fl6->daddr,
2448 &fl6->saddr);
2449 if (rt_cache &&
2450 ipv6_addr_equal(&rdfl->gateway,
2451 &rt_cache->rt6i_gateway)) {
2452 ret = rt_cache;
2453 break;
2454 }
2455 continue;
2456 }
2457 break;
2458 }
2459
2460 if (!rt)
2461 rt = net->ipv6.fib6_null_entry;
2462 else if (rt->fib6_flags & RTF_REJECT) {
2463 ret = net->ipv6.ip6_null_entry;
2464 goto out;
2465 }
2466
2467 if (rt == net->ipv6.fib6_null_entry) {
2468 fn = fib6_backtrack(fn, &fl6->saddr);
2469 if (fn)
2470 goto restart;
2471 }
2472
2473out:
2474 if (ret)
2475 ip6_hold_safe(net, &ret, true);
2476 else
2477 ret = ip6_create_rt_rcu(rt);
2478
2479 rcu_read_unlock();
2480
2481 trace_fib6_table_lookup(net, rt, table, fl6);
2482 return ret;
2483};
2484
2485static struct dst_entry *ip6_route_redirect(struct net *net,
2486 const struct flowi6 *fl6,
2487 const struct sk_buff *skb,
2488 const struct in6_addr *gateway)
2489{
2490 int flags = RT6_LOOKUP_F_HAS_SADDR;
2491 struct ip6rd_flowi rdfl;
2492
2493 rdfl.fl6 = *fl6;
2494 rdfl.gateway = *gateway;
2495
2496 return fib6_rule_lookup(net, &rdfl.fl6, skb,
2497 flags, __ip6_route_redirect);
2498}
2499
2500void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2501 kuid_t uid)
2502{
2503 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2504 struct dst_entry *dst;
2505 struct flowi6 fl6 = {
2506 .flowi6_iif = LOOPBACK_IFINDEX,
2507 .flowi6_oif = oif,
2508 .flowi6_mark = mark,
2509 .daddr = iph->daddr,
2510 .saddr = iph->saddr,
2511 .flowlabel = ip6_flowinfo(iph),
2512 .flowi6_uid = uid,
2513 };
2514
2515 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2516 rt6_do_redirect(dst, NULL, skb);
2517 dst_release(dst);
2518}
2519EXPORT_SYMBOL_GPL(ip6_redirect);
2520
2521void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2522{
2523 const struct ipv6hdr *iph = ipv6_hdr(skb);
2524 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2525 struct dst_entry *dst;
2526 struct flowi6 fl6 = {
2527 .flowi6_iif = LOOPBACK_IFINDEX,
2528 .flowi6_oif = oif,
2529 .daddr = msg->dest,
2530 .saddr = iph->daddr,
2531 .flowi6_uid = sock_net_uid(net, NULL),
2532 };
2533
2534 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2535 rt6_do_redirect(dst, NULL, skb);
2536 dst_release(dst);
2537}
2538
2539void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2540{
2541 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2542 sk->sk_uid);
2543}
2544EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2545
2546static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2547{
2548 struct net_device *dev = dst->dev;
2549 unsigned int mtu = dst_mtu(dst);
2550 struct net *net = dev_net(dev);
2551
2552 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2553
2554 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2555 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2556
2557
2558
2559
2560
2561
2562
2563 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2564 mtu = IPV6_MAXPLEN;
2565 return mtu;
2566}
2567
2568static unsigned int ip6_mtu(const struct dst_entry *dst)
2569{
2570 struct inet6_dev *idev;
2571 unsigned int mtu;
2572
2573 mtu = dst_metric_raw(dst, RTAX_MTU);
2574 if (mtu)
2575 goto out;
2576
2577 mtu = IPV6_MIN_MTU;
2578
2579 rcu_read_lock();
2580 idev = __in6_dev_get(dst->dev);
2581 if (idev)
2582 mtu = idev->cnf.mtu6;
2583 rcu_read_unlock();
2584
2585out:
2586 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2587
2588 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2589}
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2600 struct in6_addr *saddr)
2601{
2602 struct rt6_exception_bucket *bucket;
2603 struct rt6_exception *rt6_ex;
2604 struct in6_addr *src_key;
2605 struct inet6_dev *idev;
2606 u32 mtu = 0;
2607
2608 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2609 mtu = f6i->fib6_pmtu;
2610 if (mtu)
2611 goto out;
2612 }
2613
2614 src_key = NULL;
2615#ifdef CONFIG_IPV6_SUBTREES
2616 if (f6i->fib6_src.plen)
2617 src_key = saddr;
2618#endif
2619
2620 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2621 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2622 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2623 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2624
2625 if (likely(!mtu)) {
2626 struct net_device *dev = fib6_info_nh_dev(f6i);
2627
2628 mtu = IPV6_MIN_MTU;
2629 idev = __in6_dev_get(dev);
2630 if (idev && idev->cnf.mtu6 > mtu)
2631 mtu = idev->cnf.mtu6;
2632 }
2633
2634 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2635out:
2636 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2637}
2638
2639struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2640 struct flowi6 *fl6)
2641{
2642 struct dst_entry *dst;
2643 struct rt6_info *rt;
2644 struct inet6_dev *idev = in6_dev_get(dev);
2645 struct net *net = dev_net(dev);
2646
2647 if (unlikely(!idev))
2648 return ERR_PTR(-ENODEV);
2649
2650 rt = ip6_dst_alloc(net, dev, 0);
2651 if (unlikely(!rt)) {
2652 in6_dev_put(idev);
2653 dst = ERR_PTR(-ENOMEM);
2654 goto out;
2655 }
2656
2657 rt->dst.flags |= DST_HOST;
2658 rt->dst.input = ip6_input;
2659 rt->dst.output = ip6_output;
2660 rt->rt6i_gateway = fl6->daddr;
2661 rt->rt6i_dst.addr = fl6->daddr;
2662 rt->rt6i_dst.plen = 128;
2663 rt->rt6i_idev = idev;
2664 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2665
2666
2667
2668
2669 rt6_uncached_list_add(rt);
2670 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2671
2672 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2673
2674out:
2675 return dst;
2676}
2677
2678static int ip6_dst_gc(struct dst_ops *ops)
2679{
2680 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2681 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2682 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2683 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2684 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2685 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2686 int entries;
2687
2688 entries = dst_entries_get_fast(ops);
2689 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2690 entries <= rt_max_size)
2691 goto out;
2692
2693 net->ipv6.ip6_rt_gc_expire++;
2694 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2695 entries = dst_entries_get_slow(ops);
2696 if (entries < ops->gc_thresh)
2697 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2698out:
2699 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2700 return entries > rt_max_size;
2701}
2702
2703static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2704 struct fib6_config *cfg,
2705 const struct in6_addr *gw_addr,
2706 u32 tbid, int flags)
2707{
2708 struct flowi6 fl6 = {
2709 .flowi6_oif = cfg->fc_ifindex,
2710 .daddr = *gw_addr,
2711 .saddr = cfg->fc_prefsrc,
2712 };
2713 struct fib6_table *table;
2714 struct rt6_info *rt;
2715
2716 table = fib6_get_table(net, tbid);
2717 if (!table)
2718 return NULL;
2719
2720 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2721 flags |= RT6_LOOKUP_F_HAS_SADDR;
2722
2723 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2724 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2725
2726
2727 if (rt == net->ipv6.ip6_null_entry) {
2728 ip6_rt_put(rt);
2729 rt = NULL;
2730 }
2731
2732 return rt;
2733}
2734
2735static int ip6_route_check_nh_onlink(struct net *net,
2736 struct fib6_config *cfg,
2737 const struct net_device *dev,
2738 struct netlink_ext_ack *extack)
2739{
2740 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2741 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2742 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2743 struct rt6_info *grt;
2744 int err;
2745
2746 err = 0;
2747 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2748 if (grt) {
2749 if (!grt->dst.error &&
2750
2751 grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
2752 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2753 NL_SET_ERR_MSG(extack,
2754 "Nexthop has invalid gateway or device mismatch");
2755 err = -EINVAL;
2756 }
2757
2758 ip6_rt_put(grt);
2759 }
2760
2761 return err;
2762}
2763
2764static int ip6_route_check_nh(struct net *net,
2765 struct fib6_config *cfg,
2766 struct net_device **_dev,
2767 struct inet6_dev **idev)
2768{
2769 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2770 struct net_device *dev = _dev ? *_dev : NULL;
2771 struct rt6_info *grt = NULL;
2772 int err = -EHOSTUNREACH;
2773
2774 if (cfg->fc_table) {
2775 int flags = RT6_LOOKUP_F_IFACE;
2776
2777 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2778 cfg->fc_table, flags);
2779 if (grt) {
2780 if (grt->rt6i_flags & RTF_GATEWAY ||
2781 (dev && dev != grt->dst.dev)) {
2782 ip6_rt_put(grt);
2783 grt = NULL;
2784 }
2785 }
2786 }
2787
2788 if (!grt)
2789 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2790
2791 if (!grt)
2792 goto out;
2793
2794 if (dev) {
2795 if (dev != grt->dst.dev) {
2796 ip6_rt_put(grt);
2797 goto out;
2798 }
2799 } else {
2800 *_dev = dev = grt->dst.dev;
2801 *idev = grt->rt6i_idev;
2802 dev_hold(dev);
2803 in6_dev_hold(grt->rt6i_idev);
2804 }
2805
2806 if (!(grt->rt6i_flags & RTF_GATEWAY))
2807 err = 0;
2808
2809 ip6_rt_put(grt);
2810
2811out:
2812 return err;
2813}
2814
2815static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2816 struct net_device **_dev, struct inet6_dev **idev,
2817 struct netlink_ext_ack *extack)
2818{
2819 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2820 int gwa_type = ipv6_addr_type(gw_addr);
2821 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2822 const struct net_device *dev = *_dev;
2823 bool need_addr_check = !dev;
2824 int err = -EINVAL;
2825
2826
2827
2828
2829
2830
2831 if (dev &&
2832 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2833 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2834 goto out;
2835 }
2836
2837 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2848 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2849 goto out;
2850 }
2851
2852 if (cfg->fc_flags & RTNH_F_ONLINK)
2853 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2854 else
2855 err = ip6_route_check_nh(net, cfg, _dev, idev);
2856
2857 if (err)
2858 goto out;
2859 }
2860
2861
2862 dev = *_dev;
2863
2864 err = -EINVAL;
2865 if (!dev) {
2866 NL_SET_ERR_MSG(extack, "Egress device not specified");
2867 goto out;
2868 } else if (dev->flags & IFF_LOOPBACK) {
2869 NL_SET_ERR_MSG(extack,
2870 "Egress device can not be loopback device for this route");
2871 goto out;
2872 }
2873
2874
2875
2876
2877 if (need_addr_check &&
2878 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2879 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2880 goto out;
2881 }
2882
2883 err = 0;
2884out:
2885 return err;
2886}
2887
2888static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
2889 gfp_t gfp_flags,
2890 struct netlink_ext_ack *extack)
2891{
2892 struct net *net = cfg->fc_nlinfo.nl_net;
2893 struct fib6_info *rt = NULL;
2894 struct net_device *dev = NULL;
2895 struct inet6_dev *idev = NULL;
2896 struct fib6_table *table;
2897 int addr_type;
2898 int err = -EINVAL;
2899
2900
2901 if (cfg->fc_flags & RTF_PCPU) {
2902 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2903 goto out;
2904 }
2905
2906
2907 if (cfg->fc_flags & RTF_CACHE) {
2908 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2909 goto out;
2910 }
2911
2912 if (cfg->fc_type > RTN_MAX) {
2913 NL_SET_ERR_MSG(extack, "Invalid route type");
2914 goto out;
2915 }
2916
2917 if (cfg->fc_dst_len > 128) {
2918 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2919 goto out;
2920 }
2921 if (cfg->fc_src_len > 128) {
2922 NL_SET_ERR_MSG(extack, "Invalid source address length");
2923 goto out;
2924 }
2925#ifndef CONFIG_IPV6_SUBTREES
2926 if (cfg->fc_src_len) {
2927 NL_SET_ERR_MSG(extack,
2928 "Specifying source address requires IPV6_SUBTREES to be enabled");
2929 goto out;
2930 }
2931#endif
2932 if (cfg->fc_ifindex) {
2933 err = -ENODEV;
2934 dev = dev_get_by_index(net, cfg->fc_ifindex);
2935 if (!dev)
2936 goto out;
2937 idev = in6_dev_get(dev);
2938 if (!idev)
2939 goto out;
2940 }
2941
2942 if (cfg->fc_metric == 0)
2943 cfg->fc_metric = IP6_RT_PRIO_USER;
2944
2945 if (cfg->fc_flags & RTNH_F_ONLINK) {
2946 if (!dev) {
2947 NL_SET_ERR_MSG(extack,
2948 "Nexthop device required for onlink");
2949 err = -ENODEV;
2950 goto out;
2951 }
2952
2953 if (!(dev->flags & IFF_UP)) {
2954 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2955 err = -ENETDOWN;
2956 goto out;
2957 }
2958 }
2959
2960 err = -ENOBUFS;
2961 if (cfg->fc_nlinfo.nlh &&
2962 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2963 table = fib6_get_table(net, cfg->fc_table);
2964 if (!table) {
2965 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2966 table = fib6_new_table(net, cfg->fc_table);
2967 }
2968 } else {
2969 table = fib6_new_table(net, cfg->fc_table);
2970 }
2971
2972 if (!table)
2973 goto out;
2974
2975 err = -ENOMEM;
2976 rt = fib6_info_alloc(gfp_flags);
2977 if (!rt)
2978 goto out;
2979
2980 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len);
2981 if (IS_ERR(rt->fib6_metrics)) {
2982 err = PTR_ERR(rt->fib6_metrics);
2983
2984 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
2985 goto out;
2986 }
2987
2988 if (cfg->fc_flags & RTF_ADDRCONF)
2989 rt->dst_nocount = true;
2990
2991 if (cfg->fc_flags & RTF_EXPIRES)
2992 fib6_set_expires(rt, jiffies +
2993 clock_t_to_jiffies(cfg->fc_expires));
2994 else
2995 fib6_clean_expires(rt);
2996
2997 if (cfg->fc_protocol == RTPROT_UNSPEC)
2998 cfg->fc_protocol = RTPROT_BOOT;
2999 rt->fib6_protocol = cfg->fc_protocol;
3000
3001 addr_type = ipv6_addr_type(&cfg->fc_dst);
3002
3003 if (cfg->fc_encap) {
3004 struct lwtunnel_state *lwtstate;
3005
3006 err = lwtunnel_build_state(cfg->fc_encap_type,
3007 cfg->fc_encap, AF_INET6, cfg,
3008 &lwtstate, extack);
3009 if (err)
3010 goto out;
3011 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
3012 }
3013
3014 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3015 rt->fib6_dst.plen = cfg->fc_dst_len;
3016 if (rt->fib6_dst.plen == 128)
3017 rt->dst_host = true;
3018
3019#ifdef CONFIG_IPV6_SUBTREES
3020 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3021 rt->fib6_src.plen = cfg->fc_src_len;
3022#endif
3023
3024 rt->fib6_metric = cfg->fc_metric;
3025 rt->fib6_nh.nh_weight = 1;
3026
3027 rt->fib6_type = cfg->fc_type;
3028
3029
3030
3031
3032 if ((cfg->fc_flags & RTF_REJECT) ||
3033 (dev && (dev->flags & IFF_LOOPBACK) &&
3034 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3035 !(cfg->fc_flags & RTF_LOCAL))) {
3036
3037 if (dev != net->loopback_dev) {
3038 if (dev) {
3039 dev_put(dev);
3040 in6_dev_put(idev);
3041 }
3042 dev = net->loopback_dev;
3043 dev_hold(dev);
3044 idev = in6_dev_get(dev);
3045 if (!idev) {
3046 err = -ENODEV;
3047 goto out;
3048 }
3049 }
3050 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
3051 goto install_route;
3052 }
3053
3054 if (cfg->fc_flags & RTF_GATEWAY) {
3055 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3056 if (err)
3057 goto out;
3058
3059 rt->fib6_nh.nh_gw = cfg->fc_gateway;
3060 }
3061
3062 err = -ENODEV;
3063 if (!dev)
3064 goto out;
3065
3066 if (idev->cnf.disable_ipv6) {
3067 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3068 err = -EACCES;
3069 goto out;
3070 }
3071
3072 if (!(dev->flags & IFF_UP)) {
3073 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3074 err = -ENETDOWN;
3075 goto out;
3076 }
3077
3078 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3079 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3080 NL_SET_ERR_MSG(extack, "Invalid source address");
3081 err = -EINVAL;
3082 goto out;
3083 }
3084 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3085 rt->fib6_prefsrc.plen = 128;
3086 } else
3087 rt->fib6_prefsrc.plen = 0;
3088
3089 rt->fib6_flags = cfg->fc_flags;
3090
3091install_route:
3092 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3093 !netif_carrier_ok(dev))
3094 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3095 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3096 rt->fib6_nh.nh_dev = dev;
3097 rt->fib6_table = table;
3098
3099 if (idev)
3100 in6_dev_put(idev);
3101
3102 return rt;
3103out:
3104 if (dev)
3105 dev_put(dev);
3106 if (idev)
3107 in6_dev_put(idev);
3108
3109 fib6_info_release(rt);
3110 return ERR_PTR(err);
3111}
3112
3113int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3114 struct netlink_ext_ack *extack)
3115{
3116 struct fib6_info *rt;
3117 int err;
3118
3119 rt = ip6_route_info_create(cfg, gfp_flags, extack);
3120 if (IS_ERR(rt))
3121 return PTR_ERR(rt);
3122
3123 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3124 fib6_info_release(rt);
3125
3126 return err;
3127}
3128
3129static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3130{
3131 struct net *net = info->nl_net;
3132 struct fib6_table *table;
3133 int err;
3134
3135 if (rt == net->ipv6.fib6_null_entry) {
3136 err = -ENOENT;
3137 goto out;
3138 }
3139
3140 table = rt->fib6_table;
3141 spin_lock_bh(&table->tb6_lock);
3142 err = fib6_del(rt, info);
3143 spin_unlock_bh(&table->tb6_lock);
3144
3145out:
3146 fib6_info_release(rt);
3147 return err;
3148}
3149
3150int ip6_del_rt(struct net *net, struct fib6_info *rt)
3151{
3152 struct nl_info info = { .nl_net = net };
3153
3154 return __ip6_del_rt(rt, &info);
3155}
3156
3157static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3158{
3159 struct nl_info *info = &cfg->fc_nlinfo;
3160 struct net *net = info->nl_net;
3161 struct sk_buff *skb = NULL;
3162 struct fib6_table *table;
3163 int err = -ENOENT;
3164
3165 if (rt == net->ipv6.fib6_null_entry)
3166 goto out_put;
3167 table = rt->fib6_table;
3168 spin_lock_bh(&table->tb6_lock);
3169
3170 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3171 struct fib6_info *sibling, *next_sibling;
3172
3173
3174 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3175 if (skb) {
3176 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3177
3178 if (rt6_fill_node(net, skb, rt, NULL,
3179 NULL, NULL, 0, RTM_DELROUTE,
3180 info->portid, seq, 0) < 0) {
3181 kfree_skb(skb);
3182 skb = NULL;
3183 } else
3184 info->skip_notify = 1;
3185 }
3186
3187 list_for_each_entry_safe(sibling, next_sibling,
3188 &rt->fib6_siblings,
3189 fib6_siblings) {
3190 err = fib6_del(sibling, info);
3191 if (err)
3192 goto out_unlock;
3193 }
3194 }
3195
3196 err = fib6_del(rt, info);
3197out_unlock:
3198 spin_unlock_bh(&table->tb6_lock);
3199out_put:
3200 fib6_info_release(rt);
3201
3202 if (skb) {
3203 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3204 info->nlh, gfp_any());
3205 }
3206 return err;
3207}
3208
3209static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3210{
3211 int rc = -ESRCH;
3212
3213 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3214 goto out;
3215
3216 if (cfg->fc_flags & RTF_GATEWAY &&
3217 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3218 goto out;
3219
3220 rc = rt6_remove_exception_rt(rt);
3221out:
3222 return rc;
3223}
3224
3225static int ip6_route_del(struct fib6_config *cfg,
3226 struct netlink_ext_ack *extack)
3227{
3228 struct rt6_info *rt_cache;
3229 struct fib6_table *table;
3230 struct fib6_info *rt;
3231 struct fib6_node *fn;
3232 int err = -ESRCH;
3233
3234 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3235 if (!table) {
3236 NL_SET_ERR_MSG(extack, "FIB table does not exist");
3237 return err;
3238 }
3239
3240 rcu_read_lock();
3241
3242 fn = fib6_locate(&table->tb6_root,
3243 &cfg->fc_dst, cfg->fc_dst_len,
3244 &cfg->fc_src, cfg->fc_src_len,
3245 !(cfg->fc_flags & RTF_CACHE));
3246
3247 if (fn) {
3248 for_each_fib6_node_rt_rcu(fn) {
3249 if (cfg->fc_flags & RTF_CACHE) {
3250 int rc;
3251
3252 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3253 &cfg->fc_src);
3254 if (rt_cache) {
3255 rc = ip6_del_cached_rt(rt_cache, cfg);
3256 if (rc != -ESRCH) {
3257 rcu_read_unlock();
3258 return rc;
3259 }
3260 }
3261 continue;
3262 }
3263 if (cfg->fc_ifindex &&
3264 (!rt->fib6_nh.nh_dev ||
3265 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
3266 continue;
3267 if (cfg->fc_flags & RTF_GATEWAY &&
3268 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
3269 continue;
3270 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3271 continue;
3272 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3273 continue;
3274 if (!fib6_info_hold_safe(rt))
3275 continue;
3276 rcu_read_unlock();
3277
3278
3279 if (cfg->fc_flags & RTF_GATEWAY)
3280 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3281
3282 return __ip6_del_rt_siblings(rt, cfg);
3283 }
3284 }
3285 rcu_read_unlock();
3286
3287 return err;
3288}
3289
3290static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3291{
3292 struct netevent_redirect netevent;
3293 struct rt6_info *rt, *nrt = NULL;
3294 struct ndisc_options ndopts;
3295 struct inet6_dev *in6_dev;
3296 struct neighbour *neigh;
3297 struct fib6_info *from;
3298 struct rd_msg *msg;
3299 int optlen, on_link;
3300 u8 *lladdr;
3301
3302 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3303 optlen -= sizeof(*msg);
3304
3305 if (optlen < 0) {
3306 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3307 return;
3308 }
3309
3310 msg = (struct rd_msg *)icmp6_hdr(skb);
3311
3312 if (ipv6_addr_is_multicast(&msg->dest)) {
3313 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3314 return;
3315 }
3316
3317 on_link = 0;
3318 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3319 on_link = 1;
3320 } else if (ipv6_addr_type(&msg->target) !=
3321 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3322 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3323 return;
3324 }
3325
3326 in6_dev = __in6_dev_get(skb->dev);
3327 if (!in6_dev)
3328 return;
3329 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3330 return;
3331
3332
3333
3334
3335
3336
3337 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3338 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3339 return;
3340 }
3341
3342 lladdr = NULL;
3343 if (ndopts.nd_opts_tgt_lladdr) {
3344 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3345 skb->dev);
3346 if (!lladdr) {
3347 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3348 return;
3349 }
3350 }
3351
3352 rt = (struct rt6_info *) dst;
3353 if (rt->rt6i_flags & RTF_REJECT) {
3354 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3355 return;
3356 }
3357
3358
3359
3360
3361
3362 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3363
3364 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3365 if (!neigh)
3366 return;
3367
3368
3369
3370
3371
3372 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3373 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3374 NEIGH_UPDATE_F_OVERRIDE|
3375 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3376 NEIGH_UPDATE_F_ISROUTER)),
3377 NDISC_REDIRECT, &ndopts);
3378
3379 rcu_read_lock();
3380 from = rcu_dereference(rt->from);
3381
3382
3383
3384 fib6_info_hold(from);
3385 rcu_read_unlock();
3386
3387 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3388 if (!nrt)
3389 goto out;
3390
3391 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3392 if (on_link)
3393 nrt->rt6i_flags &= ~RTF_GATEWAY;
3394
3395 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3396
3397
3398
3399
3400
3401 if (rt6_insert_exception(nrt, from)) {
3402 dst_release_immediate(&nrt->dst);
3403 goto out;
3404 }
3405
3406 netevent.old = &rt->dst;
3407 netevent.new = &nrt->dst;
3408 netevent.daddr = &msg->dest;
3409 netevent.neigh = neigh;
3410 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3411
3412out:
3413 fib6_info_release(from);
3414 neigh_release(neigh);
3415}
3416
3417#ifdef CONFIG_IPV6_ROUTE_INFO
3418static struct fib6_info *rt6_get_route_info(struct net *net,
3419 const struct in6_addr *prefix, int prefixlen,
3420 const struct in6_addr *gwaddr,
3421 struct net_device *dev)
3422{
3423 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3424 int ifindex = dev->ifindex;
3425 struct fib6_node *fn;
3426 struct fib6_info *rt = NULL;
3427 struct fib6_table *table;
3428
3429 table = fib6_get_table(net, tb_id);
3430 if (!table)
3431 return NULL;
3432
3433 rcu_read_lock();
3434 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3435 if (!fn)
3436 goto out;
3437
3438 for_each_fib6_node_rt_rcu(fn) {
3439 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
3440 continue;
3441 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3442 continue;
3443 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
3444 continue;
3445 if (!fib6_info_hold_safe(rt))
3446 continue;
3447 break;
3448 }
3449out:
3450 rcu_read_unlock();
3451 return rt;
3452}
3453
3454static struct fib6_info *rt6_add_route_info(struct net *net,
3455 const struct in6_addr *prefix, int prefixlen,
3456 const struct in6_addr *gwaddr,
3457 struct net_device *dev,
3458 unsigned int pref)
3459{
3460 struct fib6_config cfg = {
3461 .fc_metric = IP6_RT_PRIO_USER,
3462 .fc_ifindex = dev->ifindex,
3463 .fc_dst_len = prefixlen,
3464 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3465 RTF_UP | RTF_PREF(pref),
3466 .fc_protocol = RTPROT_RA,
3467 .fc_type = RTN_UNICAST,
3468 .fc_nlinfo.portid = 0,
3469 .fc_nlinfo.nlh = NULL,
3470 .fc_nlinfo.nl_net = net,
3471 };
3472
3473 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3474 cfg.fc_dst = *prefix;
3475 cfg.fc_gateway = *gwaddr;
3476
3477
3478 if (!prefixlen)
3479 cfg.fc_flags |= RTF_DEFAULT;
3480
3481 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3482
3483 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3484}
3485#endif
3486
3487struct fib6_info *rt6_get_dflt_router(struct net *net,
3488 const struct in6_addr *addr,
3489 struct net_device *dev)
3490{
3491 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3492 struct fib6_info *rt;
3493 struct fib6_table *table;
3494
3495 table = fib6_get_table(net, tb_id);
3496 if (!table)
3497 return NULL;
3498
3499 rcu_read_lock();
3500 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3501 if (dev == rt->fib6_nh.nh_dev &&
3502 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3503 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
3504 break;
3505 }
3506 if (rt && !fib6_info_hold_safe(rt))
3507 rt = NULL;
3508 rcu_read_unlock();
3509 return rt;
3510}
3511
3512struct fib6_info *rt6_add_dflt_router(struct net *net,
3513 const struct in6_addr *gwaddr,
3514 struct net_device *dev,
3515 unsigned int pref)
3516{
3517 struct fib6_config cfg = {
3518 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3519 .fc_metric = IP6_RT_PRIO_USER,
3520 .fc_ifindex = dev->ifindex,
3521 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3522 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3523 .fc_protocol = RTPROT_RA,
3524 .fc_type = RTN_UNICAST,
3525 .fc_nlinfo.portid = 0,
3526 .fc_nlinfo.nlh = NULL,
3527 .fc_nlinfo.nl_net = net,
3528 };
3529
3530 cfg.fc_gateway = *gwaddr;
3531
3532 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3533 struct fib6_table *table;
3534
3535 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3536 if (table)
3537 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3538 }
3539
3540 return rt6_get_dflt_router(net, gwaddr, dev);
3541}
3542
3543static void __rt6_purge_dflt_routers(struct net *net,
3544 struct fib6_table *table)
3545{
3546 struct fib6_info *rt;
3547
3548restart:
3549 rcu_read_lock();
3550 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3551 struct net_device *dev = fib6_info_nh_dev(rt);
3552 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3553
3554 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3555 (!idev || idev->cnf.accept_ra != 2) &&
3556 fib6_info_hold_safe(rt)) {
3557 rcu_read_unlock();
3558 ip6_del_rt(net, rt);
3559 goto restart;
3560 }
3561 }
3562 rcu_read_unlock();
3563
3564 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3565}
3566
3567void rt6_purge_dflt_routers(struct net *net)
3568{
3569 struct fib6_table *table;
3570 struct hlist_head *head;
3571 unsigned int h;
3572
3573 rcu_read_lock();
3574
3575 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3576 head = &net->ipv6.fib_table_hash[h];
3577 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3578 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3579 __rt6_purge_dflt_routers(net, table);
3580 }
3581 }
3582
3583 rcu_read_unlock();
3584}
3585
3586static void rtmsg_to_fib6_config(struct net *net,
3587 struct in6_rtmsg *rtmsg,
3588 struct fib6_config *cfg)
3589{
3590 *cfg = (struct fib6_config){
3591 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3592 : RT6_TABLE_MAIN,
3593 .fc_ifindex = rtmsg->rtmsg_ifindex,
3594 .fc_metric = rtmsg->rtmsg_metric,
3595 .fc_expires = rtmsg->rtmsg_info,
3596 .fc_dst_len = rtmsg->rtmsg_dst_len,
3597 .fc_src_len = rtmsg->rtmsg_src_len,
3598 .fc_flags = rtmsg->rtmsg_flags,
3599 .fc_type = rtmsg->rtmsg_type,
3600
3601 .fc_nlinfo.nl_net = net,
3602
3603 .fc_dst = rtmsg->rtmsg_dst,
3604 .fc_src = rtmsg->rtmsg_src,
3605 .fc_gateway = rtmsg->rtmsg_gateway,
3606 };
3607}
3608
3609int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3610{
3611 struct fib6_config cfg;
3612 struct in6_rtmsg rtmsg;
3613 int err;
3614
3615 switch (cmd) {
3616 case SIOCADDRT:
3617 case SIOCDELRT:
3618 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3619 return -EPERM;
3620 err = copy_from_user(&rtmsg, arg,
3621 sizeof(struct in6_rtmsg));
3622 if (err)
3623 return -EFAULT;
3624
3625 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3626
3627 rtnl_lock();
3628 switch (cmd) {
3629 case SIOCADDRT:
3630 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3631 break;
3632 case SIOCDELRT:
3633 err = ip6_route_del(&cfg, NULL);
3634 break;
3635 default:
3636 err = -EINVAL;
3637 }
3638 rtnl_unlock();
3639
3640 return err;
3641 }
3642
3643 return -EINVAL;
3644}
3645
3646
3647
3648
3649
3650static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3651{
3652 int type;
3653 struct dst_entry *dst = skb_dst(skb);
3654 switch (ipstats_mib_noroutes) {
3655 case IPSTATS_MIB_INNOROUTES:
3656 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3657 if (type == IPV6_ADDR_ANY) {
3658 IP6_INC_STATS(dev_net(dst->dev),
3659 __in6_dev_get_safely(skb->dev),
3660 IPSTATS_MIB_INADDRERRORS);
3661 break;
3662 }
3663
3664 case IPSTATS_MIB_OUTNOROUTES:
3665 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3666 ipstats_mib_noroutes);
3667 break;
3668 }
3669 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3670 kfree_skb(skb);
3671 return 0;
3672}
3673
3674static int ip6_pkt_discard(struct sk_buff *skb)
3675{
3676 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3677}
3678
3679static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3680{
3681 skb->dev = skb_dst(skb)->dev;
3682 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3683}
3684
3685static int ip6_pkt_prohibit(struct sk_buff *skb)
3686{
3687 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3688}
3689
3690static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3691{
3692 skb->dev = skb_dst(skb)->dev;
3693 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3694}
3695
3696
3697
3698
3699
3700struct fib6_info *addrconf_f6i_alloc(struct net *net,
3701 struct inet6_dev *idev,
3702 const struct in6_addr *addr,
3703 bool anycast, gfp_t gfp_flags)
3704{
3705 u32 tb_id;
3706 struct net_device *dev = idev->dev;
3707 struct fib6_info *f6i;
3708
3709 f6i = fib6_info_alloc(gfp_flags);
3710 if (!f6i)
3711 return ERR_PTR(-ENOMEM);
3712
3713 f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0);
3714 f6i->dst_nocount = true;
3715 f6i->dst_host = true;
3716 f6i->fib6_protocol = RTPROT_KERNEL;
3717 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3718 if (anycast) {
3719 f6i->fib6_type = RTN_ANYCAST;
3720 f6i->fib6_flags |= RTF_ANYCAST;
3721 } else {
3722 f6i->fib6_type = RTN_LOCAL;
3723 f6i->fib6_flags |= RTF_LOCAL;
3724 }
3725
3726 f6i->fib6_nh.nh_gw = *addr;
3727 dev_hold(dev);
3728 f6i->fib6_nh.nh_dev = dev;
3729 f6i->fib6_dst.addr = *addr;
3730 f6i->fib6_dst.plen = 128;
3731 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3732 f6i->fib6_table = fib6_get_table(net, tb_id);
3733
3734 return f6i;
3735}
3736
3737
/* Argument bundle handed to fib6_remove_prefsrc() via fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device filter; NULL matches any */
	struct net		*net;	/* namespace owning the null entry */
	struct in6_addr		*addr;	/* preferred source being removed */
};
3743
3744static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3745{
3746 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3747 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3748 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3749
3750 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3751 rt != net->ipv6.fib6_null_entry &&
3752 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3753 spin_lock_bh(&rt6_exception_lock);
3754
3755 rt->fib6_prefsrc.plen = 0;
3756 spin_unlock_bh(&rt6_exception_lock);
3757 }
3758 return 0;
3759}
3760
3761void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3762{
3763 struct net *net = dev_net(ifp->idev->dev);
3764 struct arg_dev_net_ip adni = {
3765 .dev = ifp->idev->dev,
3766 .net = net,
3767 .addr = &ifp->addr,
3768 };
3769 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3770}
3771
3772#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3773
3774
3775static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3776{
3777 struct in6_addr *gateway = (struct in6_addr *)arg;
3778
3779 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3780 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3781 return -1;
3782 }
3783
3784
3785
3786
3787
3788 rt6_exceptions_clean_tohost(rt, gateway);
3789
3790 return 0;
3791}
3792
3793void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3794{
3795 fib6_clean_all(net, fib6_clean_tohost, gateway);
3796}
3797
/*
 * Argument for the per-route netdevice event callbacks: the device
 * concerned plus either the nexthop flags to change or the event code,
 * which share storage.
 */
struct arg_netdev_event {
	const struct net_device	*dev;
	union {
		unsigned int	nh_flags;
		unsigned long	event;
	};
};
3805
3806static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3807{
3808 struct fib6_info *iter;
3809 struct fib6_node *fn;
3810
3811 fn = rcu_dereference_protected(rt->fib6_node,
3812 lockdep_is_held(&rt->fib6_table->tb6_lock));
3813 iter = rcu_dereference_protected(fn->leaf,
3814 lockdep_is_held(&rt->fib6_table->tb6_lock));
3815 while (iter) {
3816 if (iter->fib6_metric == rt->fib6_metric &&
3817 rt6_qualify_for_ecmp(iter))
3818 return iter;
3819 iter = rcu_dereference_protected(iter->fib6_next,
3820 lockdep_is_held(&rt->fib6_table->tb6_lock));
3821 }
3822
3823 return NULL;
3824}
3825
3826static bool rt6_is_dead(const struct fib6_info *rt)
3827{
3828 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3829 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3830 fib6_ignore_linkdown(rt)))
3831 return true;
3832
3833 return false;
3834}
3835
3836static int rt6_multipath_total_weight(const struct fib6_info *rt)
3837{
3838 struct fib6_info *iter;
3839 int total = 0;
3840
3841 if (!rt6_is_dead(rt))
3842 total += rt->fib6_nh.nh_weight;
3843
3844 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3845 if (!rt6_is_dead(iter))
3846 total += iter->fib6_nh.nh_weight;
3847 }
3848
3849 return total;
3850}
3851
3852static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3853{
3854 int upper_bound = -1;
3855
3856 if (!rt6_is_dead(rt)) {
3857 *weight += rt->fib6_nh.nh_weight;
3858 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3859 total) - 1;
3860 }
3861 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3862}
3863
3864static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3865{
3866 struct fib6_info *iter;
3867 int weight = 0;
3868
3869 rt6_upper_bound_set(rt, &weight, total);
3870
3871 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3872 rt6_upper_bound_set(iter, &weight, total);
3873}
3874
3875void rt6_multipath_rebalance(struct fib6_info *rt)
3876{
3877 struct fib6_info *first;
3878 int total;
3879
3880
3881
3882
3883
3884 if (!rt->fib6_nsiblings || rt->should_flush)
3885 return;
3886
3887
3888
3889
3890
3891 first = rt6_multipath_first_sibling(rt);
3892 if (WARN_ON_ONCE(!first))
3893 return;
3894
3895 total = rt6_multipath_total_weight(first);
3896 rt6_multipath_upper_bound_set(first, total);
3897}
3898
3899static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3900{
3901 const struct arg_netdev_event *arg = p_arg;
3902 struct net *net = dev_net(arg->dev);
3903
3904 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3905 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
3906 fib6_update_sernum_upto_root(net, rt);
3907 rt6_multipath_rebalance(rt);
3908 }
3909
3910 return 0;
3911}
3912
3913void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3914{
3915 struct arg_netdev_event arg = {
3916 .dev = dev,
3917 {
3918 .nh_flags = nh_flags,
3919 },
3920 };
3921
3922 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3923 arg.nh_flags |= RTNH_F_LINKDOWN;
3924
3925 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3926}
3927
3928static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3929 const struct net_device *dev)
3930{
3931 struct fib6_info *iter;
3932
3933 if (rt->fib6_nh.nh_dev == dev)
3934 return true;
3935 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3936 if (iter->fib6_nh.nh_dev == dev)
3937 return true;
3938
3939 return false;
3940}
3941
3942static void rt6_multipath_flush(struct fib6_info *rt)
3943{
3944 struct fib6_info *iter;
3945
3946 rt->should_flush = 1;
3947 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3948 iter->should_flush = 1;
3949}
3950
3951static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3952 const struct net_device *down_dev)
3953{
3954 struct fib6_info *iter;
3955 unsigned int dead = 0;
3956
3957 if (rt->fib6_nh.nh_dev == down_dev ||
3958 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
3959 dead++;
3960 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3961 if (iter->fib6_nh.nh_dev == down_dev ||
3962 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
3963 dead++;
3964
3965 return dead;
3966}
3967
3968static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
3969 const struct net_device *dev,
3970 unsigned int nh_flags)
3971{
3972 struct fib6_info *iter;
3973
3974 if (rt->fib6_nh.nh_dev == dev)
3975 rt->fib6_nh.nh_flags |= nh_flags;
3976 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3977 if (iter->fib6_nh.nh_dev == dev)
3978 iter->fib6_nh.nh_flags |= nh_flags;
3979}
3980
3981
3982static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
3983{
3984 const struct arg_netdev_event *arg = p_arg;
3985 const struct net_device *dev = arg->dev;
3986 struct net *net = dev_net(dev);
3987
3988 if (rt == net->ipv6.fib6_null_entry)
3989 return 0;
3990
3991 switch (arg->event) {
3992 case NETDEV_UNREGISTER:
3993 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
3994 case NETDEV_DOWN:
3995 if (rt->should_flush)
3996 return -1;
3997 if (!rt->fib6_nsiblings)
3998 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
3999 if (rt6_multipath_uses_dev(rt, dev)) {
4000 unsigned int count;
4001
4002 count = rt6_multipath_dead_count(rt, dev);
4003 if (rt->fib6_nsiblings + 1 == count) {
4004 rt6_multipath_flush(rt);
4005 return -1;
4006 }
4007 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4008 RTNH_F_LINKDOWN);
4009 fib6_update_sernum(net, rt);
4010 rt6_multipath_rebalance(rt);
4011 }
4012 return -2;
4013 case NETDEV_CHANGE:
4014 if (rt->fib6_nh.nh_dev != dev ||
4015 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4016 break;
4017 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
4018 rt6_multipath_rebalance(rt);
4019 break;
4020 }
4021
4022 return 0;
4023}
4024
4025void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4026{
4027 struct arg_netdev_event arg = {
4028 .dev = dev,
4029 {
4030 .event = event,
4031 },
4032 };
4033 struct net *net = dev_net(dev);
4034
4035 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4036 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4037 else
4038 fib6_clean_all(net, fib6_ifdown, &arg);
4039}
4040
/*
 * Tear down IPv6 routing state tied to @dev for the given NETDEV_* @event:
 * sync the FIB via rt6_sync_down_dev(), flush the device from the uncached
 * route list, and take down its entries in the neighbour table nd_tbl.
 */
void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
	rt6_sync_down_dev(dev, event);
	rt6_uncached_list_flush_dev(dev_net(dev), dev);
	neigh_ifdown(&nd_tbl, dev);
}
4047
/* Argument passed by rt6_mtu_change() to the rt6_mtu_change_route() walker. */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
4052
4053static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4054{
4055 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4056 struct inet6_dev *idev;
4057
4058
4059
4060
4061
4062
4063
4064 idev = __in6_dev_get(arg->dev);
4065 if (!idev)
4066 return 0;
4067
4068
4069
4070
4071
4072
4073 if (rt->fib6_nh.nh_dev == arg->dev &&
4074 !fib6_metric_locked(rt, RTAX_MTU)) {
4075 u32 mtu = rt->fib6_pmtu;
4076
4077 if (mtu >= arg->mtu ||
4078 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4079 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4080
4081 spin_lock_bh(&rt6_exception_lock);
4082 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4083 spin_unlock_bh(&rt6_exception_lock);
4084 }
4085 return 0;
4086}
4087
4088void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4089{
4090 struct rt6_mtu_change_arg arg = {
4091 .dev = dev,
4092 .mtu = mtu,
4093 };
4094
4095 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4096}
4097
/*
 * Netlink attribute policy handed to nlmsg_parse() by rtm_to_fib6_config()
 * and inet6_rtm_getroute().  Entries with only .len give a length for the
 * attribute payload; entries with .type name the expected attribute type.
 * NOTE(review): whether .len acts as a minimum or an exact length depends
 * on the nla_policy rules for untyped attributes -- confirm in
 * include/net/netlink.h.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]		= { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]		= { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
	[RTA_IP_PROTO]		= { .type = NLA_U8 },
	[RTA_SPORT]		= { .type = NLA_U16 },
	[RTA_DPORT]		= { .type = NLA_U16 },
};
4117
4118static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4119 struct fib6_config *cfg,
4120 struct netlink_ext_ack *extack)
4121{
4122 struct rtmsg *rtm;
4123 struct nlattr *tb[RTA_MAX+1];
4124 unsigned int pref;
4125 int err;
4126
4127 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4128 extack);
4129 if (err < 0)
4130 goto errout;
4131
4132 err = -EINVAL;
4133 rtm = nlmsg_data(nlh);
4134
4135 *cfg = (struct fib6_config){
4136 .fc_table = rtm->rtm_table,
4137 .fc_dst_len = rtm->rtm_dst_len,
4138 .fc_src_len = rtm->rtm_src_len,
4139 .fc_flags = RTF_UP,
4140 .fc_protocol = rtm->rtm_protocol,
4141 .fc_type = rtm->rtm_type,
4142
4143 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4144 .fc_nlinfo.nlh = nlh,
4145 .fc_nlinfo.nl_net = sock_net(skb->sk),
4146 };
4147
4148 if (rtm->rtm_type == RTN_UNREACHABLE ||
4149 rtm->rtm_type == RTN_BLACKHOLE ||
4150 rtm->rtm_type == RTN_PROHIBIT ||
4151 rtm->rtm_type == RTN_THROW)
4152 cfg->fc_flags |= RTF_REJECT;
4153
4154 if (rtm->rtm_type == RTN_LOCAL)
4155 cfg->fc_flags |= RTF_LOCAL;
4156
4157 if (rtm->rtm_flags & RTM_F_CLONED)
4158 cfg->fc_flags |= RTF_CACHE;
4159
4160 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4161
4162 if (tb[RTA_GATEWAY]) {
4163 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4164 cfg->fc_flags |= RTF_GATEWAY;
4165 }
4166
4167 if (tb[RTA_DST]) {
4168 int plen = (rtm->rtm_dst_len + 7) >> 3;
4169
4170 if (nla_len(tb[RTA_DST]) < plen)
4171 goto errout;
4172
4173 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4174 }
4175
4176 if (tb[RTA_SRC]) {
4177 int plen = (rtm->rtm_src_len + 7) >> 3;
4178
4179 if (nla_len(tb[RTA_SRC]) < plen)
4180 goto errout;
4181
4182 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4183 }
4184
4185 if (tb[RTA_PREFSRC])
4186 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4187
4188 if (tb[RTA_OIF])
4189 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4190
4191 if (tb[RTA_PRIORITY])
4192 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4193
4194 if (tb[RTA_METRICS]) {
4195 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4196 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4197 }
4198
4199 if (tb[RTA_TABLE])
4200 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4201
4202 if (tb[RTA_MULTIPATH]) {
4203 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4204 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4205
4206 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4207 cfg->fc_mp_len, extack);
4208 if (err < 0)
4209 goto errout;
4210 }
4211
4212 if (tb[RTA_PREF]) {
4213 pref = nla_get_u8(tb[RTA_PREF]);
4214 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4215 pref != ICMPV6_ROUTER_PREF_HIGH)
4216 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4217 cfg->fc_flags |= RTF_PREF(pref);
4218 }
4219
4220 if (tb[RTA_ENCAP])
4221 cfg->fc_encap = tb[RTA_ENCAP];
4222
4223 if (tb[RTA_ENCAP_TYPE]) {
4224 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4225
4226 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4227 if (err < 0)
4228 goto errout;
4229 }
4230
4231 if (tb[RTA_EXPIRES]) {
4232 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4233
4234 if (addrconf_finite_timeout(timeout)) {
4235 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4236 cfg->fc_flags |= RTF_EXPIRES;
4237 }
4238 }
4239
4240 err = 0;
4241errout:
4242 return err;
4243}
4244
/*
 * One nexthop of a multipath request while it is being processed by
 * ip6_route_multipath_add(); entries are allocated by
 * ip6_route_info_append() and linked through @next.
 */
struct rt6_nh {
	struct fib6_info *fib6_info;	/* route built for this nexthop; set to NULL once inserted */
	struct fib6_config r_cfg;	/* per-nexthop copy of the config, used for rollback deletes */
	struct list_head next;		/* linkage in the caller's nexthop list */
};
4250
4251static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4252{
4253 struct rt6_nh *nh;
4254
4255 list_for_each_entry(nh, rt6_nh_list, next) {
4256 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
4257 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4258 nh->r_cfg.fc_ifindex);
4259 }
4260}
4261
4262static int ip6_route_info_append(struct net *net,
4263 struct list_head *rt6_nh_list,
4264 struct fib6_info *rt,
4265 struct fib6_config *r_cfg)
4266{
4267 struct rt6_nh *nh;
4268 int err = -EEXIST;
4269
4270 list_for_each_entry(nh, rt6_nh_list, next) {
4271
4272 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4273 return err;
4274 }
4275
4276 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4277 if (!nh)
4278 return -ENOMEM;
4279 nh->fib6_info = rt;
4280 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4281 list_add_tail(&nh->next, rt6_nh_list);
4282
4283 return 0;
4284}
4285
4286static void ip6_route_mpath_notify(struct fib6_info *rt,
4287 struct fib6_info *rt_last,
4288 struct nl_info *info,
4289 __u16 nlflags)
4290{
4291
4292
4293
4294
4295
4296
4297 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4298 rt = list_first_entry(&rt_last->fib6_siblings,
4299 struct fib6_info,
4300 fib6_siblings);
4301 }
4302
4303 if (rt)
4304 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4305}
4306
4307static int ip6_route_multipath_add(struct fib6_config *cfg,
4308 struct netlink_ext_ack *extack)
4309{
4310 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4311 struct nl_info *info = &cfg->fc_nlinfo;
4312 struct fib6_config r_cfg;
4313 struct rtnexthop *rtnh;
4314 struct fib6_info *rt;
4315 struct rt6_nh *err_nh;
4316 struct rt6_nh *nh, *nh_safe;
4317 __u16 nlflags;
4318 int remaining;
4319 int attrlen;
4320 int err = 1;
4321 int nhn = 0;
4322 int replace = (cfg->fc_nlinfo.nlh &&
4323 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4324 LIST_HEAD(rt6_nh_list);
4325
4326 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4327 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4328 nlflags |= NLM_F_APPEND;
4329
4330 remaining = cfg->fc_mp_len;
4331 rtnh = (struct rtnexthop *)cfg->fc_mp;
4332
4333
4334
4335
4336 while (rtnh_ok(rtnh, remaining)) {
4337 memcpy(&r_cfg, cfg, sizeof(*cfg));
4338 if (rtnh->rtnh_ifindex)
4339 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4340
4341 attrlen = rtnh_attrlen(rtnh);
4342 if (attrlen > 0) {
4343 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4344
4345 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4346 if (nla) {
4347 r_cfg.fc_gateway = nla_get_in6_addr(nla);
4348 r_cfg.fc_flags |= RTF_GATEWAY;
4349 }
4350 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4351 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4352 if (nla)
4353 r_cfg.fc_encap_type = nla_get_u16(nla);
4354 }
4355
4356 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4357 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4358 if (IS_ERR(rt)) {
4359 err = PTR_ERR(rt);
4360 rt = NULL;
4361 goto cleanup;
4362 }
4363 if (!rt6_qualify_for_ecmp(rt)) {
4364 err = -EINVAL;
4365 NL_SET_ERR_MSG(extack,
4366 "Device only routes can not be added for IPv6 using the multipath API.");
4367 fib6_info_release(rt);
4368 goto cleanup;
4369 }
4370
4371 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4372
4373 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4374 rt, &r_cfg);
4375 if (err) {
4376 fib6_info_release(rt);
4377 goto cleanup;
4378 }
4379
4380 rtnh = rtnh_next(rtnh, &remaining);
4381 }
4382
4383
4384
4385
4386
4387 info->skip_notify = 1;
4388
4389 err_nh = NULL;
4390 list_for_each_entry(nh, &rt6_nh_list, next) {
4391 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4392 fib6_info_release(nh->fib6_info);
4393
4394 if (!err) {
4395
4396 rt_last = nh->fib6_info;
4397
4398
4399 if (!rt_notif)
4400 rt_notif = nh->fib6_info;
4401 }
4402
4403
4404 nh->fib6_info = NULL;
4405 if (err) {
4406 if (replace && nhn)
4407 ip6_print_replace_route_err(&rt6_nh_list);
4408 err_nh = nh;
4409 goto add_errout;
4410 }
4411
4412
4413
4414
4415
4416
4417
4418
4419 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4420 NLM_F_REPLACE);
4421 nhn++;
4422 }
4423
4424
4425 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4426 goto cleanup;
4427
4428add_errout:
4429
4430
4431
4432
4433 if (rt_notif)
4434 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4435
4436
4437 list_for_each_entry(nh, &rt6_nh_list, next) {
4438 if (err_nh == nh)
4439 break;
4440 ip6_route_del(&nh->r_cfg, extack);
4441 }
4442
4443cleanup:
4444 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4445 if (nh->fib6_info)
4446 fib6_info_release(nh->fib6_info);
4447 list_del(&nh->next);
4448 kfree(nh);
4449 }
4450
4451 return err;
4452}
4453
4454static int ip6_route_multipath_del(struct fib6_config *cfg,
4455 struct netlink_ext_ack *extack)
4456{
4457 struct fib6_config r_cfg;
4458 struct rtnexthop *rtnh;
4459 int remaining;
4460 int attrlen;
4461 int err = 1, last_err = 0;
4462
4463 remaining = cfg->fc_mp_len;
4464 rtnh = (struct rtnexthop *)cfg->fc_mp;
4465
4466
4467 while (rtnh_ok(rtnh, remaining)) {
4468 memcpy(&r_cfg, cfg, sizeof(*cfg));
4469 if (rtnh->rtnh_ifindex)
4470 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4471
4472 attrlen = rtnh_attrlen(rtnh);
4473 if (attrlen > 0) {
4474 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4475
4476 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4477 if (nla) {
4478 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4479 r_cfg.fc_flags |= RTF_GATEWAY;
4480 }
4481 }
4482 err = ip6_route_del(&r_cfg, extack);
4483 if (err)
4484 last_err = err;
4485
4486 rtnh = rtnh_next(rtnh, &remaining);
4487 }
4488
4489 return last_err;
4490}
4491
4492static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4493 struct netlink_ext_ack *extack)
4494{
4495 struct fib6_config cfg;
4496 int err;
4497
4498 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4499 if (err < 0)
4500 return err;
4501
4502 if (cfg.fc_mp)
4503 return ip6_route_multipath_del(&cfg, extack);
4504 else {
4505 cfg.fc_delete_all_nh = 1;
4506 return ip6_route_del(&cfg, extack);
4507 }
4508}
4509
4510static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4511 struct netlink_ext_ack *extack)
4512{
4513 struct fib6_config cfg;
4514 int err;
4515
4516 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4517 if (err < 0)
4518 return err;
4519
4520 if (cfg.fc_mp)
4521 return ip6_route_multipath_add(&cfg, extack);
4522 else
4523 return ip6_route_add(&cfg, GFP_KERNEL, extack);
4524}
4525
4526static size_t rt6_nlmsg_size(struct fib6_info *rt)
4527{
4528 int nexthop_len = 0;
4529
4530 if (rt->fib6_nsiblings) {
4531 nexthop_len = nla_total_size(0)
4532 + NLA_ALIGN(sizeof(struct rtnexthop))
4533 + nla_total_size(16)
4534 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4535
4536 nexthop_len *= rt->fib6_nsiblings;
4537 }
4538
4539 return NLMSG_ALIGN(sizeof(struct rtmsg))
4540 + nla_total_size(16)
4541 + nla_total_size(16)
4542 + nla_total_size(16)
4543 + nla_total_size(16)
4544 + nla_total_size(4)
4545 + nla_total_size(4)
4546 + nla_total_size(4)
4547 + nla_total_size(4)
4548 + RTAX_MAX * nla_total_size(4)
4549 + nla_total_size(sizeof(struct rta_cacheinfo))
4550 + nla_total_size(TCP_CA_NAME_MAX)
4551 + nla_total_size(1)
4552 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4553 + nexthop_len;
4554}
4555
4556static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4557 unsigned int *flags, bool skip_oif)
4558{
4559 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4560 *flags |= RTNH_F_DEAD;
4561
4562 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4563 *flags |= RTNH_F_LINKDOWN;
4564
4565 rcu_read_lock();
4566 if (fib6_ignore_linkdown(rt))
4567 *flags |= RTNH_F_DEAD;
4568 rcu_read_unlock();
4569 }
4570
4571 if (rt->fib6_flags & RTF_GATEWAY) {
4572 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4573 goto nla_put_failure;
4574 }
4575
4576 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4577 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4578 *flags |= RTNH_F_OFFLOAD;
4579
4580
4581 if (!skip_oif && rt->fib6_nh.nh_dev &&
4582 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4583 goto nla_put_failure;
4584
4585 if (rt->fib6_nh.nh_lwtstate &&
4586 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4587 goto nla_put_failure;
4588
4589 return 0;
4590
4591nla_put_failure:
4592 return -EMSGSIZE;
4593}
4594
4595
4596static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4597{
4598 const struct net_device *dev = rt->fib6_nh.nh_dev;
4599 struct rtnexthop *rtnh;
4600 unsigned int flags = 0;
4601
4602 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4603 if (!rtnh)
4604 goto nla_put_failure;
4605
4606 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4607 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4608
4609 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4610 goto nla_put_failure;
4611
4612 rtnh->rtnh_flags = flags;
4613
4614
4615 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4616
4617 return 0;
4618
4619nla_put_failure:
4620 return -EMSGSIZE;
4621}
4622
4623static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4624 struct fib6_info *rt, struct dst_entry *dst,
4625 struct in6_addr *dest, struct in6_addr *src,
4626 int iif, int type, u32 portid, u32 seq,
4627 unsigned int flags)
4628{
4629 struct rt6_info *rt6 = (struct rt6_info *)dst;
4630 struct rt6key *rt6_dst, *rt6_src;
4631 u32 *pmetrics, table, rt6_flags;
4632 struct nlmsghdr *nlh;
4633 struct rtmsg *rtm;
4634 long expires = 0;
4635
4636 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4637 if (!nlh)
4638 return -EMSGSIZE;
4639
4640 if (rt6) {
4641 rt6_dst = &rt6->rt6i_dst;
4642 rt6_src = &rt6->rt6i_src;
4643 rt6_flags = rt6->rt6i_flags;
4644 } else {
4645 rt6_dst = &rt->fib6_dst;
4646 rt6_src = &rt->fib6_src;
4647 rt6_flags = rt->fib6_flags;
4648 }
4649
4650 rtm = nlmsg_data(nlh);
4651 rtm->rtm_family = AF_INET6;
4652 rtm->rtm_dst_len = rt6_dst->plen;
4653 rtm->rtm_src_len = rt6_src->plen;
4654 rtm->rtm_tos = 0;
4655 if (rt->fib6_table)
4656 table = rt->fib6_table->tb6_id;
4657 else
4658 table = RT6_TABLE_UNSPEC;
4659 rtm->rtm_table = table;
4660 if (nla_put_u32(skb, RTA_TABLE, table))
4661 goto nla_put_failure;
4662
4663 rtm->rtm_type = rt->fib6_type;
4664 rtm->rtm_flags = 0;
4665 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4666 rtm->rtm_protocol = rt->fib6_protocol;
4667
4668 if (rt6_flags & RTF_CACHE)
4669 rtm->rtm_flags |= RTM_F_CLONED;
4670
4671 if (dest) {
4672 if (nla_put_in6_addr(skb, RTA_DST, dest))
4673 goto nla_put_failure;
4674 rtm->rtm_dst_len = 128;
4675 } else if (rtm->rtm_dst_len)
4676 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4677 goto nla_put_failure;
4678#ifdef CONFIG_IPV6_SUBTREES
4679 if (src) {
4680 if (nla_put_in6_addr(skb, RTA_SRC, src))
4681 goto nla_put_failure;
4682 rtm->rtm_src_len = 128;
4683 } else if (rtm->rtm_src_len &&
4684 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4685 goto nla_put_failure;
4686#endif
4687 if (iif) {
4688#ifdef CONFIG_IPV6_MROUTE
4689 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4690 int err = ip6mr_get_route(net, skb, rtm, portid);
4691
4692 if (err == 0)
4693 return 0;
4694 if (err < 0)
4695 goto nla_put_failure;
4696 } else
4697#endif
4698 if (nla_put_u32(skb, RTA_IIF, iif))
4699 goto nla_put_failure;
4700 } else if (dest) {
4701 struct in6_addr saddr_buf;
4702 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4703 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4704 goto nla_put_failure;
4705 }
4706
4707 if (rt->fib6_prefsrc.plen) {
4708 struct in6_addr saddr_buf;
4709 saddr_buf = rt->fib6_prefsrc.addr;
4710 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4711 goto nla_put_failure;
4712 }
4713
4714 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4715 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4716 goto nla_put_failure;
4717
4718 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4719 goto nla_put_failure;
4720
4721
4722
4723
4724 if (rt6) {
4725 if (rt6_flags & RTF_GATEWAY &&
4726 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4727 goto nla_put_failure;
4728
4729 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4730 goto nla_put_failure;
4731 } else if (rt->fib6_nsiblings) {
4732 struct fib6_info *sibling, *next_sibling;
4733 struct nlattr *mp;
4734
4735 mp = nla_nest_start(skb, RTA_MULTIPATH);
4736 if (!mp)
4737 goto nla_put_failure;
4738
4739 if (rt6_add_nexthop(skb, rt) < 0)
4740 goto nla_put_failure;
4741
4742 list_for_each_entry_safe(sibling, next_sibling,
4743 &rt->fib6_siblings, fib6_siblings) {
4744 if (rt6_add_nexthop(skb, sibling) < 0)
4745 goto nla_put_failure;
4746 }
4747
4748 nla_nest_end(skb, mp);
4749 } else {
4750 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4751 goto nla_put_failure;
4752 }
4753
4754 if (rt6_flags & RTF_EXPIRES) {
4755 expires = dst ? dst->expires : rt->expires;
4756 expires -= jiffies;
4757 }
4758
4759 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4760 goto nla_put_failure;
4761
4762 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4763 goto nla_put_failure;
4764
4765
4766 nlmsg_end(skb, nlh);
4767 return 0;
4768
4769nla_put_failure:
4770 nlmsg_cancel(skb, nlh);
4771 return -EMSGSIZE;
4772}
4773
4774static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4775 const struct net_device *dev)
4776{
4777 if (f6i->fib6_nh.nh_dev == dev)
4778 return true;
4779
4780 if (f6i->fib6_nsiblings) {
4781 struct fib6_info *sibling, *next_sibling;
4782
4783 list_for_each_entry_safe(sibling, next_sibling,
4784 &f6i->fib6_siblings, fib6_siblings) {
4785 if (sibling->fib6_nh.nh_dev == dev)
4786 return true;
4787 }
4788 }
4789
4790 return false;
4791}
4792
4793int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4794{
4795 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4796 struct fib_dump_filter *filter = &arg->filter;
4797 unsigned int flags = NLM_F_MULTI;
4798 struct net *net = arg->net;
4799
4800 if (rt == net->ipv6.fib6_null_entry)
4801 return 0;
4802
4803 if ((filter->flags & RTM_F_PREFIX) &&
4804 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4805
4806 return 1;
4807 }
4808 if (filter->filter_set) {
4809 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4810 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4811 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
4812 return 1;
4813 }
4814 flags |= NLM_F_DUMP_FILTERED;
4815 }
4816
4817 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4818 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4819 arg->cb->nlh->nlmsg_seq, flags);
4820}
4821
4822static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4823 struct netlink_ext_ack *extack)
4824{
4825 struct net *net = sock_net(in_skb->sk);
4826 struct nlattr *tb[RTA_MAX+1];
4827 int err, iif = 0, oif = 0;
4828 struct fib6_info *from;
4829 struct dst_entry *dst;
4830 struct rt6_info *rt;
4831 struct sk_buff *skb;
4832 struct rtmsg *rtm;
4833 struct flowi6 fl6 = {};
4834 bool fibmatch;
4835
4836 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4837 extack);
4838 if (err < 0)
4839 goto errout;
4840
4841 err = -EINVAL;
4842 rtm = nlmsg_data(nlh);
4843 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4844 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4845
4846 if (tb[RTA_SRC]) {
4847 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4848 goto errout;
4849
4850 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4851 }
4852
4853 if (tb[RTA_DST]) {
4854 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4855 goto errout;
4856
4857 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4858 }
4859
4860 if (tb[RTA_IIF])
4861 iif = nla_get_u32(tb[RTA_IIF]);
4862
4863 if (tb[RTA_OIF])
4864 oif = nla_get_u32(tb[RTA_OIF]);
4865
4866 if (tb[RTA_MARK])
4867 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4868
4869 if (tb[RTA_UID])
4870 fl6.flowi6_uid = make_kuid(current_user_ns(),
4871 nla_get_u32(tb[RTA_UID]));
4872 else
4873 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4874
4875 if (tb[RTA_SPORT])
4876 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4877
4878 if (tb[RTA_DPORT])
4879 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4880
4881 if (tb[RTA_IP_PROTO]) {
4882 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4883 &fl6.flowi6_proto, extack);
4884 if (err)
4885 goto errout;
4886 }
4887
4888 if (iif) {
4889 struct net_device *dev;
4890 int flags = 0;
4891
4892 rcu_read_lock();
4893
4894 dev = dev_get_by_index_rcu(net, iif);
4895 if (!dev) {
4896 rcu_read_unlock();
4897 err = -ENODEV;
4898 goto errout;
4899 }
4900
4901 fl6.flowi6_iif = iif;
4902
4903 if (!ipv6_addr_any(&fl6.saddr))
4904 flags |= RT6_LOOKUP_F_HAS_SADDR;
4905
4906 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4907
4908 rcu_read_unlock();
4909 } else {
4910 fl6.flowi6_oif = oif;
4911
4912 dst = ip6_route_output(net, NULL, &fl6);
4913 }
4914
4915
4916 rt = container_of(dst, struct rt6_info, dst);
4917 if (rt->dst.error) {
4918 err = rt->dst.error;
4919 ip6_rt_put(rt);
4920 goto errout;
4921 }
4922
4923 if (rt == net->ipv6.ip6_null_entry) {
4924 err = rt->dst.error;
4925 ip6_rt_put(rt);
4926 goto errout;
4927 }
4928
4929 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4930 if (!skb) {
4931 ip6_rt_put(rt);
4932 err = -ENOBUFS;
4933 goto errout;
4934 }
4935
4936 skb_dst_set(skb, &rt->dst);
4937
4938 rcu_read_lock();
4939 from = rcu_dereference(rt->from);
4940
4941 if (fibmatch)
4942 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
4943 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4944 nlh->nlmsg_seq, 0);
4945 else
4946 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4947 &fl6.saddr, iif, RTM_NEWROUTE,
4948 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4949 0);
4950 rcu_read_unlock();
4951
4952 if (err < 0) {
4953 kfree_skb(skb);
4954 goto errout;
4955 }
4956
4957 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4958errout:
4959 return err;
4960}
4961
4962void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
4963 unsigned int nlm_flags)
4964{
4965 struct sk_buff *skb;
4966 struct net *net = info->nl_net;
4967 u32 seq;
4968 int err;
4969
4970 err = -ENOBUFS;
4971 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
4972
4973 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
4974 if (!skb)
4975 goto errout;
4976
4977 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4978 event, info->portid, seq, nlm_flags);
4979 if (err < 0) {
4980
4981 WARN_ON(err == -EMSGSIZE);
4982 kfree_skb(skb);
4983 goto errout;
4984 }
4985 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
4986 info->nlh, gfp_any());
4987 return;
4988errout:
4989 if (err < 0)
4990 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
4991}
4992
4993static int ip6_route_dev_notify(struct notifier_block *this,
4994 unsigned long event, void *ptr)
4995{
4996 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4997 struct net *net = dev_net(dev);
4998
4999 if (!(dev->flags & IFF_LOOPBACK))
5000 return NOTIFY_OK;
5001
5002 if (event == NETDEV_REGISTER) {
5003 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
5004 net->ipv6.ip6_null_entry->dst.dev = dev;
5005 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5006#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5007 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
5008 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5009 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
5010 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
5011#endif
5012 } else if (event == NETDEV_UNREGISTER &&
5013 dev->reg_state != NETREG_UNREGISTERED) {
5014
5015
5016
5017 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5018#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5019 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5020 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5021#endif
5022 }
5023
5024 return NOTIFY_OK;
5025}
5026
5027
5028
5029
5030
#ifdef CONFIG_PROC_FS
/*
 * seq_file show handler: print one line of seven zero-padded hex counters
 * for the namespace stored in seq->private -- fib nodes, route nodes,
 * allocated routes, route entries, cached routes, the slow dst entry count
 * of ip6_dst_ops, and discarded routes, in that order.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq->private;

	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
#endif
5047
5048#ifdef CONFIG_SYSCTL
5049
5050static
5051int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
5052 void __user *buffer, size_t *lenp, loff_t *ppos)
5053{
5054 struct net *net;
5055 int delay;
5056 if (!write)
5057 return -EINVAL;
5058
5059 net = (struct net *)ctl->extra1;
5060 delay = net->ipv6.sysctl.flush_delay;
5061 proc_dointvec(ctl, write, buffer, lenp, ppos);
5062 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
5063 return 0;
5064}
5065
/*
 * Constant 0 and 1 with addressable storage.
 * NOTE(review): presumably referenced as range bounds by a sysctl table
 * entry; the use is not visible in this part of the file -- confirm.
 */
static int zero;
static int one = 1;
5068
5069static struct ctl_table ipv6_route_table_template[] = {
5070 {
5071 .procname = "flush",
5072 .data = &init_net.ipv6.sysctl.flush_delay,
5073 .maxlen = sizeof(int),
5074 .mode = 0200,
5075 .proc_handler = ipv6_sysctl_rtcache_flush
5076 },
5077 {
5078 .procname = "gc_thresh",
5079 .data = &ip6_dst_ops_template.gc_thresh,
5080 .maxlen = sizeof(int),
5081 .mode = 0644,
5082 .proc_handler = proc_dointvec,
5083 },
5084 {
5085 .procname = "max_size",
5086 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
5087 .maxlen = sizeof(int),
5088 .mode = 0644,
5089 .proc_handler = proc_dointvec,
5090 },
5091 {
5092 .procname = "gc_min_interval",
5093 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5094 .maxlen = sizeof(int),
5095 .mode = 0644,
5096 .proc_handler = proc_dointvec_jiffies,
5097 },
5098 {
5099 .procname = "gc_timeout",
5100 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
5101 .maxlen = sizeof(int),
5102 .mode = 0644,
5103 .proc_handler = proc_dointvec_jiffies,
5104 },
5105 {
5106 .procname = "gc_interval",
5107 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
5108 .maxlen = sizeof(int),
5109 .mode = 0644,
5110 .proc_handler = proc_dointvec_jiffies,
5111 },
5112 {
5113 .procname = "gc_elasticity",
5114 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
5115 .maxlen = sizeof(int),
5116 .mode = 0644,
5117 .proc_handler = proc_dointvec,
5118 },
5119 {
5120 .procname = "mtu_expires",
5121 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
5122 .maxlen = sizeof(int),
5123 .mode = 0644,
5124 .proc_handler = proc_dointvec_jiffies,
5125 },
5126 {
5127 .procname = "min_adv_mss",
5128 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
5129 .maxlen = sizeof(int),
5130 .mode = 0644,
5131 .proc_handler = proc_dointvec,
5132 },
5133 {
5134 .procname = "gc_min_interval_ms",
5135 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5136 .maxlen = sizeof(int),
5137 .mode = 0644,
5138 .proc_handler = proc_dointvec_ms_jiffies,
5139 },
5140 {
5141 .procname = "skip_notify_on_dev_down",
5142 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5143 .maxlen = sizeof(int),
5144 .mode = 0644,
5145 .proc_handler = proc_dointvec,
5146 .extra1 = &zero,
5147 .extra2 = &one,
5148 },
5149 { }
5150};
5151
5152struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5153{
5154 struct ctl_table *table;
5155
5156 table = kmemdup(ipv6_route_table_template,
5157 sizeof(ipv6_route_table_template),
5158 GFP_KERNEL);
5159
5160 if (table) {
5161 table[0].data = &net->ipv6.sysctl.flush_delay;
5162 table[0].extra1 = net;
5163 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5164 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5165 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5166 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5167 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5168 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5169 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5170 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
5171 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5172 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5173
5174
5175 if (net->user_ns != &init_user_ns)
5176 table[0].procname = NULL;
5177 }
5178
5179 return table;
5180}
5181#endif
5182
5183static int __net_init ip6_route_net_init(struct net *net)
5184{
5185 int ret = -ENOMEM;
5186
5187 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5188 sizeof(net->ipv6.ip6_dst_ops));
5189
5190 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5191 goto out_ip6_dst_ops;
5192
5193 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5194 sizeof(*net->ipv6.fib6_null_entry),
5195 GFP_KERNEL);
5196 if (!net->ipv6.fib6_null_entry)
5197 goto out_ip6_dst_entries;
5198
5199 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5200 sizeof(*net->ipv6.ip6_null_entry),
5201 GFP_KERNEL);
5202 if (!net->ipv6.ip6_null_entry)
5203 goto out_fib6_null_entry;
5204 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5205 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5206 ip6_template_metrics, true);
5207
5208#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5209 net->ipv6.fib6_has_custom_rules = false;
5210 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5211 sizeof(*net->ipv6.ip6_prohibit_entry),
5212 GFP_KERNEL);
5213 if (!net->ipv6.ip6_prohibit_entry)
5214 goto out_ip6_null_entry;
5215 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5216 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5217 ip6_template_metrics, true);
5218
5219 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5220 sizeof(*net->ipv6.ip6_blk_hole_entry),
5221 GFP_KERNEL);
5222 if (!net->ipv6.ip6_blk_hole_entry)
5223 goto out_ip6_prohibit_entry;
5224 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5225 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5226 ip6_template_metrics, true);
5227#endif
5228
5229 net->ipv6.sysctl.flush_delay = 0;
5230 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5231 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5232 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5233 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5234 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5235 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5236 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5237 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
5238
5239 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5240
5241 ret = 0;
5242out:
5243 return ret;
5244
5245#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5246out_ip6_prohibit_entry:
5247 kfree(net->ipv6.ip6_prohibit_entry);
5248out_ip6_null_entry:
5249 kfree(net->ipv6.ip6_null_entry);
5250#endif
5251out_fib6_null_entry:
5252 kfree(net->ipv6.fib6_null_entry);
5253out_ip6_dst_entries:
5254 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5255out_ip6_dst_ops:
5256 goto out;
5257}
5258
/*
 * Per-namespace teardown counterpart of ip6_route_net_init().
 *
 * Frees the special entries that were duplicated from their templates
 * (the prohibit and blackhole entries only exist with
 * CONFIG_IPV6_MULTIPLE_TABLES) and then destroys the namespace's dst
 * entry accounting.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.fib6_null_entry);
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
5269
5270static int __net_init ip6_route_net_init_late(struct net *net)
5271{
5272#ifdef CONFIG_PROC_FS
5273 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5274 sizeof(struct ipv6_route_iter));
5275 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5276 rt6_stats_seq_show, NULL);
5277#endif
5278 return 0;
5279}
5280
/*
 * Late per-namespace teardown: remove the "ipv6_route" and "rt6_stats"
 * proc entries created by ip6_route_net_init_late().  No-op without
 * CONFIG_PROC_FS.
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
5288
/*
 * Per-namespace hooks that set up and tear down the special route
 * entries and the route sysctl defaults.
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
5293
5294static int __net_init ipv6_inetpeer_init(struct net *net)
5295{
5296 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5297
5298 if (!bp)
5299 return -ENOMEM;
5300 inet_peer_base_init(bp);
5301 net->ipv6.peers = bp;
5302 return 0;
5303}
5304
5305static void __net_exit ipv6_inetpeer_exit(struct net *net)
5306{
5307 struct inet_peer_base *bp = net->ipv6.peers;
5308
5309 net->ipv6.peers = NULL;
5310 inetpeer_invalidate_tree(bp);
5311 kfree(bp);
5312}
5313
/* Per-namespace hooks managing the lifetime of net->ipv6.peers. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};
5318
/*
 * Per-namespace hooks for the proc entries; registered by
 * ip6_route_init() after fib6, xfrm6 and the fib6 rules are set up.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
5323
/*
 * Netdevice notifier that dispatches to ip6_route_dev_notify().
 *
 * The priority is set 10 below ADDRCONF_NOTIFY_PRIORITY; presumably so
 * that this callback runs after addrconf's own notifier -- confirm
 * against the notifier chain ordering rules.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
5328
5329void __init ip6_route_init_special_entries(void)
5330{
5331
5332
5333
5334 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5335 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5336 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5337 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5338 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5339 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5340 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5341 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5342 #endif
5343}
5344
/*
 * One-time initialisation of the IPv6 routing subsystem.
 *
 * Sets up, in order: the rt6_info slab cache, the blackhole dst_ops
 * accounting, the inetpeer and route pernet subsystems, fib6, xfrm6,
 * the fib6 rules, the late (proc) pernet subsystem, the rtnetlink
 * handlers for RTM_NEWROUTE/RTM_DELROUTE/RTM_GETROUTE, the netdevice
 * notifier, and finally the per-CPU uncached route lists.
 *
 * Returns 0 on success or a negative errno.  On failure everything that
 * was set up before the failing step is torn down again through the
 * label ladder at the end of the function.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* The blackhole dst_ops allocate from the same slab cache. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
				   inet6_rtm_newroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
				   inet6_rtm_delroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	/* Unlike the two handlers above, GETROUTE is registered as DOIT_UNLOCKED. */
	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
				   inet6_rtm_getroute, NULL,
				   RTNL_FLAG_DOIT_UNLOCKED);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* Initialise the list head and lock of every CPU's uncached list. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/*
	 * Error unwinding.  Each label undoes the steps that had completed
	 * before the jump, in reverse order of setup, and control falls
	 * through to the labels below it.
	 */
out_register_late_subsys:
	/* Drops all PF_INET6 rtnetlink handlers, whichever were registered. */
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
5436
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * late pernet subsystem, fib6 rules, xfrm6, fib6 GC, both remaining
 * pernet subsystems, the blackhole dst accounting and the slab cache.
 *
 * NOTE(review): the rtnetlink handlers registered by ip6_route_init()
 * are not unregistered here; presumably the caller takes care of that --
 * confirm.  Also note that ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops, which is not the strict reverse of the
 * registration order in ip6_route_init().
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
5449