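/*
 *	IPv6 routing table / FIB front-end and dst (route cache) handling.
 */
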
#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#ifndef __GENKSYMS__
#include <net/ip_tunnels.h>
#endif

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};
79
80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
83static unsigned int ip6_mtu(const struct dst_entry *dst);
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
88static int ip6_dst_gc(struct dst_ops *ops);
89
90static int ip6_pkt_discard(struct sk_buff *skb);
91static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
92static int ip6_pkt_prohibit(struct sk_buff *skb);
93static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101
102#ifdef CONFIG_IPV6_ROUTE_INFO
103static struct rt6_info *rt6_add_route_info(struct net *net,
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex,
106 unsigned int pref);
107static struct rt6_info *rt6_get_route_info(struct net *net,
108 const struct in6_addr *prefix, int prefixlen,
109 const struct in6_addr *gwaddr, int ifindex);
110#endif
111
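/* Per-cpu list of uncached (DST_NOCACHE) routes.  Entries are added by
 * rt6_uncached_list_add() and removed by rt6_uncached_list_del(); when a
 * device goes away, rt6_uncached_list_flush_dev() re-points any entry that
 * still references the dying device at the loopback device.
 */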
112struct uncached_list {
113 spinlock_t lock;
114 struct list_head head;
115};
116
117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118
119static void rt6_uncached_list_add(struct rt6_info *rt)
120{
121 struct uncached_list *ul = __this_cpu_ptr(&rt6_uncached_list);
122
123 rt->dst.flags |= DST_NOCACHE;
124 rt->rt6i_uncached_list = ul;
125
126 spin_lock_bh(&ul->lock);
127 list_add_tail(&rt->rt6i_uncached, &ul->head);
128 spin_unlock_bh(&ul->lock);
129}
130
131static void rt6_uncached_list_del(struct rt6_info *rt)
132{
133 if (!list_empty(&rt->rt6i_uncached)) {
134 struct uncached_list *ul = rt->rt6i_uncached_list;
135
136 spin_lock_bh(&ul->lock);
137 list_del(&rt->rt6i_uncached);
138 spin_unlock_bh(&ul->lock);
139 }
140}
141
142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
143{
144 struct net_device *loopback_dev = net->loopback_dev;
145 int cpu;
146
147 if (dev == loopback_dev)
148 return;
149
150 for_each_possible_cpu(cpu) {
151 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
152 struct rt6_info *rt;
153
154 spin_lock_bh(&ul->lock);
155 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
156 struct inet6_dev *rt_idev = rt->rt6i_idev;
157 struct net_device *rt_dev = rt->dst.dev;
158
159 if (rt_idev->dev == dev) {
160 rt->rt6i_idev = in6_dev_get(loopback_dev);
161 in6_dev_put(rt_idev);
162 }
163
164 if (rt_dev == dev) {
165 rt->dst.dev = loopback_dev;
166 dev_hold(rt->dst.dev);
167 dev_put(rt_dev);
168 }
169 }
170 spin_unlock_bh(&ul->lock);
171 }
172}
173
174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
175{
176 return dst_metrics_write_ptr(rt->dst.from);
177}
178
179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180{
181 struct rt6_info *rt = (struct rt6_info *)dst;
182
183 if (rt->rt6i_flags & RTF_PCPU)
184 return rt6_pcpu_cow_metrics(rt);
185 else if (rt->rt6i_flags & RTF_CACHE)
186 return NULL;
187 else
188 return dst_cow_metrics_generic(dst, old);
189}
190
191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192 struct sk_buff *skb,
193 const void *daddr)
194{
195 struct in6_addr *p = &rt->rt6i_gateway;
196
197 if (!ipv6_addr_any(p))
198 return (const void *) p;
199 else if (skb)
200 return &ipv6_hdr(skb)->daddr;
201 return daddr;
202}
203
204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205 struct sk_buff *skb,
206 const void *daddr)
207{
208 struct rt6_info *rt = (struct rt6_info *) dst;
209 struct neighbour *n;
210
211 daddr = choose_neigh_daddr(rt, skb, daddr);
212 n = __ipv6_neigh_lookup(dst->dev, daddr);
213 if (n)
214 return n;
215 return neigh_create(&nd_tbl, daddr, dst->dev);
216}
217
218static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
219{
220 struct net_device *dev = dst->dev;
221 struct rt6_info *rt = (struct rt6_info *)dst;
222
223 daddr = choose_neigh_daddr(rt, NULL, daddr);
224 if (!daddr)
225 return;
226 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
227 return;
228 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
229 return;
230 __ipv6_confirm_neigh(dev, daddr);
231}
232
233static struct dst_ops ip6_dst_ops_template = {
234 .family = AF_INET6,
235 .protocol = cpu_to_be16(ETH_P_IPV6),
236 .gc = ip6_dst_gc,
237 .gc_thresh = 1024,
238 .check = ip6_dst_check,
239 .default_advmss = ip6_default_advmss,
240 .mtu = ip6_mtu,
241 .cow_metrics = ipv6_cow_metrics,
242 .destroy = ip6_dst_destroy,
243 .ifdown = ip6_dst_ifdown,
244 .negative_advice = ip6_negative_advice,
245 .link_failure = ip6_link_failure,
246 .update_pmtu = ip6_rt_update_pmtu,
247 .redirect = rt6_do_redirect,
248 .local_out = __ip6_local_out,
249 .neigh_lookup = ip6_neigh_lookup,
250 .confirm_neigh = ip6_confirm_neigh,
251};
252
253static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
254{
255 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256
257 return mtu ? : dst->dev->mtu;
258}
259
260static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 struct sk_buff *skb, u32 mtu)
262{
263}
264
265static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
266 struct sk_buff *skb)
267{
268}
269
270static struct dst_ops ip6_dst_blackhole_ops = {
271 .family = AF_INET6,
272 .protocol = cpu_to_be16(ETH_P_IPV6),
273 .destroy = ip6_dst_destroy,
274 .check = ip6_dst_check,
275 .mtu = ip6_blackhole_mtu,
276 .default_advmss = ip6_default_advmss,
277 .update_pmtu = ip6_rt_blackhole_update_pmtu,
278 .redirect = ip6_rt_blackhole_redirect,
279 .cow_metrics = dst_cow_metrics_generic,
280 .neigh_lookup = ip6_neigh_lookup,
281};
282
283static const u32 ip6_template_metrics[RTAX_MAX] = {
284 [RTAX_HOPLIMIT - 1] = 0,
285};
286
287static const struct rt6_info ip6_null_entry_template = {
288 .dst = {
289 .__refcnt = ATOMIC_INIT(1),
290 .__use = 1,
291 .obsolete = DST_OBSOLETE_FORCE_CHK,
292 .error = -ENETUNREACH,
293 .input = ip6_pkt_discard,
294 .output = ip6_pkt_discard_out,
295 },
296 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
297 .rt6i_protocol = RTPROT_KERNEL,
298 .rt6i_metric = ~(u32) 0,
299 .rt6i_ref = ATOMIC_INIT(1),
300};
301
302#ifdef CONFIG_IPV6_MULTIPLE_TABLES
303
304static const struct rt6_info ip6_prohibit_entry_template = {
305 .dst = {
306 .__refcnt = ATOMIC_INIT(1),
307 .__use = 1,
308 .obsolete = DST_OBSOLETE_FORCE_CHK,
309 .error = -EACCES,
310 .input = ip6_pkt_prohibit,
311 .output = ip6_pkt_prohibit_out,
312 },
313 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
314 .rt6i_protocol = RTPROT_KERNEL,
315 .rt6i_metric = ~(u32) 0,
316 .rt6i_ref = ATOMIC_INIT(1),
317};
318
319static const struct rt6_info ip6_blk_hole_entry_template = {
320 .dst = {
321 .__refcnt = ATOMIC_INIT(1),
322 .__use = 1,
323 .obsolete = DST_OBSOLETE_FORCE_CHK,
324 .error = -EINVAL,
325 .input = dst_discard,
326 .output = dst_discard_sk,
327 },
328 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
329 .rt6i_protocol = RTPROT_KERNEL,
330 .rt6i_metric = ~(u32) 0,
331 .rt6i_ref = ATOMIC_INIT(1),
332};
333
334#endif
335
336static void rt6_info_init(struct rt6_info *rt)
337{
338 struct dst_entry *dst = &rt->dst;
339
340 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
341 INIT_LIST_HEAD(&rt->rt6i_siblings);
342 INIT_LIST_HEAD(&rt->rt6i_uncached);
343}
344
345
346static struct rt6_info *__ip6_dst_alloc(struct net *net,
347 struct net_device *dev,
348 int flags)
349{
350 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
351 0, DST_OBSOLETE_FORCE_CHK, flags);
352
353 if (rt)
354 rt6_info_init(rt);
355
356 return rt;
357}
358
359static struct rt6_info *ip6_dst_alloc(struct net *net,
360 struct net_device *dev,
361 int flags)
362{
363 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
364
365 if (rt) {
366 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
367 if (rt->rt6i_pcpu) {
368 int cpu;
369
370 for_each_possible_cpu(cpu) {
371 struct rt6_info **p;
372
373 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
374
375 *p = NULL;
376 }
377 } else {
378 dst_destroy((struct dst_entry *)rt);
379 return NULL;
380 }
381 }
382
383 return rt;
384}
385
386static void ip6_dst_destroy(struct dst_entry *dst)
387{
388 struct rt6_info *rt = (struct rt6_info *)dst;
389 struct dst_entry *from = dst->from;
390 struct inet6_dev *idev;
391
392 dst_destroy_metrics_generic(dst);
393
394 if (rt->rt6i_pcpu)
395 free_percpu(rt->rt6i_pcpu);
396
397 rt6_uncached_list_del(rt);
398
399 idev = rt->rt6i_idev;
400 if (idev) {
401 rt->rt6i_idev = NULL;
402 in6_dev_put(idev);
403 }
404
405 dst->from = NULL;
406 dst_release(from);
407}
408
409static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
410 int how)
411{
412 struct rt6_info *rt = (struct rt6_info *)dst;
413 struct inet6_dev *idev = rt->rt6i_idev;
414 struct net_device *loopback_dev =
415 dev_net(dev)->loopback_dev;
416
417 if (dev != loopback_dev) {
418 if (idev && idev->dev == dev) {
419 struct inet6_dev *loopback_idev =
420 in6_dev_get(loopback_dev);
421 if (loopback_idev) {
422 rt->rt6i_idev = loopback_idev;
423 in6_dev_put(idev);
424 }
425 }
426 }
427}
428
429static bool __rt6_check_expired(const struct rt6_info *rt)
430{
431 if (rt->rt6i_flags & RTF_EXPIRES)
432 return time_after(jiffies, rt->dst.expires);
433 else
434 return false;
435}
436
437static bool rt6_check_expired(const struct rt6_info *rt)
438{
439 if (rt->rt6i_flags & RTF_EXPIRES) {
440 if (time_after(jiffies, rt->dst.expires))
441 return true;
442 } else if (rt->dst.from) {
443 return rt6_check_expired((struct rt6_info *) rt->dst.from);
444 }
445 return false;
446}
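
/* Multipath route selection: hash the flow (addresses, protocol, ports or
 * ICMPv6 type/code, and flow label) and reduce the hash modulo the number
 * of candidate sibling routes.
 */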
452static int rt6_info_hash_nhsfn(unsigned int candidate_count,
453 const struct flowi6 *fl6)
454{
455 unsigned int val = fl6->flowi6_proto;
456
457 val ^= ipv6_addr_hash(&fl6->daddr);
458 val ^= ipv6_addr_hash(&fl6->saddr);
459
460
461 switch (fl6->flowi6_proto) {
462 case IPPROTO_UDP:
463 case IPPROTO_TCP:
464 case IPPROTO_SCTP:
465 val ^= (__force u16)fl6->fl6_sport;
466 val ^= (__force u16)fl6->fl6_dport;
467 break;
468
469 case IPPROTO_ICMPV6:
470 val ^= (__force u16)fl6->fl6_icmp_type;
471 val ^= (__force u16)fl6->fl6_icmp_code;
472 break;
473 }
474
475 val ^= (__force u32)fl6->flowlabel;
476
477
478 val = val ^ (val >> 7) ^ (val >> 12);
479 return val % candidate_count;
480}
481
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_chosen;

	route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);

	/* An index of 0 keeps the current match; the siblings list does
	 * not include the route itself.
	 */
	if (route_chosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_chosen--;
			if (route_chosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
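
/*
 *	Route lookup helpers.  The fib6 table lock (tb6_lock) is held by
 *	the callers.
 */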
511static inline struct rt6_info *rt6_device_match(struct net *net,
512 struct rt6_info *rt,
513 const struct in6_addr *saddr,
514 int oif,
515 int flags)
516{
517 struct rt6_info *local = NULL;
518 struct rt6_info *sprt;
519
520 if (!oif && ipv6_addr_any(saddr))
521 goto out;
522
523 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
524 struct net_device *dev = sprt->dst.dev;
525
526 if (oif) {
527 if (dev->ifindex == oif)
528 return sprt;
529 if (dev->flags & IFF_LOOPBACK) {
530 if (!sprt->rt6i_idev ||
531 sprt->rt6i_idev->dev->ifindex != oif) {
532 if (flags & RT6_LOOKUP_F_IFACE && oif)
533 continue;
534 if (local && (!oif ||
535 local->rt6i_idev->dev->ifindex == oif))
536 continue;
537 }
538 local = sprt;
539 }
540 } else {
541 if (ipv6_chk_addr(net, saddr, dev,
542 flags & RT6_LOOKUP_F_IFACE))
543 return sprt;
544 }
545 }
546
547 if (oif) {
548 if (local)
549 return local;
550
551 if (flags & RT6_LOOKUP_F_IFACE)
552 return net->ipv6.ip6_null_entry;
553 }
554out:
555 return rt;
556}
557
558#ifdef CONFIG_IPV6_ROUTER_PREF
559struct __rt6_probe_work {
560 struct work_struct work;
561 struct in6_addr target;
562 struct net_device *dev;
563};
564
565static void rt6_probe_deferred(struct work_struct *w)
566{
567 struct in6_addr mcaddr;
568 struct __rt6_probe_work *work =
569 container_of(w, struct __rt6_probe_work, work);
570
571 addrconf_addr_solict_mult(&work->target, &mcaddr);
572 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
573 dev_put(work->dev);
574 kfree(w);
575}
576
577static void rt6_probe(struct rt6_info *rt)
578{
579 struct neighbour *neigh;
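
	/* Router reachability probing: if the gateway's neighbour entry is
	 * missing, or has not been updated within rtr_probe_interval,
	 * schedule a neighbour solicitation towards it.  The NS itself is
	 * sent from a workqueue (rt6_probe_deferred), not from this context.
	 */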
588 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
589 return;
590 rcu_read_lock_bh();
591 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
592 if (neigh) {
593 write_lock(&neigh->lock);
594 if (neigh->nud_state & NUD_VALID)
595 goto out;
596 }
597
598 if (!neigh ||
599 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
600 struct __rt6_probe_work *work;
601
602 work = kmalloc(sizeof(*work), GFP_ATOMIC);
603
604 if (neigh && work)
605 __neigh_set_probe_once(neigh);
606
607 if (neigh)
608 write_unlock(&neigh->lock);
609
610 if (work) {
611 INIT_WORK(&work->work, rt6_probe_deferred);
612 work->target = rt->rt6i_gateway;
613 dev_hold(rt->dst.dev);
614 work->dev = rt->dst.dev;
615 schedule_work(&work->work);
616 }
617 } else {
618out:
619 write_unlock(&neigh->lock);
620 }
621 rcu_read_unlock_bh();
622}
623#else
624static inline void rt6_probe(struct rt6_info *rt)
625{
626}
627#endif
628
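
/*
 * Default Router Selection (RFC 4861, section 6.3.6): score candidate
 * routes by interface match, router preference and neighbour reachability.
 */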
632static inline int rt6_check_dev(struct rt6_info *rt, int oif)
633{
634 struct net_device *dev = rt->dst.dev;
635 if (!oif || dev->ifindex == oif)
636 return 2;
637 if ((dev->flags & IFF_LOOPBACK) &&
638 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
639 return 1;
640 return 0;
641}
642
643static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
644{
645 struct neighbour *neigh;
646 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
647
648 if (rt->rt6i_flags & RTF_NONEXTHOP ||
649 !(rt->rt6i_flags & RTF_GATEWAY))
650 return RT6_NUD_SUCCEED;
651
652 rcu_read_lock_bh();
653 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
654 if (neigh) {
655 read_lock(&neigh->lock);
656 if (neigh->nud_state & NUD_VALID)
657 ret = RT6_NUD_SUCCEED;
658#ifdef CONFIG_IPV6_ROUTER_PREF
659 else if (!(neigh->nud_state & NUD_FAILED))
660 ret = RT6_NUD_SUCCEED;
661 else
662 ret = RT6_NUD_FAIL_PROBE;
663#endif
664 read_unlock(&neigh->lock);
665 } else {
666 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
667 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
668 }
669 rcu_read_unlock_bh();
670
671 return ret;
672}
673
674static int rt6_score_route(struct rt6_info *rt, int oif,
675 int strict)
676{
677 int m;
678
679 m = rt6_check_dev(rt, oif);
680 if (!m && (strict & RT6_LOOKUP_F_IFACE))
681 return RT6_NUD_FAIL_HARD;
682#ifdef CONFIG_IPV6_ROUTER_PREF
683 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
684#endif
685 if (strict & RT6_LOOKUP_F_REACHABLE) {
686 int n = rt6_check_neigh(rt);
687 if (n < 0)
688 return n;
689 }
690 return m;
691}
692
693static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
694 int *mpri, struct rt6_info *match,
695 bool *do_rr)
696{
697 int m;
698 bool match_do_rr = false;
699
700 if (rt6_check_expired(rt))
701 goto out;
702
703 m = rt6_score_route(rt, oif, strict);
704 if (m == RT6_NUD_FAIL_DO_RR) {
705 match_do_rr = true;
706 m = 0;
707 } else if (m == RT6_NUD_FAIL_HARD) {
708 goto out;
709 }
710
711 if (strict & RT6_LOOKUP_F_REACHABLE)
712 rt6_probe(rt);
713
714
715 if (m > *mpri) {
716 *do_rr = match_do_rr;
717 *mpri = m;
718 match = rt;
719 }
720out:
721 return match;
722}
723
724static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
725 struct rt6_info *rr_head,
726 u32 metric, int oif, int strict,
727 bool *do_rr)
728{
729 struct rt6_info *rt, *match, *cont;
730 int mpri = -1;
731
732 match = NULL;
733 cont = NULL;
734 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
735 if (rt->rt6i_metric != metric) {
736 cont = rt;
737 break;
738 }
739
740 match = find_match(rt, oif, strict, &mpri, match, do_rr);
741 }
742
743 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
744 if (rt->rt6i_metric != metric) {
745 cont = rt;
746 break;
747 }
748
749 match = find_match(rt, oif, strict, &mpri, match, do_rr);
750 }
751
752 if (match || !cont)
753 return match;
754
755 for (rt = cont; rt; rt = rt->dst.rt6_next)
756 match = find_match(rt, oif, strict, &mpri, match, do_rr);
757
758 return match;
759}
760
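/* Round-robin among routes of equal metric: pick the best-scoring route
 * starting at fn->rr_ptr, and advance rr_ptr when the chosen route asked
 * for round-robin (RT6_NUD_FAIL_DO_RR).
 */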
761static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
762{
763 struct rt6_info *match, *rt0;
764 struct net *net;
765 bool do_rr = false;
766
767 rt0 = fn->rr_ptr;
768 if (!rt0)
769 fn->rr_ptr = rt0 = fn->leaf;
770
771 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
772 &do_rr);
773
774 if (do_rr) {
775 struct rt6_info *next = rt0->dst.rt6_next;
776
777
778 if (!next || next->rt6i_metric != rt0->rt6i_metric)
779 next = fn->leaf;
780
781 if (next != rt0)
782 fn->rr_ptr = next;
783 }
784
785 net = dev_net(rt0->dst.dev);
786 return match ? match : net->ipv6.ip6_null_entry;
787}
788
789static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
790{
791 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
792}
793
794#ifdef CONFIG_IPV6_ROUTE_INFO
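/* Handle a Route Information Option received in a Router Advertisement
 * (RFC 4191): add, refresh or remove the corresponding RTF_ROUTEINFO route.
 */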
795int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
796 const struct in6_addr *gwaddr)
797{
798 struct net *net = dev_net(dev);
799 struct route_info *rinfo = (struct route_info *) opt;
800 struct in6_addr prefix_buf, *prefix;
801 unsigned int pref;
802 unsigned long lifetime;
803 struct rt6_info *rt;
804
805 if (len < sizeof(struct route_info)) {
806 return -EINVAL;
807 }
808
809
810 if (rinfo->length > 3) {
811 return -EINVAL;
812 } else if (rinfo->prefix_len > 128) {
813 return -EINVAL;
814 } else if (rinfo->prefix_len > 64) {
815 if (rinfo->length < 2) {
816 return -EINVAL;
817 }
818 } else if (rinfo->prefix_len > 0) {
819 if (rinfo->length < 1) {
820 return -EINVAL;
821 }
822 }
823
824 pref = rinfo->route_pref;
825 if (pref == ICMPV6_ROUTER_PREF_INVALID)
826 return -EINVAL;
827
828 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
829
830 if (rinfo->length == 3)
831 prefix = (struct in6_addr *)rinfo->prefix;
832 else {
833
834 ipv6_addr_prefix(&prefix_buf,
835 (struct in6_addr *)rinfo->prefix,
836 rinfo->prefix_len);
837 prefix = &prefix_buf;
838 }
839
840 if (rinfo->prefix_len == 0)
841 rt = rt6_get_dflt_router(gwaddr, dev);
842 else
843 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
844 gwaddr, dev->ifindex);
845
846 if (rt && !lifetime) {
847 ip6_del_rt(rt);
848 rt = NULL;
849 }
850
851 if (!rt && lifetime)
852 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
853 pref);
854 else if (rt)
855 rt->rt6i_flags = RTF_ROUTEINFO |
856 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
857
858 if (rt) {
859 if (!addrconf_finite_timeout(lifetime))
860 rt6_clean_expires(rt);
861 else
862 rt6_set_expires(rt, jiffies + HZ * lifetime);
863
864 ip6_rt_put(rt);
865 }
866 return 0;
867}
868#endif
869
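/* Walk back up the fib6 tree (descending into a parent's source-address
 * subtree where one exists) until a node carrying route information
 * (RTN_RTINFO) is found, or give up and return NULL at the tree root.
 */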
870static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
871 struct in6_addr *saddr)
872{
873 struct fib6_node *pn;
874 while (1) {
875 if (fn->fn_flags & RTN_TL_ROOT)
876 return NULL;
877 pn = fn->parent;
878 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
879 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
880 else
881 fn = pn;
882 if (fn->fn_flags & RTN_RTINFO)
883 return fn;
884 }
885}
886
887static struct rt6_info *ip6_pol_route_lookup(struct net *net,
888 struct fib6_table *table,
889 struct flowi6 *fl6, int flags)
890{
891 struct fib6_node *fn;
892 struct rt6_info *rt;
893
894 read_lock_bh(&table->tb6_lock);
895 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
896restart:
897 rt = fn->leaf;
898 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
899 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
900 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
901 if (rt == net->ipv6.ip6_null_entry) {
902 fn = fib6_backtrack(fn, &fl6->saddr);
903 if (fn)
904 goto restart;
905 }
906 dst_use(&rt->dst, jiffies);
907 read_unlock_bh(&table->tb6_lock);
908 return rt;
909
910}
911
912struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
913 int flags)
914{
915 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
916}
917EXPORT_SYMBOL_GPL(ip6_route_lookup);
918
919struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
920 const struct in6_addr *saddr, int oif, int strict)
921{
922 struct flowi6 fl6 = {
923 .flowi6_oif = oif,
924 .daddr = *daddr,
925 };
926 struct dst_entry *dst;
927 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
928
929 if (saddr) {
930 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
931 flags |= RT6_LOOKUP_F_HAS_SADDR;
932 }
933
934 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
935 if (dst->error == 0)
936 return (struct rt6_info *) dst;
937
938 dst_release(dst);
939
940 return NULL;
941}
942
943EXPORT_SYMBOL(rt6_lookup);
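
/* Insert a route into its fib6 table.  The caller must not hold
 * table->tb6_lock; it is taken as a write lock here.
 */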
951static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
952 struct mx6_config *mxc)
953{
954 int err;
955 struct fib6_table *table;
956
957 table = rt->rt6i_table;
958 write_lock_bh(&table->tb6_lock);
959 err = fib6_add(&table->tb6_root, rt, info, mxc);
960 write_unlock_bh(&table->tb6_lock);
961
962 return err;
963}
964
965int ip6_ins_rt(struct rt6_info *rt)
966{
967 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
968 struct mx6_config mxc = { .mx = NULL, };
969
970 return __ip6_ins_rt(rt, &info, &mxc);
971}
972
973static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
974 const struct in6_addr *daddr,
975 const struct in6_addr *saddr)
976{
977 struct rt6_info *rt;
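
	/* Clone 'ort' into a host (/128, RTF_CACHE) route for 'daddr'.
	 * If 'ort' is itself a cached or per-cpu clone, copy from the
	 * route it was cloned from instead.
	 */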
983 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
984 ort = (struct rt6_info *)ort->dst.from;
985
986 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
987
988 if (!rt)
989 return NULL;
990
991 ip6_rt_copy_init(rt, ort);
992 rt->rt6i_flags |= RTF_CACHE;
993 rt->rt6i_metric = 0;
994 rt->dst.flags |= DST_HOST;
995 rt->rt6i_dst.addr = *daddr;
996 rt->rt6i_dst.plen = 128;
997
998 if (!rt6_is_gw_or_nonexthop(ort)) {
999 if (ort->rt6i_dst.plen != 128 &&
1000 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1001 rt->rt6i_flags |= RTF_ANYCAST;
1002#ifdef CONFIG_IPV6_SUBTREES
1003 if (rt->rt6i_src.plen && saddr) {
1004 rt->rt6i_src.addr = *saddr;
1005 rt->rt6i_src.plen = 128;
1006 }
1007#endif
1008 }
1009
1010 return rt;
1011}
1012
1013static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1014{
1015 struct rt6_info *pcpu_rt;
1016
1017 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
1018 rt->dst.dev, rt->dst.flags);
1019
1020 if (!pcpu_rt)
1021 return NULL;
1022 ip6_rt_copy_init(pcpu_rt, rt);
1023 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1024 pcpu_rt->rt6i_flags |= RTF_PCPU;
1025 return pcpu_rt;
1026}
1027
1028
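/* Called with table->tb6_lock held: return this cpu's cached clone of 'rt',
 * if one exists, with a reference taken.
 */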
1029static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1030{
1031 struct rt6_info *pcpu_rt, **p;
1032
1033 p = this_cpu_ptr(rt->rt6i_pcpu);
1034 pcpu_rt = *p;
1035
1036 if (pcpu_rt) {
1037 dst_hold(&pcpu_rt->dst);
1038 rt6_dst_from_metrics_check(pcpu_rt);
1039 }
1040 return pcpu_rt;
1041}
1042
1043static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1044{
1045 struct fib6_table *table = rt->rt6i_table;
1046 struct rt6_info *pcpu_rt, *prev, **p;
1047
1048 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1049 if (!pcpu_rt) {
1050 struct net *net = dev_net(rt->dst.dev);
1051
1052 dst_hold(&net->ipv6.ip6_null_entry->dst);
1053 return net->ipv6.ip6_null_entry;
1054 }
1055
1056 read_lock_bh(&table->tb6_lock);
1057 if (rt->rt6i_pcpu) {
1058 p = this_cpu_ptr(rt->rt6i_pcpu);
1059 prev = cmpxchg(p, NULL, pcpu_rt);
1060 if (prev) {
1061
1062 dst_destroy(&pcpu_rt->dst);
1063 pcpu_rt = prev;
1064 }
1065 } else {
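		/* 'rt' was removed from the fib6 tree before we could take
		 * the read lock, so do not bother caching a clone; return
		 * 'rt' itself and let the next dst_check() force a re-lookup.
		 */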
1072 dst_destroy(&pcpu_rt->dst);
1073 pcpu_rt = rt;
1074 }
1075 dst_hold(&pcpu_rt->dst);
1076 rt6_dst_from_metrics_check(pcpu_rt);
1077 read_unlock_bh(&table->tb6_lock);
1078 return pcpu_rt;
1079}
1080
1081static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1082 struct flowi6 *fl6, int flags)
1083{
1084 struct fib6_node *fn, *saved_fn;
1085 struct rt6_info *rt;
1086 int strict = 0;
1087
1088 strict |= flags & RT6_LOOKUP_F_IFACE;
1089 if (net->ipv6.devconf_all->forwarding == 0)
1090 strict |= RT6_LOOKUP_F_REACHABLE;
1091
1092 read_lock_bh(&table->tb6_lock);
1093
1094 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1095 saved_fn = fn;
1096
1097redo_rt6_select:
1098 rt = rt6_select(fn, oif, strict);
1099 if (rt->rt6i_nsiblings)
1100 rt = rt6_multipath_select(rt, fl6, oif, strict);
1101 if (rt == net->ipv6.ip6_null_entry) {
1102 fn = fib6_backtrack(fn, &fl6->saddr);
1103 if (fn)
1104 goto redo_rt6_select;
1105 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1106
1107 strict &= ~RT6_LOOKUP_F_REACHABLE;
1108 fn = saved_fn;
1109 goto redo_rt6_select;
1110 }
1111 }
1112
1113
1114 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1115 dst_use(&rt->dst, jiffies);
1116 read_unlock_bh(&table->tb6_lock);
1117
1118 rt6_dst_from_metrics_check(rt);
1119 return rt;
1120 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1121 !(rt->rt6i_flags & RTF_GATEWAY))) {
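		/* Create an uncached RTF_CACHE clone that is not owned by the
		 * fib6 tree.  This covers the FLOWI_FLAG_KNOWN_NH case, where
		 * the daddr used for the neighbour lookup can differ from the
		 * fl6->daddr used to look up the route here.
		 */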
1128 struct rt6_info *uncached_rt;
1129
1130 dst_use(&rt->dst, jiffies);
1131 read_unlock_bh(&table->tb6_lock);
1132
1133 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1134 dst_release(&rt->dst);
1135
1136 if (uncached_rt)
1137 rt6_uncached_list_add(uncached_rt);
1138 else
1139 uncached_rt = net->ipv6.ip6_null_entry;
1140
1141 dst_hold(&uncached_rt->dst);
1142 return uncached_rt;
1143
1144 } else {
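		/* Default case: hand out (or create) this cpu's cached clone
		 * of the route.
		 */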
1147 struct rt6_info *pcpu_rt;
1148
1149 rt->dst.lastuse = jiffies;
1150 rt->dst.__use++;
1151 pcpu_rt = rt6_get_pcpu_route(rt);
1152
1153 if (pcpu_rt) {
1154 read_unlock_bh(&table->tb6_lock);
1155 } else {
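			/* Drop tb6_lock before calling rt6_make_pcpu_route():
			 * it may trigger ip6_dst_gc(), which eventually needs
			 * the lock itself.  Hold 'rt' across the unlock so it
			 * cannot go away underneath us.
			 */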
1160 dst_hold(&rt->dst);
1161 read_unlock_bh(&table->tb6_lock);
1162 pcpu_rt = rt6_make_pcpu_route(rt);
1163 dst_release(&rt->dst);
1164 }
1165
1166 return pcpu_rt;
1167
1168 }
1169}
1170
1171static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1172 struct flowi6 *fl6, int flags)
1173{
1174 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1175}
1176
1177static struct dst_entry *ip6_route_input_lookup(struct net *net,
1178 struct net_device *dev,
1179 struct flowi6 *fl6, int flags)
1180{
1181 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1182 flags |= RT6_LOOKUP_F_IFACE;
1183
1184 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1185}
1186
1187void ip6_route_input(struct sk_buff *skb)
1188{
1189 const struct ipv6hdr *iph = ipv6_hdr(skb);
1190 struct net *net = dev_net(skb->dev);
1191 int flags = RT6_LOOKUP_F_HAS_SADDR;
1192 struct ip_tunnel_info *tun_info;
1193 struct flowi6 fl6 = {
1194 .flowi6_iif = skb->dev->ifindex,
1195 .daddr = iph->daddr,
1196 .saddr = iph->saddr,
1197 .flowlabel = ip6_flowinfo(iph),
1198 .flowi6_mark = skb->mark,
1199 .flowi6_proto = iph->nexthdr,
1200 };
1201
1202 tun_info = skb_tunnel_info(skb);
1203 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1204 flowi6_tun_id_set(&fl6, tun_info->key.tun_id);
1205 skb_dst_drop(skb);
1206 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1207}
1208
1209static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1210 struct flowi6 *fl6, int flags)
1211{
1212 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1213}
1214
1215struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1216 struct flowi6 *fl6)
1217{
1218 int flags = 0;
1219 bool any_src;
1220
1221 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1222
1223 any_src = ipv6_addr_any(&fl6->saddr);
1224 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1225 (fl6->flowi6_oif && any_src))
1226 flags |= RT6_LOOKUP_F_IFACE;
1227
1228 if (!any_src)
1229 flags |= RT6_LOOKUP_F_HAS_SADDR;
1230 else if (sk)
1231 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1232
1233 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1234}
1235
1236EXPORT_SYMBOL(ip6_route_output);
1237
1238struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1239{
1240 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1241 struct dst_entry *new = NULL;
1242
1243 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1244 if (rt) {
1245 rt6_info_init(rt);
1246
1247 new = &rt->dst;
1248 new->__use = 1;
1249 new->input = dst_discard;
1250 new->output = dst_discard_sk;
1251
1252 dst_copy_metrics(new, &ort->dst);
1253 rt->rt6i_idev = ort->rt6i_idev;
1254 if (rt->rt6i_idev)
1255 in6_dev_hold(rt->rt6i_idev);
1256
1257 rt->rt6i_gateway = ort->rt6i_gateway;
1258 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1259 rt->rt6i_metric = 0;
1260
1261 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1262#ifdef CONFIG_IPV6_SUBTREES
1263 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1264#endif
1265
1266 dst_free(new);
1267 }
1268
1269 dst_release(dst_orig);
1270 return new ? new : ERR_PTR(-ENOMEM);
1271}
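
/*
 *	Destination cache support functions
 */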
1277static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1278{
1279 if (rt->dst.from &&
1280 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1281 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1282}
1283
1284static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1285{
1286 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1287 return NULL;
1288
1289 if (rt6_check_expired(rt))
1290 return NULL;
1291
1292 return &rt->dst;
1293}
1294
1295static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1296{
1297 if (!__rt6_check_expired(rt) &&
1298 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1299 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1300 return &rt->dst;
1301 else
1302 return NULL;
1303}
1304
1305static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1306{
1307 struct rt6_info *rt;
1308
1309 rt = (struct rt6_info *) dst;
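
	/* IPv6 dsts in the fib are created with ->obsolete set to
	 * DST_OBSOLETE_FORCE_CHK, which forces validation calls down into
	 * this function whenever a cached dst is reused.
	 */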
1316 rt6_dst_from_metrics_check(rt);
1317
1318 if (rt->rt6i_flags & RTF_PCPU ||
1319 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1320 return rt6_dst_from_check(rt, cookie);
1321 else
1322 return rt6_check(rt, cookie);
1323}
1324
1325static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1326{
1327 struct rt6_info *rt = (struct rt6_info *) dst;
1328
1329 if (rt) {
1330 if (rt->rt6i_flags & RTF_CACHE) {
1331 if (rt6_check_expired(rt)) {
1332 ip6_del_rt(rt);
1333 dst = NULL;
1334 }
1335 } else {
1336 dst_release(dst);
1337 dst = NULL;
1338 }
1339 }
1340 return dst;
1341}
1342
1343static void ip6_link_failure(struct sk_buff *skb)
1344{
1345 struct rt6_info *rt;
1346
1347 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1348
1349 rt = (struct rt6_info *) skb_dst(skb);
1350 if (rt) {
1351 if (rt->rt6i_flags & RTF_CACHE) {
1352 dst_hold(&rt->dst);
1353 ip6_del_rt(rt);
1354 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1355 rt->rt6i_node->fn_sernum = -1;
1356 }
1357 }
1358}
1359
1360static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1361{
1362 struct net *net = dev_net(rt->dst.dev);
1363
1364 rt->rt6i_flags |= RTF_MODIFIED;
1365 rt->rt6i_pmtu = mtu;
1366 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1367}
1368
1369static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1370{
1371 return !(rt->rt6i_flags & RTF_CACHE) &&
1372 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1373}
1374
1375static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1376 const struct ipv6hdr *iph, u32 mtu)
1377{
1378 const struct in6_addr *daddr, *saddr;
1379 struct rt6_info *rt6 = (struct rt6_info *)dst;
1380
1381 if (rt6->rt6i_flags & RTF_LOCAL)
1382 return;
1383
1384 if (dst_metric_locked(dst, RTAX_MTU))
1385 return;
1386
1387 if (iph) {
1388 daddr = &iph->daddr;
1389 saddr = &iph->saddr;
1390 } else if (sk) {
1391 daddr = &sk->sk_v6_daddr;
1392 saddr = &inet6_sk(sk)->saddr;
1393 } else {
1394 daddr = NULL;
1395 saddr = NULL;
1396 }
1397 dst_confirm_neigh(dst, daddr);
1398 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1399 if (mtu >= dst_mtu(dst))
1400 return;
1401
1402 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1403 rt6_do_update_pmtu(rt6, mtu);
1404 } else if (daddr) {
1405 struct rt6_info *nrt6;
1406
1407 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1408 if (nrt6) {
1409 rt6_do_update_pmtu(nrt6, mtu);
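
			/* Inserting nrt6 bumps the fib6 node's fn_sernum,
			 * so cached socket dsts fail their next rt6_check()
			 * and re-look-up the route, picking up the new
			 * cached entry.
			 */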
1416 ip6_ins_rt(nrt6);
1417 }
1418 }
1419}
1420
1421static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1422 struct sk_buff *skb, u32 mtu)
1423{
1424 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1425}
1426
1427void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1428 int oif, u32 mark)
1429{
1430 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1431 struct dst_entry *dst;
1432 struct flowi6 fl6;
1433
1434 memset(&fl6, 0, sizeof(fl6));
1435 fl6.flowi6_oif = oif;
1436 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1437 fl6.flowi6_flags = 0;
1438 fl6.daddr = iph->daddr;
1439 fl6.saddr = iph->saddr;
1440 fl6.flowlabel = ip6_flowinfo(iph);
1441
1442 dst = ip6_route_output(net, NULL, &fl6);
1443 if (!dst->error)
1444 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1445 dst_release(dst);
1446}
1447EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1448
1449void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1450{
1451 struct dst_entry *dst;
1452
1453 ip6_update_pmtu(skb, sock_net(sk), mtu,
1454 sk->sk_bound_dev_if, sk->sk_mark);
1455
1456 dst = __sk_dst_get(sk);
1457 if (!dst || !dst->obsolete ||
1458 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1459 return;
1460
1461 bh_lock_sock(sk);
1462 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1463 ip6_datagram_dst_update(sk, false);
1464 bh_unlock_sock(sk);
1465}
1466EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1467
1468
1469struct ip6rd_flowi {
1470 struct flowi6 fl6;
1471 struct in6_addr gateway;
1472};
1473
1474static struct rt6_info *__ip6_route_redirect(struct net *net,
1475 struct fib6_table *table,
1476 struct flowi6 *fl6,
1477 int flags)
1478{
1479 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1480 struct rt6_info *rt;
1481 struct fib6_node *fn;
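
	/* Find the "current" route to the destination and verify that the
	 * redirect really came from the router currently used as the next
	 * hop towards it (RFC 4861: redirects are only accepted from the
	 * first-hop router); rdfl->gateway is the redirecting router.
	 */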
1493 read_lock_bh(&table->tb6_lock);
1494 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1495restart:
1496 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1497 if (rt6_check_expired(rt))
1498 continue;
1499 if (rt->dst.error)
1500 break;
1501 if (!(rt->rt6i_flags & RTF_GATEWAY))
1502 continue;
1503 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1504 continue;
1505 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1506 continue;
1507 break;
1508 }
1509
1510 if (!rt)
1511 rt = net->ipv6.ip6_null_entry;
1512 else if (rt->dst.error) {
1513 rt = net->ipv6.ip6_null_entry;
1514 goto out;
1515 }
1516
1517 if (rt == net->ipv6.ip6_null_entry) {
1518 fn = fib6_backtrack(fn, &fl6->saddr);
1519 if (fn)
1520 goto restart;
1521 }
1522
1523out:
1524 dst_hold(&rt->dst);
1525
1526 read_unlock_bh(&table->tb6_lock);
1527
1528 return rt;
1529};
1530
1531static struct dst_entry *ip6_route_redirect(struct net *net,
1532 const struct flowi6 *fl6,
1533 const struct in6_addr *gateway)
1534{
1535 int flags = RT6_LOOKUP_F_HAS_SADDR;
1536 struct ip6rd_flowi rdfl;
1537
1538 rdfl.fl6 = *fl6;
1539 rdfl.gateway = *gateway;
1540
1541 return fib6_rule_lookup(net, &rdfl.fl6,
1542 flags, __ip6_route_redirect);
1543}
1544
1545void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1546{
1547 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1548 struct dst_entry *dst;
1549 struct flowi6 fl6;
1550
1551 memset(&fl6, 0, sizeof(fl6));
1552 fl6.flowi6_oif = oif;
1553 fl6.flowi6_mark = mark;
1554 fl6.flowi6_flags = 0;
1555 fl6.daddr = iph->daddr;
1556 fl6.saddr = iph->saddr;
1557 fl6.flowlabel = ip6_flowinfo(iph);
1558
1559 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1560 rt6_do_redirect(dst, NULL, skb);
1561 dst_release(dst);
1562}
1563EXPORT_SYMBOL_GPL(ip6_redirect);
1564
1565void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1566 u32 mark)
1567{
1568 const struct ipv6hdr *iph = ipv6_hdr(skb);
1569 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1570 struct dst_entry *dst;
1571 struct flowi6 fl6;
1572
1573 memset(&fl6, 0, sizeof(fl6));
1574 fl6.flowi6_oif = oif;
1575 fl6.flowi6_mark = mark;
1576 fl6.flowi6_flags = 0;
1577 fl6.daddr = msg->dest;
1578 fl6.saddr = iph->daddr;
1579
1580 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1581 rt6_do_redirect(dst, NULL, skb);
1582 dst_release(dst);
1583}
1584
1585void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1586{
1587 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1588}
1589EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1590
1591static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1592{
1593 struct net_device *dev = dst->dev;
1594 unsigned int mtu = dst_mtu(dst);
1595 struct net *net = dev_net(dev);
1596
1597 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1598
1599 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1600 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
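
	/*
	 * The maximal non-jumbogram IPv6 payload is IPV6_MAXPLEN, so the
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.  Advertising
	 * IPV6_MAXPLEN itself means "any MSS, rely on PMTU discovery only".
	 */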
1608 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1609 mtu = IPV6_MAXPLEN;
1610 return mtu;
1611}
1612
1613static unsigned int ip6_mtu(const struct dst_entry *dst)
1614{
1615 const struct rt6_info *rt = (const struct rt6_info *)dst;
1616 unsigned int mtu = rt->rt6i_pmtu;
1617 struct inet6_dev *idev;
1618
1619 if (mtu)
1620 goto out;
1621
1622 mtu = dst_metric_raw(dst, RTAX_MTU);
1623 if (mtu)
1624 goto out;
1625
1626 mtu = IPV6_MIN_MTU;
1627
1628 rcu_read_lock();
1629 idev = __in6_dev_get(dst->dev);
1630 if (idev)
1631 mtu = idev->cnf.mtu6;
1632 rcu_read_unlock();
1633
1634out:
1635 return min_t(unsigned int, mtu, IP6_MAX_MTU);
1636}
1637
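/* dsts created by icmp6_dst_alloc() (used when sending ICMPv6/ND messages)
 * are never inserted into the fib6 tree; they live on this list and are
 * garbage collected by icmp6_dst_gc().
 */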
1638static struct dst_entry *icmp6_dst_gc_list;
1639static DEFINE_SPINLOCK(icmp6_dst_lock);
1640
1641struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1642 struct flowi6 *fl6)
1643{
1644 struct dst_entry *dst;
1645 struct rt6_info *rt;
1646 struct inet6_dev *idev = in6_dev_get(dev);
1647 struct net *net = dev_net(dev);
1648
1649 if (unlikely(!idev))
1650 return ERR_PTR(-ENODEV);
1651
1652 rt = ip6_dst_alloc(net, dev, 0);
1653 if (unlikely(!rt)) {
1654 in6_dev_put(idev);
1655 dst = ERR_PTR(-ENOMEM);
1656 goto out;
1657 }
1658
1659 rt->dst.flags |= DST_HOST;
1660 rt->dst.output = ip6_output;
1661 atomic_set(&rt->dst.__refcnt, 1);
1662 rt->rt6i_gateway = fl6->daddr;
1663 rt->rt6i_dst.addr = fl6->daddr;
1664 rt->rt6i_dst.plen = 128;
1665 rt->rt6i_idev = idev;
1666 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1667
1668 spin_lock_bh(&icmp6_dst_lock);
1669 rt->dst.next = icmp6_dst_gc_list;
1670 icmp6_dst_gc_list = &rt->dst;
1671 spin_unlock_bh(&icmp6_dst_lock);
1672
1673 fib6_force_start_gc(net);
1674
1675 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1676
1677out:
1678 return dst;
1679}
1680
1681int icmp6_dst_gc(void)
1682{
1683 struct dst_entry *dst, **pprev;
1684 int more = 0;
1685
1686 spin_lock_bh(&icmp6_dst_lock);
1687 pprev = &icmp6_dst_gc_list;
1688
1689 while ((dst = *pprev) != NULL) {
1690 if (!atomic_read(&dst->__refcnt)) {
1691 *pprev = dst->next;
1692 dst_free(dst);
1693 } else {
1694 pprev = &dst->next;
1695 ++more;
1696 }
1697 }
1698
1699 spin_unlock_bh(&icmp6_dst_lock);
1700
1701 return more;
1702}
1703
1704static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1705 void *arg)
1706{
1707 struct dst_entry *dst, **pprev;
1708
1709 spin_lock_bh(&icmp6_dst_lock);
1710 pprev = &icmp6_dst_gc_list;
1711 while ((dst = *pprev) != NULL) {
1712 struct rt6_info *rt = (struct rt6_info *) dst;
1713 if (func(rt, arg)) {
1714 *pprev = dst->next;
1715 dst_free(dst);
1716 } else {
1717 pprev = &dst->next;
1718 }
1719 }
1720 spin_unlock_bh(&icmp6_dst_lock);
1721}
1722
1723static int ip6_dst_gc(struct dst_ops *ops)
1724{
1725 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1726 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1727 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1728 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1729 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1730 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1731 int entries;
1732
1733 entries = dst_entries_get_fast(ops);
1734 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1735 entries <= rt_max_size)
1736 goto out;
1737
1738 net->ipv6.ip6_rt_gc_expire++;
1739 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1740 entries = dst_entries_get_slow(ops);
1741 if (entries < ops->gc_thresh)
1742 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1743out:
1744 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1745 return entries > rt_max_size;
1746}
1747
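/* Convert the netlink metrics attributes carried in cfg->fc_mx into an
 * RTAX_* metrics array (struct mx6_config) that fib6_add() can apply.
 */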
1748static int ip6_convert_metrics(struct mx6_config *mxc,
1749 const struct fib6_config *cfg)
1750{
1751 bool ecn_ca = false;
1752 struct nlattr *nla;
1753 int remaining;
1754 u32 *mp;
1755
1756 if (cfg->fc_mx == NULL)
1757 return 0;
1758
1759 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1760 if (unlikely(!mp))
1761 return -ENOMEM;
1762
1763 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1764 int type = nla_type(nla);
1765 u32 val;
1766
1767 if (!type)
1768 continue;
1769 if (unlikely(type > RTAX_MAX))
1770 goto err;
1771
1772 if (type == RTAX_CC_ALGO) {
1773 char tmp[TCP_CA_NAME_MAX];
1774
1775 nla_strlcpy(tmp, nla, sizeof(tmp));
1776 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1777 if (val == TCP_CA_UNSPEC)
1778 goto err;
1779 } else {
1780 val = nla_get_u32(nla);
1781 }
1782 if (type == RTAX_HOPLIMIT && val > 255)
1783 val = 255;
1784 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1785 goto err;
1786
1787 mp[type - 1] = val;
1788 __set_bit(type - 1, mxc->mx_valid);
1789 }
1790
1791 if (ecn_ca) {
1792 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1793 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1794 }
1795
1796 mxc->mx = mp;
1797 return 0;
1798 err:
1799 kfree(mp);
1800 return -EINVAL;
1801}
1802
1803int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
1804{
1805 int err;
1806 struct net *net = cfg->fc_nlinfo.nl_net;
1807 struct rt6_info *rt = NULL;
1808 struct net_device *dev = NULL;
1809 struct inet6_dev *idev = NULL;
1810 struct fib6_table *table;
1811 int addr_type;
1812
1813 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1814 return -EINVAL;
1815#ifndef CONFIG_IPV6_SUBTREES
1816 if (cfg->fc_src_len)
1817 return -EINVAL;
1818#endif
1819 if (cfg->fc_ifindex) {
1820 err = -ENODEV;
1821 dev = dev_get_by_index(net, cfg->fc_ifindex);
1822 if (!dev)
1823 goto out;
1824 idev = in6_dev_get(dev);
1825 if (!idev)
1826 goto out;
1827 }
1828
1829 if (cfg->fc_metric == 0)
1830 cfg->fc_metric = IP6_RT_PRIO_USER;
1831
1832 err = -ENOBUFS;
1833 if (cfg->fc_nlinfo.nlh &&
1834 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1835 table = fib6_get_table(net, cfg->fc_table);
1836 if (!table) {
1837 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1838 table = fib6_new_table(net, cfg->fc_table);
1839 }
1840 } else {
1841 table = fib6_new_table(net, cfg->fc_table);
1842 }
1843
1844 if (!table)
1845 goto out;
1846
1847 rt = ip6_dst_alloc(net, NULL,
1848 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1849
1850 if (!rt) {
1851 err = -ENOMEM;
1852 goto out;
1853 }
1854
1855 if (cfg->fc_flags & RTF_EXPIRES)
1856 rt6_set_expires(rt, jiffies +
1857 clock_t_to_jiffies(cfg->fc_expires));
1858 else
1859 rt6_clean_expires(rt);
1860
1861 if (cfg->fc_protocol == RTPROT_UNSPEC)
1862 cfg->fc_protocol = RTPROT_BOOT;
1863 rt->rt6i_protocol = cfg->fc_protocol;
1864
1865 addr_type = ipv6_addr_type(&cfg->fc_dst);
1866
1867 if (addr_type & IPV6_ADDR_MULTICAST)
1868 rt->dst.input = ip6_mc_input;
1869 else if (cfg->fc_flags & RTF_LOCAL)
1870 rt->dst.input = ip6_input;
1871 else
1872 rt->dst.input = ip6_forward;
1873
1874 rt->dst.output = ip6_output;
1875
1876 if (cfg->fc_encap) {
1877 struct lwtunnel_state *lwtstate;
1878
1879 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1880 cfg->fc_encap, AF_INET6, cfg,
1881 &lwtstate);
1882 if (err)
1883 goto out;
1884 rt->dst.lwtstate = lwtstate_get(lwtstate);
1885 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1886 rt->dst.lwtstate->orig_output = rt->dst.output;
1887 rt->dst.output = lwtunnel_output;
1888 }
1889 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1890 rt->dst.lwtstate->orig_input = rt->dst.input;
1891 rt->dst.input = lwtunnel_input;
1892 }
1893 }
1894
1895 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1896 rt->rt6i_dst.plen = cfg->fc_dst_len;
1897 if (rt->rt6i_dst.plen == 128)
1898 rt->dst.flags |= DST_HOST;
1899
1900#ifdef CONFIG_IPV6_SUBTREES
1901 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1902 rt->rt6i_src.plen = cfg->fc_src_len;
1903#endif
1904
1905 rt->rt6i_metric = cfg->fc_metric;
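
	/* True routes via the loopback device would make the kernel loop on
	 * itself, so such entries (unless the destination really is a
	 * loopback or local address) and explicit RTF_REJECT routes are
	 * installed as reject routes.
	 */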
1910 if ((cfg->fc_flags & RTF_REJECT) ||
1911 (dev && (dev->flags & IFF_LOOPBACK) &&
1912 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1913 !(cfg->fc_flags & RTF_LOCAL))) {
1914
1915 if (dev != net->loopback_dev) {
1916 if (dev) {
1917 dev_put(dev);
1918 in6_dev_put(idev);
1919 }
1920 dev = net->loopback_dev;
1921 dev_hold(dev);
1922 idev = in6_dev_get(dev);
1923 if (!idev) {
1924 err = -ENODEV;
1925 goto out;
1926 }
1927 }
1928 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1929 switch (cfg->fc_type) {
1930 case RTN_BLACKHOLE:
1931 rt->dst.error = -EINVAL;
1932 rt->dst.output = dst_discard_sk;
1933 rt->dst.input = dst_discard;
1934 break;
1935 case RTN_PROHIBIT:
1936 rt->dst.error = -EACCES;
1937 rt->dst.output = ip6_pkt_prohibit_out;
1938 rt->dst.input = ip6_pkt_prohibit;
1939 break;
1940 case RTN_THROW:
1941 case RTN_UNREACHABLE:
1942 default:
1943 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1944 : (cfg->fc_type == RTN_UNREACHABLE)
1945 ? -EHOSTUNREACH : -ENETUNREACH;
1946 rt->dst.output = ip6_pkt_discard_out;
1947 rt->dst.input = ip6_pkt_discard;
1948 break;
1949 }
1950 goto install_route;
1951 }
1952
1953 if (cfg->fc_flags & RTF_GATEWAY) {
1954 const struct in6_addr *gw_addr;
1955 int gwa_type;
1956
1957 gw_addr = &cfg->fc_gateway;
1958 gwa_type = ipv6_addr_type(gw_addr);
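
		/* A gateway must not be one of the machine's own addresses. */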
1965 err = -EINVAL;
1966 if (ipv6_chk_addr_and_flags(net, gw_addr,
1967 gwa_type & IPV6_ADDR_LINKLOCAL ?
1968 dev : NULL, 0, 0))
1969 goto out;
1970
1971 rt->rt6i_gateway = *gw_addr;
1972
1973 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1974 struct rt6_info *grt;
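
			/* A nexthop normally has to be a link-local unicast
			 * address (otherwise the router could not send
			 * redirects), but global unicast gateways are still
			 * allowed for point-to-point and NBMA-style links,
			 * provided a route to the gateway already exists and
			 * that route is not itself via a gateway.
			 */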
1983 if (!(gwa_type & IPV6_ADDR_UNICAST))
1984 goto out;
1985
1986 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1987
1988 err = -EHOSTUNREACH;
1989 if (!grt)
1990 goto out;
1991 if (dev) {
1992 if (dev != grt->dst.dev) {
1993 ip6_rt_put(grt);
1994 goto out;
1995 }
1996 } else {
1997 dev = grt->dst.dev;
1998 idev = grt->rt6i_idev;
1999 dev_hold(dev);
2000 in6_dev_hold(grt->rt6i_idev);
2001 }
2002 if (!(grt->rt6i_flags & RTF_GATEWAY))
2003 err = 0;
2004 ip6_rt_put(grt);
2005
2006 if (err)
2007 goto out;
2008 }
2009 err = -EINVAL;
2010 if (!dev || (dev->flags & IFF_LOOPBACK))
2011 goto out;
2012 }
2013
2014 err = -ENODEV;
2015 if (!dev)
2016 goto out;
2017
2018 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2019 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2020 err = -EINVAL;
2021 goto out;
2022 }
2023 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2024 rt->rt6i_prefsrc.plen = 128;
2025 } else
2026 rt->rt6i_prefsrc.plen = 0;
2027
2028 rt->rt6i_flags = cfg->fc_flags;
2029
2030install_route:
2031 rt->dst.dev = dev;
2032 rt->rt6i_idev = idev;
2033 rt->rt6i_table = table;
2034
2035 cfg->fc_nlinfo.nl_net = dev_net(dev);
2036
2037 *rt_ret = rt;
2038
2039 return 0;
2040out:
2041 if (dev)
2042 dev_put(dev);
2043 if (idev)
2044 in6_dev_put(idev);
2045 if (rt)
2046 dst_free(&rt->dst);
2047
2048 *rt_ret = NULL;
2049
2050 return err;
2051}
2052
2053int ip6_route_add(struct fib6_config *cfg)
2054{
2055 struct mx6_config mxc = { .mx = NULL, };
2056 struct rt6_info *rt = NULL;
2057 int err;
2058
2059 err = ip6_route_info_create(cfg, &rt);
2060 if (err)
2061 goto out;
2062
2063 err = ip6_convert_metrics(&mxc, cfg);
2064 if (err)
2065 goto out;
2066
2067 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2068
2069 kfree(mxc.mx);
2070
2071 return err;
2072out:
2073 if (rt)
2074 dst_free(&rt->dst);
2075
2076 return err;
2077}
2078
2079static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2080{
2081 int err;
2082 struct fib6_table *table;
2083 struct net *net = dev_net(rt->dst.dev);
2084
2085 if (rt == net->ipv6.ip6_null_entry ||
2086 rt->dst.flags & DST_NOCACHE) {
2087 err = -ENOENT;
2088 goto out;
2089 }
2090
2091 table = rt->rt6i_table;
2092 write_lock_bh(&table->tb6_lock);
2093 err = fib6_del(rt, info);
2094 write_unlock_bh(&table->tb6_lock);
2095
2096out:
2097 ip6_rt_put(rt);
2098 return err;
2099}
2100
2101int ip6_del_rt(struct rt6_info *rt)
2102{
2103 struct nl_info info = {
2104 .nl_net = dev_net(rt->dst.dev),
2105 };
2106 return __ip6_del_rt(rt, &info);
2107}
2108
2109static int ip6_route_del(struct fib6_config *cfg)
2110{
2111 struct fib6_table *table;
2112 struct fib6_node *fn;
2113 struct rt6_info *rt;
2114 int err = -ESRCH;
2115
2116 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2117 if (!table)
2118 return err;
2119
2120 read_lock_bh(&table->tb6_lock);
2121
2122 fn = fib6_locate(&table->tb6_root,
2123 &cfg->fc_dst, cfg->fc_dst_len,
2124 &cfg->fc_src, cfg->fc_src_len);
2125
2126 if (fn) {
2127 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2128 if ((rt->rt6i_flags & RTF_CACHE) &&
2129 !(cfg->fc_flags & RTF_CACHE))
2130 continue;
2131 if (cfg->fc_ifindex &&
2132 (!rt->dst.dev ||
2133 rt->dst.dev->ifindex != cfg->fc_ifindex))
2134 continue;
2135 if (cfg->fc_flags & RTF_GATEWAY &&
2136 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2137 continue;
2138 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2139 continue;
2140 dst_hold(&rt->dst);
2141 read_unlock_bh(&table->tb6_lock);
2142
2143 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2144 }
2145 }
2146 read_unlock_bh(&table->tb6_lock);
2147
2148 return err;
2149}
2150
2151static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2152{
2153 struct netevent_redirect netevent;
2154 struct rt6_info *rt, *nrt = NULL;
2155 struct ndisc_options ndopts;
2156 struct inet6_dev *in6_dev;
2157 struct neighbour *neigh;
2158 struct rd_msg *msg;
2159 int optlen, on_link;
2160 u8 *lladdr;
2161
2162 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2163 optlen -= sizeof(*msg);
2164
2165 if (optlen < 0) {
2166 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2167 return;
2168 }
2169
2170 msg = (struct rd_msg *)icmp6_hdr(skb);
2171
2172 if (ipv6_addr_is_multicast(&msg->dest)) {
2173 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2174 return;
2175 }
2176
2177 on_link = 0;
2178 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2179 on_link = 1;
2180 } else if (ipv6_addr_type(&msg->target) !=
2181 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2182 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2183 return;
2184 }
2185
2186 in6_dev = __in6_dev_get(skb->dev);
2187 if (!in6_dev)
2188 return;
2189 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2190 return;
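
	/* RFC 4861, 8.1:
	 *	The IP source address of the Redirect MUST be the same as the
	 *	current first-hop router for the specified ICMP Destination
	 *	Address.
	 */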
2197 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2198 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2199 return;
2200 }
2201
2202 lladdr = NULL;
2203 if (ndopts.nd_opts_tgt_lladdr) {
2204 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2205 skb->dev);
2206 if (!lladdr) {
2207 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2208 return;
2209 }
2210 }
2211
2212 rt = (struct rt6_info *) dst;
2213 if (rt->rt6i_flags & RTF_REJECT) {
2214 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2215 return;
2216 }
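
	/* A redirect implies the old path was working: redirects are only
	 * sent in response to data packets, so the current nexthop is
	 * apparently reachable; confirm its neighbour entry.
	 */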
2222 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
2223
2224 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2225 if (!neigh)
2226 return;
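
	/* Accept the redirect: update the neighbour cache entry for the new
	 * nexthop, then install an RTF_CACHE route through it.
	 */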
2232 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2233 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2234 NEIGH_UPDATE_F_OVERRIDE|
2235 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2236 NEIGH_UPDATE_F_ISROUTER)),
2237 NDISC_REDIRECT, &ndopts);
2238
2239 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2240 if (!nrt)
2241 goto out;
2242
2243 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2244 if (on_link)
2245 nrt->rt6i_flags &= ~RTF_GATEWAY;
2246
2247 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2248
2249 if (ip6_ins_rt(nrt))
2250 goto out;
2251
2252 netevent.old = &rt->dst;
2253 netevent.new = &nrt->dst;
2254 netevent.daddr = &msg->dest;
2255 netevent.neigh = neigh;
2256 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2257
2258 if (rt->rt6i_flags & RTF_CACHE) {
2259 rt = (struct rt6_info *) dst_clone(&rt->dst);
2260 ip6_del_rt(rt);
2261 }
2262
2263out:
2264 neigh_release(neigh);
2265}
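
/*
 *	Misc support functions
 */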
2271static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2272{
2273 BUG_ON(from->dst.from);
2274
2275 rt->rt6i_flags &= ~RTF_EXPIRES;
2276 dst_hold(&from->dst);
2277 rt->dst.from = &from->dst;
2278 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2279}
2280
2281static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2282{
2283 rt->dst.input = ort->dst.input;
2284 rt->dst.output = ort->dst.output;
2285 rt->rt6i_dst = ort->rt6i_dst;
2286 rt->dst.error = ort->dst.error;
2287 rt->rt6i_idev = ort->rt6i_idev;
2288 if (rt->rt6i_idev)
2289 in6_dev_hold(rt->rt6i_idev);
2290 rt->dst.lastuse = jiffies;
2291 rt->rt6i_gateway = ort->rt6i_gateway;
2292 rt->rt6i_flags = ort->rt6i_flags;
2293 rt6_set_from(rt, ort);
2294 rt->rt6i_metric = ort->rt6i_metric;
2295#ifdef CONFIG_IPV6_SUBTREES
2296 rt->rt6i_src = ort->rt6i_src;
2297#endif
2298 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2299 rt->rt6i_table = ort->rt6i_table;
2300 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2301}
2302
2303#ifdef CONFIG_IPV6_ROUTE_INFO
2304static struct rt6_info *rt6_get_route_info(struct net *net,
2305 const struct in6_addr *prefix, int prefixlen,
2306 const struct in6_addr *gwaddr, int ifindex)
2307{
2308 struct fib6_node *fn;
2309 struct rt6_info *rt = NULL;
2310 struct fib6_table *table;
2311
2312 table = fib6_get_table(net, RT6_TABLE_INFO);
2313 if (!table)
2314 return NULL;
2315
2316 read_lock_bh(&table->tb6_lock);
2317 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2318 if (!fn)
2319 goto out;
2320
2321 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2322 if (rt->dst.dev->ifindex != ifindex)
2323 continue;
2324 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2325 continue;
2326 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2327 continue;
2328 dst_hold(&rt->dst);
2329 break;
2330 }
2331out:
2332 read_unlock_bh(&table->tb6_lock);
2333 return rt;
2334}
2335
2336static struct rt6_info *rt6_add_route_info(struct net *net,
2337 const struct in6_addr *prefix, int prefixlen,
2338 const struct in6_addr *gwaddr, int ifindex,
2339 unsigned int pref)
2340{
2341 struct fib6_config cfg = {
2342 .fc_table = RT6_TABLE_INFO,
2343 .fc_metric = IP6_RT_PRIO_USER,
2344 .fc_ifindex = ifindex,
2345 .fc_dst_len = prefixlen,
2346 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2347 RTF_UP | RTF_PREF(pref),
2348 .fc_nlinfo.portid = 0,
2349 .fc_nlinfo.nlh = NULL,
2350 .fc_nlinfo.nl_net = net,
2351 };
2352
2353 cfg.fc_dst = *prefix;
2354 cfg.fc_gateway = *gwaddr;
2355
2356 /* Treat it as a default route if the prefix length is 0. */
2357 if (!prefixlen)
2358 cfg.fc_flags |= RTF_DEFAULT;
2359
2360 ip6_route_add(&cfg);
2361
2362 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2363}
2364#endif
2365
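/* Default routers learned from Router Advertisements live in RT6_TABLE_DFLT
 * and are marked RTF_ADDRCONF|RTF_DEFAULT. rt6_get_dflt_router() finds the
 * entry for a given gateway and device; rt6_add_dflt_router() installs one
 * with a lifetime (RTF_EXPIRES) and the router preference carried in the RA.
 */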
2366struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2367{
2368 struct rt6_info *rt;
2369 struct fib6_table *table;
2370
2371 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2372 if (!table)
2373 return NULL;
2374
2375 read_lock_bh(&table->tb6_lock);
2376 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2377 if (dev == rt->dst.dev &&
2378 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2379 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2380 break;
2381 }
2382 if (rt)
2383 dst_hold(&rt->dst);
2384 read_unlock_bh(&table->tb6_lock);
2385 return rt;
2386}
2387
2388struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2389 struct net_device *dev,
2390 unsigned int pref)
2391{
2392 struct fib6_config cfg = {
2393 .fc_table = RT6_TABLE_DFLT,
2394 .fc_metric = IP6_RT_PRIO_USER,
2395 .fc_ifindex = dev->ifindex,
2396 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2397 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2398 .fc_nlinfo.portid = 0,
2399 .fc_nlinfo.nlh = NULL,
2400 .fc_nlinfo.nl_net = dev_net(dev),
2401 };
2402
2403 cfg.fc_gateway = *gwaddr;
2404
2405 ip6_route_add(&cfg);
2406
2407 return rt6_get_dflt_router(gwaddr, dev);
2408}
2409
2410void rt6_purge_dflt_routers(struct net *net)
2411{
2412 struct rt6_info *rt;
2413 struct fib6_table *table;
2414
2415 /* NOTE: keep consistent with rt6_get_dflt_router() */
2416 table = fib6_get_table(net, RT6_TABLE_DFLT);
2417 if (!table)
2418 return;
2419
2420restart:
2421 read_lock_bh(&table->tb6_lock);
2422 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2423 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2424 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2425 dst_hold(&rt->dst);
2426 read_unlock_bh(&table->tb6_lock);
2427 ip6_del_rt(rt);
2428 goto restart;
2429 }
2430 }
2431 read_unlock_bh(&table->tb6_lock);
2432}
2433
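/* Legacy route ioctls (SIOCADDRT/SIOCDELRT): translate the userspace
 * struct in6_rtmsg into a fib6_config and add or delete the route under
 * the RTNL lock. Routes added this way go into RT6_TABLE_MAIN and require
 * CAP_NET_ADMIN in the owning user namespace.
 */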
2434static void rtmsg_to_fib6_config(struct net *net,
2435 struct in6_rtmsg *rtmsg,
2436 struct fib6_config *cfg)
2437{
2438 memset(cfg, 0, sizeof(*cfg));
2439
2440 cfg->fc_table = RT6_TABLE_MAIN;
2441 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2442 cfg->fc_metric = rtmsg->rtmsg_metric;
2443 cfg->fc_expires = rtmsg->rtmsg_info;
2444 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2445 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2446 cfg->fc_flags = rtmsg->rtmsg_flags;
2447
2448 cfg->fc_nlinfo.nl_net = net;
2449
2450 cfg->fc_dst = rtmsg->rtmsg_dst;
2451 cfg->fc_src = rtmsg->rtmsg_src;
2452 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2453}
2454
2455int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2456{
2457 struct fib6_config cfg;
2458 struct in6_rtmsg rtmsg;
2459 int err;
2460
2461 switch (cmd) {
2462 case SIOCADDRT:
2463 case SIOCDELRT:
2464 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2465 return -EPERM;
2466 err = copy_from_user(&rtmsg, arg,
2467 sizeof(struct in6_rtmsg));
2468 if (err)
2469 return -EFAULT;
2470
2471 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2472
2473 rtnl_lock();
2474 switch (cmd) {
2475 case SIOCADDRT:
2476 err = ip6_route_add(&cfg);
2477 break;
2478 case SIOCDELRT:
2479 err = ip6_route_del(&cfg);
2480 break;
2481 default:
2482 err = -EINVAL;
2483 }
2484 rtnl_unlock();
2485
2486 return err;
2487 }
2488
2489 return -EINVAL;
2490}
2491
2492/*
2493 *	Drop the packet on the floor
2494 */
2495
2496static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2497{
2498 int type;
2499 struct dst_entry *dst = skb_dst(skb);
2500 switch (ipstats_mib_noroutes) {
2501 case IPSTATS_MIB_INNOROUTES:
2502 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2503 if (type == IPV6_ADDR_ANY) {
2504 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2505 IPSTATS_MIB_INADDRERRORS);
2506 break;
2507 }
2508 /* FALLTHROUGH */
2509 case IPSTATS_MIB_OUTNOROUTES:
2510 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2511 ipstats_mib_noroutes);
2512 break;
2513 }
2514 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2515 kfree_skb(skb);
2516 return 0;
2517}
2518
2519static int ip6_pkt_discard(struct sk_buff *skb)
2520{
2521 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2522}
2523
2524static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2525{
2526 skb->dev = skb_dst(skb)->dev;
2527 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2528}
2529
2530static int ip6_pkt_prohibit(struct sk_buff *skb)
2531{
2532 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2533}
2534
2535static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2536{
2537 skb->dev = skb_dst(skb)->dev;
2538 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2539}
2540
2541/*
2542 *	Allocate a dst for a local (unicast / anycast) address.
2543 */
2544
2545struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2546 const struct in6_addr *addr,
2547 bool anycast)
2548{
2549 struct net *net = dev_net(idev->dev);
2550 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2551 DST_NOCOUNT);
2552 if (!rt)
2553 return ERR_PTR(-ENOMEM);
2554
2555 in6_dev_hold(idev);
2556
2557 rt->dst.flags |= DST_HOST;
2558 rt->dst.input = ip6_input;
2559 rt->dst.output = ip6_output;
2560 rt->rt6i_idev = idev;
2561
2562 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2563 if (anycast)
2564 rt->rt6i_flags |= RTF_ANYCAST;
2565 else
2566 rt->rt6i_flags |= RTF_LOCAL;
2567
2568 rt->rt6i_gateway = *addr;
2569 rt->rt6i_dst.addr = *addr;
2570 rt->rt6i_dst.plen = 128;
2571 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2572 rt->dst.flags |= DST_NOCACHE;
2573
2574 atomic_set(&rt->dst.__refcnt, 1);
2575
2576 return rt;
2577}
2578
2579int ip6_route_get_saddr(struct net *net,
2580 struct rt6_info *rt,
2581 const struct in6_addr *daddr,
2582 unsigned int prefs,
2583 struct in6_addr *saddr)
2584{
2585 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2586 int err = 0;
2587 if (rt->rt6i_prefsrc.plen)
2588 *saddr = rt->rt6i_prefsrc.addr;
2589 else
2590 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2591 daddr, prefs, saddr);
2592 return err;
2593}
2594
2595/* remove a deleted address from prefsrc entries */
2596struct arg_dev_net_ip {
2597 struct net_device *dev;
2598 struct net *net;
2599 struct in6_addr *addr;
2600};
2601
2602static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2603{
2604 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2605 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2606 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2607
2608 if (((void *)rt->dst.dev == dev || !dev) &&
2609 rt != net->ipv6.ip6_null_entry &&
2610 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2611 /* remove prefsrc entry */
2612 rt->rt6i_prefsrc.plen = 0;
2613 }
2614 return 0;
2615}
2616
2617void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2618{
2619 struct net *net = dev_net(ifp->idev->dev);
2620 struct arg_dev_net_ip adni = {
2621 .dev = ifp->idev->dev,
2622 .net = net,
2623 .addr = &ifp->addr,
2624 };
2625 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2626}
2627
2628#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2629#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2630
2631/* Remove routers and update dst entries when a gateway turns into a host. */
2632static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2633{
2634 struct in6_addr *gateway = (struct in6_addr *)arg;
2635
2636 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2637 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2638 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2639 return -1;
2640 }
2641 return 0;
2642}
2643
2644void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2645{
2646 fib6_clean_all(net, fib6_clean_tohost, gateway);
2647}
2648
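/* rt6_ifdown(): remove routes that reference a device which is going away.
 * fib6_ifdown() returning -1 asks fib6_clean_all()/icmp6_clean_all() to delete
 * the matching entry; a NULL dev matches any device. Uncached (DST_NOCACHE)
 * routes are flushed separately via rt6_uncached_list_flush_dev().
 */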
2649struct arg_dev_net {
2650 struct net_device *dev;
2651 struct net *net;
2652};
2653
2654static int fib6_ifdown(struct rt6_info *rt, void *arg)
2655{
2656 const struct arg_dev_net *adn = arg;
2657 const struct net_device *dev = adn->dev;
2658
2659 if ((rt->dst.dev == dev || !dev) &&
2660 rt != adn->net->ipv6.ip6_null_entry)
2661 return -1;
2662
2663 return 0;
2664}
2665
2666void rt6_ifdown(struct net *net, struct net_device *dev)
2667{
2668 struct arg_dev_net adn = {
2669 .dev = dev,
2670 .net = net,
2671 };
2672
2673 fib6_clean_all(net, fib6_ifdown, &adn);
2674 icmp6_clean_all(fib6_ifdown, &adn);
2675 if (dev)
2676 rt6_uncached_list_flush_dev(net, dev);
2677}
2678
2679struct rt6_mtu_change_arg {
2680 struct net_device *dev;
2681 unsigned int mtu;
2682};
2683
2684static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2685{
2686 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2687 struct inet6_dev *idev;
2688
2689 /* In IPv6, PMTU discovery is not optional, so the RTAX_MTU lock
2690    cannot disable it. We still use this lock to block changes
2691    caused by addrconf/ndisc.
2692  */
2693
2694
2695 idev = __in6_dev_get(arg->dev);
2696 if (!idev)
2697 return 0;
2698
2699 /* For an administrative MTU increase there is no way to discover an
2700    IPv6 PMTU increase, so any PMTU increase has to be applied here.
2701    (RFC 1981 does not cover administrative MTU increases, e.g. when
2702    jumbo frames are enabled.)
2703  */
2704 /* If the new MTU is smaller than the route PMTU, the new MTU becomes
2705    the lowest MTU in the path, so lower the route PMTU to match. If
2706    the new MTU is larger and the old MTU was the lowest MTU in the
2707    path, raise the route PMTU to reflect the increase; should another
2708    node on the path still have a lower MTU, a Packet Too Big message
2709    will trigger PMTU discovery again.
2710  */
2711
2712
2713 if (rt->dst.dev == arg->dev &&
2714 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2715 if (rt->rt6i_flags & RTF_CACHE) {
2716 /* For RTF_CACHE with rt6i_pmtu == 0
2717  * (i.e. a redirected route),
2718  * the metrics of its rt->dst.from have already
2719  * been updated.
2720  */
2721 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2722 rt->rt6i_pmtu = arg->mtu;
2723 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2724 (dst_mtu(&rt->dst) < arg->mtu &&
2725 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2726 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2727 }
2728 }
2729 return 0;
2730}
2731
2732void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2733{
2734 struct rt6_mtu_change_arg arg = {
2735 .dev = dev,
2736 .mtu = mtu,
2737 };
2738
2739 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2740}
2741
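/* Netlink interface: rtm_ipv6_policy validates RTM_NEWROUTE/RTM_DELROUTE
 * attributes, and rtm_to_fib6_config() converts the rtmsg header plus
 * attributes into a fib6_config. A multipath request (RTA_MULTIPATH) is
 * then handled nexthop by nexthop further below.
 */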
2742static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2743 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2744 [RTA_OIF] = { .type = NLA_U32 },
2745 [RTA_IIF] = { .type = NLA_U32 },
2746 [RTA_PRIORITY] = { .type = NLA_U32 },
2747 [RTA_METRICS] = { .type = NLA_NESTED },
2748 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2749 [RTA_PREF] = { .type = NLA_U8 },
2750 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2751 [RTA_ENCAP] = { .type = NLA_NESTED },
2752 [RTA_EXPIRES] = { .type = NLA_U32 },
2753};
2754
2755static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2756 struct fib6_config *cfg)
2757{
2758 struct rtmsg *rtm;
2759 struct nlattr *tb[RTA_MAX+1];
2760 unsigned int pref;
2761 int err;
2762
2763 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2764 if (err < 0)
2765 goto errout;
2766
2767 err = -EINVAL;
2768 rtm = nlmsg_data(nlh);
2769 memset(cfg, 0, sizeof(*cfg));
2770
2771 cfg->fc_table = rtm->rtm_table;
2772 cfg->fc_dst_len = rtm->rtm_dst_len;
2773 cfg->fc_src_len = rtm->rtm_src_len;
2774 cfg->fc_flags = RTF_UP;
2775 cfg->fc_protocol = rtm->rtm_protocol;
2776 cfg->fc_type = rtm->rtm_type;
2777
2778 if (rtm->rtm_type == RTN_UNREACHABLE ||
2779 rtm->rtm_type == RTN_BLACKHOLE ||
2780 rtm->rtm_type == RTN_PROHIBIT ||
2781 rtm->rtm_type == RTN_THROW)
2782 cfg->fc_flags |= RTF_REJECT;
2783
2784 if (rtm->rtm_type == RTN_LOCAL)
2785 cfg->fc_flags |= RTF_LOCAL;
2786
2787 if (rtm->rtm_flags & RTM_F_CLONED)
2788 cfg->fc_flags |= RTF_CACHE;
2789
2790 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2791 cfg->fc_nlinfo.nlh = nlh;
2792 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2793
2794 if (tb[RTA_GATEWAY]) {
2795 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2796 cfg->fc_flags |= RTF_GATEWAY;
2797 }
2798
2799 if (tb[RTA_DST]) {
2800 int plen = (rtm->rtm_dst_len + 7) >> 3;
2801
2802 if (nla_len(tb[RTA_DST]) < plen)
2803 goto errout;
2804
2805 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2806 }
2807
2808 if (tb[RTA_SRC]) {
2809 int plen = (rtm->rtm_src_len + 7) >> 3;
2810
2811 if (nla_len(tb[RTA_SRC]) < plen)
2812 goto errout;
2813
2814 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2815 }
2816
2817 if (tb[RTA_PREFSRC])
2818 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2819
2820 if (tb[RTA_OIF])
2821 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2822
2823 if (tb[RTA_PRIORITY])
2824 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2825
2826 if (tb[RTA_METRICS]) {
2827 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2828 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2829 }
2830
2831 if (tb[RTA_TABLE])
2832 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2833
2834 if (tb[RTA_MULTIPATH]) {
2835 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2836 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2837 }
2838
2839 if (tb[RTA_PREF]) {
2840 pref = nla_get_u8(tb[RTA_PREF]);
2841 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2842 pref != ICMPV6_ROUTER_PREF_HIGH)
2843 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2844 cfg->fc_flags |= RTF_PREF(pref);
2845 }
2846
2847 if (tb[RTA_EXPIRES]) {
2848 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2849
2850 if (addrconf_finite_timeout(timeout)) {
2851 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2852 cfg->fc_flags |= RTF_EXPIRES;
2853 }
2854 }
2855
2856 if (tb[RTA_ENCAP])
2857 cfg->fc_encap = tb[RTA_ENCAP];
2858
2859 if (tb[RTA_ENCAP_TYPE])
2860 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2861
2862 err = 0;
2863errout:
2864 return err;
2865}
2866
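/* Bookkeeping for a multipath add: one rt6_nh per nexthop, holding the
 * not-yet-inserted rt6_info, its per-nexthop config and converted metrics.
 */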
2867struct rt6_nh {
2868 struct rt6_info *rt6_info;
2869 struct fib6_config r_cfg;
2870 struct mx6_config mxc;
2871 struct list_head next;
2872};
2873
2874static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2875{
2876 struct rt6_nh *nh;
2877
2878 list_for_each_entry(nh, rt6_nh_list, next) {
2879 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2880 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2881 nh->r_cfg.fc_ifindex);
2882 }
2883}
2884
2885static int ip6_route_info_append(struct list_head *rt6_nh_list,
2886 struct rt6_info *rt, struct fib6_config *r_cfg)
2887{
2888 struct rt6_nh *nh;
2889 struct rt6_info *rtnh;
2890 int err = -EEXIST;
2891
2892 list_for_each_entry(nh, rt6_nh_list, next) {
2893 /* check whether an equivalent rt6_info already exists */
2894 rtnh = nh->rt6_info;
2895
2896 if (rtnh->dst.dev == rt->dst.dev &&
2897 rtnh->rt6i_idev == rt->rt6i_idev &&
2898 ipv6_addr_equal(&rtnh->rt6i_gateway,
2899 &rt->rt6i_gateway))
2900 return err;
2901 }
2902
2903 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2904 if (!nh)
2905 return -ENOMEM;
2906 nh->rt6_info = rt;
2907 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2908 if (err) {
2909 kfree(nh);
2910 return err;
2911 }
2912 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2913 list_add_tail(&nh->next, rt6_nh_list);
2914
2915 return 0;
2916}
2917
2918static int ip6_route_multipath_add(struct fib6_config *cfg)
2919{
2920 struct fib6_config r_cfg;
2921 struct rtnexthop *rtnh;
2922 struct rt6_info *rt;
2923 struct rt6_nh *err_nh;
2924 struct rt6_nh *nh, *nh_safe;
2925 int remaining;
2926 int attrlen;
2927 int err = 1;
2928 int nhn = 0;
2929 int replace = (cfg->fc_nlinfo.nlh &&
2930 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2931 LIST_HEAD(rt6_nh_list);
2932
2933 remaining = cfg->fc_mp_len;
2934 rtnh = (struct rtnexthop *)cfg->fc_mp;
2935
2936 /* Parse each multipath entry and build a list (rt6_nh_list) of
2937  * rt6_info structs, one per nexthop
2938  */
2939 while (rtnh_ok(rtnh, remaining)) {
2940 memcpy(&r_cfg, cfg, sizeof(*cfg));
2941 if (rtnh->rtnh_ifindex)
2942 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2943
2944 attrlen = rtnh_attrlen(rtnh);
2945 if (attrlen > 0) {
2946 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2947
2948 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2949 if (nla) {
2950 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2951 r_cfg.fc_flags |= RTF_GATEWAY;
2952 }
2953 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2954 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2955 if (nla)
2956 r_cfg.fc_encap_type = nla_get_u16(nla);
2957 }
2958
2959 err = ip6_route_info_create(&r_cfg, &rt);
2960 if (err)
2961 goto cleanup;
2962
2963 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2964 if (err) {
2965 dst_free(&rt->dst);
2966 goto cleanup;
2967 }
2968
2969 rtnh = rtnh_next(rtnh, &remaining);
2970 }
2971
2972 err_nh = NULL;
2973 list_for_each_entry(nh, &rt6_nh_list, next) {
2974 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2975
2976 nh->rt6_info = NULL;
2977 if (err) {
2978 if (replace && nhn)
2979 ip6_print_replace_route_err(&rt6_nh_list);
2980 err_nh = nh;
2981 goto add_errout;
2982 }
2983
2984 /* Because each route is added like a single route, we remove
2985  * NLM_F_EXCL and NLM_F_REPLACE after the first nexthop: if there
2986  * is a collision, we have already failed to add the first nexthop
2987  * (fib6_add_rt2node() rejected it); when replacing, the old
2988  * nexthops have been replaced by the first new one, and the rest
2989  * should simply be appended to it.
2990  */
2991 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2992 NLM_F_REPLACE);
2993 nhn++;
2994 }
2995
2996 goto cleanup;
2997
2998add_errout:
2999 /* Delete the routes that were already added */
3000 list_for_each_entry(nh, &rt6_nh_list, next) {
3001 if (err_nh == nh)
3002 break;
3003 ip6_route_del(&nh->r_cfg);
3004 }
3005
3006cleanup:
3007 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3008 if (nh->rt6_info)
3009 dst_free(&nh->rt6_info->dst);
3010 if (nh->mxc.mx)
3011 kfree(nh->mxc.mx);
3012 list_del(&nh->next);
3013 kfree(nh);
3014 }
3015
3016 return err;
3017}
3018
3019static int ip6_route_multipath_del(struct fib6_config *cfg)
3020{
3021 struct fib6_config r_cfg;
3022 struct rtnexthop *rtnh;
3023 int remaining;
3024 int attrlen;
3025 int err = 1, last_err = 0;
3026
3027 remaining = cfg->fc_mp_len;
3028 rtnh = (struct rtnexthop *)cfg->fc_mp;
3029
3030 /* Parse each multipath entry and delete it */
3031 while (rtnh_ok(rtnh, remaining)) {
3032 memcpy(&r_cfg, cfg, sizeof(*cfg));
3033 if (rtnh->rtnh_ifindex)
3034 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3035
3036 attrlen = rtnh_attrlen(rtnh);
3037 if (attrlen > 0) {
3038 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3039
3040 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3041 if (nla) {
3042 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3043 r_cfg.fc_flags |= RTF_GATEWAY;
3044 }
3045 }
3046 err = ip6_route_del(&r_cfg);
3047 if (err)
3048 last_err = err;
3049
3050 rtnh = rtnh_next(rtnh, &remaining);
3051 }
3052
3053 return last_err;
3054}
3055
3056static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
3057{
3058 struct fib6_config cfg;
3059 int err;
3060
3061 err = rtm_to_fib6_config(skb, nlh, &cfg);
3062 if (err < 0)
3063 return err;
3064
3065 if (cfg.fc_mp)
3066 return ip6_route_multipath_del(&cfg);
3067 else
3068 return ip6_route_del(&cfg);
3069}
3070
3071static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
3072{
3073 struct fib6_config cfg;
3074 int err;
3075
3076 err = rtm_to_fib6_config(skb, nlh, &cfg);
3077 if (err < 0)
3078 return err;
3079
3080 if (cfg.fc_mp)
3081 return ip6_route_multipath_add(&cfg);
3082 else
3083 return ip6_route_add(&cfg);
3084}
3085
3086static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3087{
3088 return NLMSG_ALIGN(sizeof(struct rtmsg))
3089 + nla_total_size(16) /* RTA_SRC */
3090 + nla_total_size(16) /* RTA_DST */
3091 + nla_total_size(16) /* RTA_GATEWAY */
3092 + nla_total_size(16) /* RTA_PREFSRC */
3093 + nla_total_size(4) /* RTA_TABLE */
3094 + nla_total_size(4) /* RTA_IIF */
3095 + nla_total_size(4) /* RTA_OIF */
3096 + nla_total_size(4) /* RTA_PRIORITY */
3097 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3098 + nla_total_size(sizeof(struct rta_cacheinfo))
3099 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3100 + nla_total_size(1) /* RTA_PREF */
3101 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3102}
3103
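/* rt6_fill_node() dumps a single route into an RTM_NEWROUTE/RTM_DELROUTE
 * netlink message. It must never need more space than rt6_nlmsg_size()
 * accounted for above, otherwise inet6_rt_notify() hits its -EMSGSIZE
 * warning.
 */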
3104static int rt6_fill_node(struct net *net,
3105 struct sk_buff *skb, struct rt6_info *rt,
3106 struct in6_addr *dst, struct in6_addr *src,
3107 int iif, int type, u32 portid, u32 seq,
3108 int prefix, int nowait, unsigned int flags)
3109{
3110 u32 metrics[RTAX_MAX];
3111 struct rtmsg *rtm;
3112 struct nlmsghdr *nlh;
3113 long expires;
3114 u32 table;
3115
3116 if (prefix) {	/* user wants prefix routes only */
3117 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3118 /* success since this is not a prefix route */
3119 return 1;
3120 }
3121 }
3122
3123 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3124 if (!nlh)
3125 return -EMSGSIZE;
3126
3127 rtm = nlmsg_data(nlh);
3128 rtm->rtm_family = AF_INET6;
3129 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3130 rtm->rtm_src_len = rt->rt6i_src.plen;
3131 rtm->rtm_tos = 0;
3132 if (rt->rt6i_table)
3133 table = rt->rt6i_table->tb6_id;
3134 else
3135 table = RT6_TABLE_UNSPEC;
3136 rtm->rtm_table = table;
3137 if (nla_put_u32(skb, RTA_TABLE, table))
3138 goto nla_put_failure;
3139 if (rt->rt6i_flags & RTF_REJECT) {
3140 switch (rt->dst.error) {
3141 case -EINVAL:
3142 rtm->rtm_type = RTN_BLACKHOLE;
3143 break;
3144 case -EACCES:
3145 rtm->rtm_type = RTN_PROHIBIT;
3146 break;
3147 case -EAGAIN:
3148 rtm->rtm_type = RTN_THROW;
3149 break;
3150 default:
3151 rtm->rtm_type = RTN_UNREACHABLE;
3152 break;
3153 }
3154 }
3155 else if (rt->rt6i_flags & RTF_LOCAL)
3156 rtm->rtm_type = RTN_LOCAL;
3157 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3158 rtm->rtm_type = RTN_LOCAL;
3159 else
3160 rtm->rtm_type = RTN_UNICAST;
3161 rtm->rtm_flags = 0;
3162 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3163 rtm->rtm_protocol = rt->rt6i_protocol;
3164 if (rt->rt6i_flags & RTF_DYNAMIC)
3165 rtm->rtm_protocol = RTPROT_REDIRECT;
3166 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3167 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3168 rtm->rtm_protocol = RTPROT_RA;
3169 else
3170 rtm->rtm_protocol = RTPROT_KERNEL;
3171 }
3172
3173 if (rt->rt6i_flags & RTF_CACHE)
3174 rtm->rtm_flags |= RTM_F_CLONED;
3175
3176 if (dst) {
3177 if (nla_put_in6_addr(skb, RTA_DST, dst))
3178 goto nla_put_failure;
3179 rtm->rtm_dst_len = 128;
3180 } else if (rtm->rtm_dst_len)
3181 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3182 goto nla_put_failure;
3183#ifdef CONFIG_IPV6_SUBTREES
3184 if (src) {
3185 if (nla_put_in6_addr(skb, RTA_SRC, src))
3186 goto nla_put_failure;
3187 rtm->rtm_src_len = 128;
3188 } else if (rtm->rtm_src_len &&
3189 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3190 goto nla_put_failure;
3191#endif
3192 if (iif) {
3193#ifdef CONFIG_IPV6_MROUTE
3194 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3195 int err = ip6mr_get_route(net, skb, rtm, nowait);
3196 if (err <= 0) {
3197 if (!nowait) {
3198 if (err == 0)
3199 return 0;
3200 goto nla_put_failure;
3201 } else {
3202 if (err == -EMSGSIZE)
3203 goto nla_put_failure;
3204 }
3205 }
3206 } else
3207#endif
3208 if (nla_put_u32(skb, RTA_IIF, iif))
3209 goto nla_put_failure;
3210 } else if (dst) {
3211 struct in6_addr saddr_buf;
3212 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3213 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3214 goto nla_put_failure;
3215 }
3216
3217 if (rt->rt6i_prefsrc.plen) {
3218 struct in6_addr saddr_buf;
3219 saddr_buf = rt->rt6i_prefsrc.addr;
3220 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3221 goto nla_put_failure;
3222 }
3223
3224 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3225 if (rt->rt6i_pmtu)
3226 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3227 if (rtnetlink_put_metrics(skb, metrics) < 0)
3228 goto nla_put_failure;
3229
3230 if (rt->rt6i_flags & RTF_GATEWAY) {
3231 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3232 goto nla_put_failure;
3233 }
3234
3235 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
3236 rtm->rtm_flags |= RTNH_F_OFFLOAD;
3237
3238 if (rt->dst.dev &&
3239 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3240 goto nla_put_failure;
3241 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3242 goto nla_put_failure;
3243
3244 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3245
3246 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3247 goto nla_put_failure;
3248
3249 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3250 goto nla_put_failure;
3251
3252 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3253
3254 nlmsg_end(skb, nlh);
3255 return 0;
3256
3257nla_put_failure:
3258 nlmsg_cancel(skb, nlh);
3259 return -EMSGSIZE;
3260}
3261
3262int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3263{
3264 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3265 int prefix;
3266
3267 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3268 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3269 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3270 } else
3271 prefix = 0;
3272
3273 return rt6_fill_node(arg->net,
3274 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3275 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3276 prefix, 0, NLM_F_MULTI);
3277}
3278
3279static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
3280{
3281 struct net *net = sock_net(in_skb->sk);
3282 struct nlattr *tb[RTA_MAX+1];
3283 struct rt6_info *rt;
3284 struct sk_buff *skb;
3285 struct rtmsg *rtm;
3286 struct flowi6 fl6;
3287 int err, iif = 0, oif = 0;
3288
3289 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3290 if (err < 0)
3291 goto errout;
3292
3293 err = -EINVAL;
3294 memset(&fl6, 0, sizeof(fl6));
3295 rtm = nlmsg_data(nlh);
3296 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3297
3298 if (tb[RTA_SRC]) {
3299 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3300 goto errout;
3301
3302 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3303 }
3304
3305 if (tb[RTA_DST]) {
3306 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3307 goto errout;
3308
3309 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3310 }
3311
3312 if (tb[RTA_IIF])
3313 iif = nla_get_u32(tb[RTA_IIF]);
3314
3315 if (tb[RTA_OIF])
3316 oif = nla_get_u32(tb[RTA_OIF]);
3317
3318 if (iif) {
3319 struct net_device *dev;
3320 int flags = 0;
3321
3322 dev = __dev_get_by_index(net, iif);
3323 if (!dev) {
3324 err = -ENODEV;
3325 goto errout;
3326 }
3327
3328 fl6.flowi6_iif = iif;
3329
3330 if (!ipv6_addr_any(&fl6.saddr))
3331 flags |= RT6_LOOKUP_F_HAS_SADDR;
3332
3333 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3334 flags);
3335 } else {
3336 fl6.flowi6_oif = oif;
3337
3338 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3339 }
3340
3341 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3342 if (!skb) {
3343 ip6_rt_put(rt);
3344 err = -ENOBUFS;
3345 goto errout;
3346 }
3347
3348 /* Reserve room for dummy headers; this skb can pass
3349  * through a good chunk of the routing engine.
3350  */
3351 skb_reset_mac_header(skb);
3352 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3353
3354 skb_dst_set(skb, &rt->dst);
3355
3356 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3357 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3358 nlh->nlmsg_seq, 0, 0, 0);
3359 if (err < 0) {
3360 kfree_skb(skb);
3361 goto errout;
3362 }
3363
3364 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3365errout:
3366 return err;
3367}
3368
3369void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
3370{
3371 struct sk_buff *skb;
3372 struct net *net = info->nl_net;
3373 u32 seq;
3374 int err;
3375
3376 err = -ENOBUFS;
3377 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3378
3379 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3380 if (!skb)
3381 goto errout;
3382
3383 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3384 event, info->portid, seq, 0, 0, 0);
3385 if (err < 0) {
3386 /* -EMSGSIZE implies a BUG in rt6_nlmsg_size() */
3387 WARN_ON(err == -EMSGSIZE);
3388 kfree_skb(skb);
3389 goto errout;
3390 }
3391 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3392 info->nlh, gfp_any());
3393 return;
3394errout:
3395 if (err < 0)
3396 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3397}
3398
3399static int ip6_route_dev_notify(struct notifier_block *this,
3400 unsigned long event, void *ptr)
3401{
3402 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3403 struct net *net = dev_net(dev);
3404
3405 if (!(dev->flags & IFF_LOOPBACK))
3406 return NOTIFY_OK;
3407
3408 if (event == NETDEV_REGISTER) {
3409 net->ipv6.ip6_null_entry->dst.dev = dev;
3410 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3411#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3412 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3413 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3414 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3415 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3416#endif
3417 } else if (event == NETDEV_UNREGISTER &&
3418 dev->reg_state != NETREG_UNREGISTERED) {
3419 /* NETDEV_UNREGISTER can be fired multiple times by
3420  * netdev_wait_allrefs(). Make sure we only call this once.
3421  */
3422 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3423#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3424 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3425 in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3426#endif
3427 }
3428
3429 return NOTIFY_OK;
3430}
3431
3432/*
3433 *	/proc
3434 */
3435
3436#ifdef CONFIG_PROC_FS
3437
3438static const struct file_operations ipv6_route_proc_fops = {
3439 .owner = THIS_MODULE,
3440 .open = ipv6_route_open,
3441 .read = seq_read,
3442 .llseek = seq_lseek,
3443 .release = seq_release_net,
3444};
3445
3446static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3447{
3448 struct net *net = (struct net *)seq->private;
3449 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3450 net->ipv6.rt6_stats->fib_nodes,
3451 net->ipv6.rt6_stats->fib_route_nodes,
3452 net->ipv6.rt6_stats->fib_rt_alloc,
3453 net->ipv6.rt6_stats->fib_rt_entries,
3454 net->ipv6.rt6_stats->fib_rt_cache,
3455 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3456 net->ipv6.rt6_stats->fib_discarded_routes);
3457
3458 return 0;
3459}
3460
3461static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3462{
3463 return single_open_net(inode, file, rt6_stats_seq_show);
3464}
3465
3466static const struct file_operations rt6_stats_seq_fops = {
3467 .owner = THIS_MODULE,
3468 .open = rt6_stats_seq_open,
3469 .read = seq_read,
3470 .llseek = seq_lseek,
3471 .release = single_release_net,
3472};
3473#endif
3474
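/* Route sysctl knobs (exposed as net.ipv6.route.*). The template below is
 * duplicated per network namespace by ipv6_route_sysctl_init(), which
 * repoints each .data member at the namespace's own fields and hides the
 * privileged "flush" entry from non-init user namespaces.
 */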
3475#ifdef CONFIG_SYSCTL
3476
3477static
3478int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3479 void __user *buffer, size_t *lenp, loff_t *ppos)
3480{
3481 struct net *net;
3482 int delay;
3483 if (!write)
3484 return -EINVAL;
3485
3486 net = (struct net *)ctl->extra1;
3487 delay = net->ipv6.sysctl.flush_delay;
3488 proc_dointvec(ctl, write, buffer, lenp, ppos);
3489 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3490 return 0;
3491}
3492
3493struct ctl_table ipv6_route_table_template[] = {
3494 {
3495 .procname = "flush",
3496 .data = &init_net.ipv6.sysctl.flush_delay,
3497 .maxlen = sizeof(int),
3498 .mode = 0200,
3499 .proc_handler = ipv6_sysctl_rtcache_flush
3500 },
3501 {
3502 .procname = "gc_thresh",
3503 .data = &ip6_dst_ops_template.gc_thresh,
3504 .maxlen = sizeof(int),
3505 .mode = 0644,
3506 .proc_handler = proc_dointvec,
3507 },
3508 {
3509 .procname = "max_size",
3510 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
3511 .maxlen = sizeof(int),
3512 .mode = 0644,
3513 .proc_handler = proc_dointvec,
3514 },
3515 {
3516 .procname = "gc_min_interval",
3517 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3518 .maxlen = sizeof(int),
3519 .mode = 0644,
3520 .proc_handler = proc_dointvec_jiffies,
3521 },
3522 {
3523 .procname = "gc_timeout",
3524 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3525 .maxlen = sizeof(int),
3526 .mode = 0644,
3527 .proc_handler = proc_dointvec_jiffies,
3528 },
3529 {
3530 .procname = "gc_interval",
3531 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3532 .maxlen = sizeof(int),
3533 .mode = 0644,
3534 .proc_handler = proc_dointvec_jiffies,
3535 },
3536 {
3537 .procname = "gc_elasticity",
3538 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3539 .maxlen = sizeof(int),
3540 .mode = 0644,
3541 .proc_handler = proc_dointvec,
3542 },
3543 {
3544 .procname = "mtu_expires",
3545 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3546 .maxlen = sizeof(int),
3547 .mode = 0644,
3548 .proc_handler = proc_dointvec_jiffies,
3549 },
3550 {
3551 .procname = "min_adv_mss",
3552 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3553 .maxlen = sizeof(int),
3554 .mode = 0644,
3555 .proc_handler = proc_dointvec,
3556 },
3557 {
3558 .procname = "gc_min_interval_ms",
3559 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3560 .maxlen = sizeof(int),
3561 .mode = 0644,
3562 .proc_handler = proc_dointvec_ms_jiffies,
3563 },
3564 { }
3565};
3566
3567struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3568{
3569 struct ctl_table *table;
3570
3571 table = kmemdup(ipv6_route_table_template,
3572 sizeof(ipv6_route_table_template),
3573 GFP_KERNEL);
3574
3575 if (table) {
3576 table[0].data = &net->ipv6.sysctl.flush_delay;
3577 table[0].extra1 = net;
3578 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3579 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3580 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3581 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3582 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3583 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3584 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3585 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3586 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3587
3588 /* Don't export sysctls to unprivileged users */
3589 if (net->user_ns != &init_user_ns)
3590 table[0].procname = NULL;
3591 }
3592
3593 return table;
3594}
3595#endif
3596
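/* Per-namespace setup: clone the dst_ops template, allocate the null entry
 * (and, with CONFIG_IPV6_MULTIPLE_TABLES, the prohibit and blackhole
 * entries), and seed the routing sysctl defaults.
 */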
3597static int __net_init ip6_route_net_init(struct net *net)
3598{
3599 int ret = -ENOMEM;
3600
3601 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3602 sizeof(net->ipv6.ip6_dst_ops));
3603
3604 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3605 goto out_ip6_dst_ops;
3606
3607 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3608 sizeof(*net->ipv6.ip6_null_entry),
3609 GFP_KERNEL);
3610 if (!net->ipv6.ip6_null_entry)
3611 goto out_ip6_dst_entries;
3612 net->ipv6.ip6_null_entry->dst.path =
3613 (struct dst_entry *)net->ipv6.ip6_null_entry;
3614 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3615 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3616 ip6_template_metrics, true);
3617
3618#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3619 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3620 sizeof(*net->ipv6.ip6_prohibit_entry),
3621 GFP_KERNEL);
3622 if (!net->ipv6.ip6_prohibit_entry)
3623 goto out_ip6_null_entry;
3624 net->ipv6.ip6_prohibit_entry->dst.path =
3625 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3626 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3627 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3628 ip6_template_metrics, true);
3629
3630 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3631 sizeof(*net->ipv6.ip6_blk_hole_entry),
3632 GFP_KERNEL);
3633 if (!net->ipv6.ip6_blk_hole_entry)
3634 goto out_ip6_prohibit_entry;
3635 net->ipv6.ip6_blk_hole_entry->dst.path =
3636 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3637 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3638 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3639 ip6_template_metrics, true);
3640#endif
3641
3642 net->ipv6.sysctl.flush_delay = 0;
3643 net->ipv6.sysctl.ip6_rt_max_size = 16384;
3644 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3645 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3646 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3647 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3648 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3649 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3650
3651 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3652
3653 ret = 0;
3654out:
3655 return ret;
3656
3657#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3658out_ip6_prohibit_entry:
3659 kfree(net->ipv6.ip6_prohibit_entry);
3660out_ip6_null_entry:
3661 kfree(net->ipv6.ip6_null_entry);
3662#endif
3663out_ip6_dst_entries:
3664 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3665out_ip6_dst_ops:
3666 goto out;
3667}
3668
3669static void __net_exit ip6_route_net_exit(struct net *net)
3670{
3671 kfree(net->ipv6.ip6_null_entry);
3672#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3673 kfree(net->ipv6.ip6_prohibit_entry);
3674 kfree(net->ipv6.ip6_blk_hole_entry);
3675#endif
3676 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3677}
3678
3679static int __net_init ip6_route_net_init_late(struct net *net)
3680{
3681#ifdef CONFIG_PROC_FS
3682 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3683 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3684#endif
3685 return 0;
3686}
3687
3688static void __net_exit ip6_route_net_exit_late(struct net *net)
3689{
3690#ifdef CONFIG_PROC_FS
3691 remove_proc_entry("ipv6_route", net->proc_net);
3692 remove_proc_entry("rt6_stats", net->proc_net);
3693#endif
3694}
3695
3696static struct pernet_operations ip6_route_net_ops = {
3697 .init = ip6_route_net_init,
3698 .exit = ip6_route_net_exit,
3699};
3700
3701static int __net_init ipv6_inetpeer_init(struct net *net)
3702{
3703 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3704
3705 if (!bp)
3706 return -ENOMEM;
3707 inet_peer_base_init(bp);
3708 net->ipv6.peers = bp;
3709 return 0;
3710}
3711
3712static void __net_exit ipv6_inetpeer_exit(struct net *net)
3713{
3714 struct inet_peer_base *bp = net->ipv6.peers;
3715
3716 net->ipv6.peers = NULL;
3717 inetpeer_invalidate_tree(bp);
3718 kfree(bp);
3719}
3720
3721static struct pernet_operations ipv6_inetpeer_ops = {
3722 .init = ipv6_inetpeer_init,
3723 .exit = ipv6_inetpeer_exit,
3724};
3725
3726static struct pernet_operations ip6_route_net_late_ops = {
3727 .init = ip6_route_net_init_late,
3728 .exit = ip6_route_net_exit_late,
3729};
3730
3731static struct notifier_block ip6_route_dev_notifier = {
3732 .notifier_call = ip6_route_dev_notify,
3733 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
3734};
3735
3736void __init ip6_route_init_special_entries(void)
3737{
3738 /* The loopback device is registered before this code runs, so the
3739  * loopback reference in these rt6_info templates has not been taken
3740  * yet; take it manually for init_net. */
3741 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3742 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3743 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3744 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3745 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3746 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3747 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3748 #endif
3749}
3750
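/* Subsystem init: create the dst kmem cache, register per-netns state,
 * bring up the FIB, xfrm6 and policy rules, register the RTM_*ROUTE
 * rtnetlink handlers and the netdevice notifier, and initialise the
 * per-cpu uncached route lists. Error paths unwind in reverse order.
 */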
3751int __init ip6_route_init(void)
3752{
3753 int ret;
3754 int cpu;
3755
3756 ret = -ENOMEM;
3757 ip6_dst_ops_template.kmem_cachep =
3758 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3759 SLAB_HWCACHE_ALIGN, NULL);
3760 if (!ip6_dst_ops_template.kmem_cachep)
3761 goto out;
3762
3763 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3764 if (ret)
3765 goto out_kmem_cache;
3766
3767 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3768 if (ret)
3769 goto out_dst_entries;
3770
3771 ret = register_pernet_subsys(&ip6_route_net_ops);
3772 if (ret)
3773 goto out_register_inetpeer;
3774
3775 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3776
3777 ret = fib6_init();
3778 if (ret)
3779 goto out_register_subsys;
3780
3781 ret = xfrm6_init();
3782 if (ret)
3783 goto out_fib6_init;
3784
3785 ret = fib6_rules_init();
3786 if (ret)
3787 goto xfrm6_init;
3788
3789 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3790 if (ret)
3791 goto fib6_rules_init;
3792
3793 ret = -ENOBUFS;
3794 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3795 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3796 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3797 goto out_register_late_subsys;
3798
3799 ret = register_netdevice_notifier_rh(&ip6_route_dev_notifier);
3800 if (ret)
3801 goto out_register_late_subsys;
3802
3803 for_each_possible_cpu(cpu) {
3804 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3805
3806 INIT_LIST_HEAD(&ul->head);
3807 spin_lock_init(&ul->lock);
3808 }
3809
3810out:
3811 return ret;
3812
3813out_register_late_subsys:
3814 unregister_pernet_subsys(&ip6_route_net_late_ops);
3815fib6_rules_init:
3816 fib6_rules_cleanup();
3817xfrm6_init:
3818 xfrm6_fini();
3819out_fib6_init:
3820 fib6_gc_cleanup();
3821out_register_subsys:
3822 unregister_pernet_subsys(&ip6_route_net_ops);
3823out_register_inetpeer:
3824 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3825out_dst_entries:
3826 dst_entries_destroy(&ip6_dst_blackhole_ops);
3827out_kmem_cache:
3828 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3829 goto out;
3830}
3831
3832void ip6_route_cleanup(void)
3833{
3834 unregister_netdevice_notifier_rh(&ip6_route_dev_notifier);
3835 unregister_pernet_subsys(&ip6_route_net_late_ops);
3836 fib6_rules_cleanup();
3837 xfrm6_fini();
3838 fib6_gc_cleanup();
3839 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3840 unregister_pernet_subsys(&ip6_route_net_ops);
3841 dst_entries_destroy(&ip6_dst_blackhole_ops);
3842 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3843}
3844