/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>

#include <linux/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
72
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};
79
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
119
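/* Per-cpu lists of uncached (DST_NOCACHE) rt6_info entries.  These routes
 * live outside the fib6 tree, so they are tracked here only so that
 * rt6_uncached_list_flush_dev() can re-parent them to the loopback device
 * when the device they reference is unregistered.
 */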
120struct uncached_list {
121 spinlock_t lock;
122 struct list_head head;
123};
124
125static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126
127static void rt6_uncached_list_add(struct rt6_info *rt)
128{
129 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130
131 rt->dst.flags |= DST_NOCACHE;
132 rt->rt6i_uncached_list = ul;
133
134 spin_lock_bh(&ul->lock);
135 list_add_tail(&rt->rt6i_uncached, &ul->head);
136 spin_unlock_bh(&ul->lock);
137}
138
139static void rt6_uncached_list_del(struct rt6_info *rt)
140{
141 if (!list_empty(&rt->rt6i_uncached)) {
142 struct uncached_list *ul = rt->rt6i_uncached_list;
143
144 spin_lock_bh(&ul->lock);
145 list_del(&rt->rt6i_uncached);
146 spin_unlock_bh(&ul->lock);
147 }
148}
149
150static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
151{
152 struct net_device *loopback_dev = net->loopback_dev;
153 int cpu;
154
155 if (dev == loopback_dev)
156 return;
157
158 for_each_possible_cpu(cpu) {
159 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
160 struct rt6_info *rt;
161
162 spin_lock_bh(&ul->lock);
163 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
164 struct inet6_dev *rt_idev = rt->rt6i_idev;
165 struct net_device *rt_dev = rt->dst.dev;
166
167 if (rt_idev->dev == dev) {
168 rt->rt6i_idev = in6_dev_get(loopback_dev);
169 in6_dev_put(rt_idev);
170 }
171
172 if (rt_dev == dev) {
173 rt->dst.dev = loopback_dev;
174 dev_hold(rt->dst.dev);
175 dev_put(rt_dev);
176 }
177 }
178 spin_unlock_bh(&ul->lock);
179 }
180}
181
182static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
183{
184 return dst_metrics_write_ptr(rt->dst.from);
185}
186
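/* Metric handling: RTF_PCPU clones write through to the metrics of the
 * route they were copied from, RTF_CACHE clones are never copied on write
 * (ipv6_cow_metrics() returns NULL for them), and every other route uses
 * the generic dst copy-on-write helper.
 */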
187static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
188{
189 struct rt6_info *rt = (struct rt6_info *)dst;
190
191 if (rt->rt6i_flags & RTF_PCPU)
192 return rt6_pcpu_cow_metrics(rt);
193 else if (rt->rt6i_flags & RTF_CACHE)
194 return NULL;
195 else
196 return dst_cow_metrics_generic(dst, old);
197}
198
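/* Address used for neighbour resolution: the route's gateway when one is
 * set, otherwise the destination of the packet being sent (or the
 * caller-supplied daddr when no skb is available).
 */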
199static inline const void *choose_neigh_daddr(struct rt6_info *rt,
200 struct sk_buff *skb,
201 const void *daddr)
202{
203 struct in6_addr *p = &rt->rt6i_gateway;
204
205 if (!ipv6_addr_any(p))
206 return (const void *) p;
207 else if (skb)
208 return &ipv6_hdr(skb)->daddr;
209 return daddr;
210}
211
212static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
213 struct sk_buff *skb,
214 const void *daddr)
215{
216 struct rt6_info *rt = (struct rt6_info *) dst;
217 struct neighbour *n;
218
219 daddr = choose_neigh_daddr(rt, skb, daddr);
220 n = __ipv6_neigh_lookup(dst->dev, daddr);
221 if (n)
222 return n;
223 return neigh_create(&nd_tbl, daddr, dst->dev);
224}
225
226static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
227{
228 struct net_device *dev = dst->dev;
229 struct rt6_info *rt = (struct rt6_info *)dst;
230
231 daddr = choose_neigh_daddr(rt, NULL, daddr);
232 if (!daddr)
233 return;
234 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
235 return;
236 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
237 return;
238 __ipv6_confirm_neigh(dev, daddr);
239}
240
241static struct dst_ops ip6_dst_ops_template = {
242 .family = AF_INET6,
243 .gc = ip6_dst_gc,
244 .gc_thresh = 1024,
245 .check = ip6_dst_check,
246 .default_advmss = ip6_default_advmss,
247 .mtu = ip6_mtu,
248 .cow_metrics = ipv6_cow_metrics,
249 .destroy = ip6_dst_destroy,
250 .ifdown = ip6_dst_ifdown,
251 .negative_advice = ip6_negative_advice,
252 .link_failure = ip6_link_failure,
253 .update_pmtu = ip6_rt_update_pmtu,
254 .redirect = rt6_do_redirect,
255 .local_out = __ip6_local_out,
256 .neigh_lookup = ip6_neigh_lookup,
257 .confirm_neigh = ip6_confirm_neigh,
258};
259
260static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
261{
262 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
263
264 return mtu ? : dst->dev->mtu;
265}
266
267static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
268 struct sk_buff *skb, u32 mtu)
269{
270}
271
272static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
273 struct sk_buff *skb)
274{
275}
276
277static struct dst_ops ip6_dst_blackhole_ops = {
278 .family = AF_INET6,
279 .destroy = ip6_dst_destroy,
280 .check = ip6_dst_check,
281 .mtu = ip6_blackhole_mtu,
282 .default_advmss = ip6_default_advmss,
283 .update_pmtu = ip6_rt_blackhole_update_pmtu,
284 .redirect = ip6_rt_blackhole_redirect,
285 .cow_metrics = dst_cow_metrics_generic,
286 .neigh_lookup = ip6_neigh_lookup,
287};
288
289static const u32 ip6_template_metrics[RTAX_MAX] = {
290 [RTAX_HOPLIMIT - 1] = 0,
291};
292
293static const struct rt6_info ip6_null_entry_template = {
294 .dst = {
295 .__refcnt = ATOMIC_INIT(1),
296 .__use = 1,
297 .obsolete = DST_OBSOLETE_FORCE_CHK,
298 .error = -ENETUNREACH,
299 .input = ip6_pkt_discard,
300 .output = ip6_pkt_discard_out,
301 },
302 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
303 .rt6i_protocol = RTPROT_KERNEL,
304 .rt6i_metric = ~(u32) 0,
305 .rt6i_ref = ATOMIC_INIT(1),
306};
307
308#ifdef CONFIG_IPV6_MULTIPLE_TABLES
309
310static const struct rt6_info ip6_prohibit_entry_template = {
311 .dst = {
312 .__refcnt = ATOMIC_INIT(1),
313 .__use = 1,
314 .obsolete = DST_OBSOLETE_FORCE_CHK,
315 .error = -EACCES,
316 .input = ip6_pkt_prohibit,
317 .output = ip6_pkt_prohibit_out,
318 },
319 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
320 .rt6i_protocol = RTPROT_KERNEL,
321 .rt6i_metric = ~(u32) 0,
322 .rt6i_ref = ATOMIC_INIT(1),
323};
324
325static const struct rt6_info ip6_blk_hole_entry_template = {
326 .dst = {
327 .__refcnt = ATOMIC_INIT(1),
328 .__use = 1,
329 .obsolete = DST_OBSOLETE_FORCE_CHK,
330 .error = -EINVAL,
331 .input = dst_discard,
332 .output = dst_discard_out,
333 },
334 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
335 .rt6i_protocol = RTPROT_KERNEL,
336 .rt6i_metric = ~(u32) 0,
337 .rt6i_ref = ATOMIC_INIT(1),
338};
339
340#endif
341
342static void rt6_info_init(struct rt6_info *rt)
343{
344 struct dst_entry *dst = &rt->dst;
345
346 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
347 INIT_LIST_HEAD(&rt->rt6i_siblings);
348 INIT_LIST_HEAD(&rt->rt6i_uncached);
349}
350
351
352static struct rt6_info *__ip6_dst_alloc(struct net *net,
353 struct net_device *dev,
354 int flags)
355{
356 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
357 0, DST_OBSOLETE_FORCE_CHK, flags);
358
359 if (rt)
360 rt6_info_init(rt);
361
362 return rt;
363}
364
365struct rt6_info *ip6_dst_alloc(struct net *net,
366 struct net_device *dev,
367 int flags)
368{
369 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
370
371 if (rt) {
372 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
373 if (rt->rt6i_pcpu) {
374 int cpu;
375
376 for_each_possible_cpu(cpu) {
377 struct rt6_info **p;
378
379 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
380
381 *p = NULL;
382 }
383 } else {
384 dst_destroy((struct dst_entry *)rt);
385 return NULL;
386 }
387 }
388
389 return rt;
390}
391EXPORT_SYMBOL(ip6_dst_alloc);
392
393static void ip6_dst_destroy(struct dst_entry *dst)
394{
395 struct rt6_info *rt = (struct rt6_info *)dst;
396 struct dst_entry *from = dst->from;
397 struct inet6_dev *idev;
398
399 dst_destroy_metrics_generic(dst);
400 free_percpu(rt->rt6i_pcpu);
401 rt6_uncached_list_del(rt);
402
403 idev = rt->rt6i_idev;
404 if (idev) {
405 rt->rt6i_idev = NULL;
406 in6_dev_put(idev);
407 }
408
409 dst->from = NULL;
410 dst_release(from);
411}
412
413static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
414 int how)
415{
416 struct rt6_info *rt = (struct rt6_info *)dst;
417 struct inet6_dev *idev = rt->rt6i_idev;
418 struct net_device *loopback_dev =
419 dev_net(dev)->loopback_dev;
420
421 if (dev != loopback_dev) {
422 if (idev && idev->dev == dev) {
423 struct inet6_dev *loopback_idev =
424 in6_dev_get(loopback_dev);
425 if (loopback_idev) {
426 rt->rt6i_idev = loopback_idev;
427 in6_dev_put(idev);
428 }
429 }
430 }
431}
432
433static bool __rt6_check_expired(const struct rt6_info *rt)
434{
435 if (rt->rt6i_flags & RTF_EXPIRES)
436 return time_after(jiffies, rt->dst.expires);
437 else
438 return false;
439}
440
441static bool rt6_check_expired(const struct rt6_info *rt)
442{
443 if (rt->rt6i_flags & RTF_EXPIRES) {
444 if (time_after(jiffies, rt->dst.expires))
445 return true;
446 } else if (rt->dst.from) {
447 return rt6_check_expired((struct rt6_info *) rt->dst.from);
448 }
449 return false;
450}
451
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
456static int rt6_info_hash_nhsfn(unsigned int candidate_count,
457 const struct flowi6 *fl6)
458{
459 return get_hash_from_flowi6(fl6) % candidate_count;
460}
461
462static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
463 struct flowi6 *fl6, int oif,
464 int strict)
465{
466 struct rt6_info *sibling, *next_sibling;
467 int route_choosen;
468
469 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route if route_choosen == 0
	 * (the siblings list does not include the matched route itself).
	 */
473 if (route_choosen)
474 list_for_each_entry_safe(sibling, next_sibling,
475 &match->rt6i_siblings, rt6i_siblings) {
476 route_choosen--;
477 if (route_choosen == 0) {
478 if (rt6_score_route(sibling, oif, strict) < 0)
479 break;
480 match = sibling;
481 break;
482 }
483 }
484 return match;
485}
486
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

491static inline struct rt6_info *rt6_device_match(struct net *net,
492 struct rt6_info *rt,
493 const struct in6_addr *saddr,
494 int oif,
495 int flags)
496{
497 struct rt6_info *local = NULL;
498 struct rt6_info *sprt;
499
500 if (!oif && ipv6_addr_any(saddr))
501 goto out;
502
503 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
504 struct net_device *dev = sprt->dst.dev;
505
506 if (oif) {
507 if (dev->ifindex == oif)
508 return sprt;
509 if (dev->flags & IFF_LOOPBACK) {
510 if (!sprt->rt6i_idev ||
511 sprt->rt6i_idev->dev->ifindex != oif) {
512 if (flags & RT6_LOOKUP_F_IFACE)
513 continue;
514 if (local &&
515 local->rt6i_idev->dev->ifindex == oif)
516 continue;
517 }
518 local = sprt;
519 }
520 } else {
521 if (ipv6_chk_addr(net, saddr, dev,
522 flags & RT6_LOOKUP_F_IFACE))
523 return sprt;
524 }
525 }
526
527 if (oif) {
528 if (local)
529 return local;
530
531 if (flags & RT6_LOOKUP_F_IFACE)
532 return net->ipv6.ip6_null_entry;
533 }
534out:
535 return rt;
536}
537
538#ifdef CONFIG_IPV6_ROUTER_PREF
539struct __rt6_probe_work {
540 struct work_struct work;
541 struct in6_addr target;
542 struct net_device *dev;
543};
544
545static void rt6_probe_deferred(struct work_struct *w)
546{
547 struct in6_addr mcaddr;
548 struct __rt6_probe_work *work =
549 container_of(w, struct __rt6_probe_work, work);
550
551 addrconf_addr_solict_mult(&work->target, &mcaddr);
552 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
553 dev_put(work->dev);
554 kfree(work);
555}
556
557static void rt6_probe(struct rt6_info *rt)
558{
559 struct __rt6_probe_work *work;
560 struct neighbour *neigh;
561
	/*
	 * Probe routers whose gateway reachability is unknown so that router
	 * selection has fresh NUD information to work with.  Probes of an
	 * existing neighbour entry are rate-limited by rtr_probe_interval,
	 * and the neighbour solicitation itself is sent from a work item
	 * because this function can be called in atomic context.
	 */
569 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
570 return;
571 rcu_read_lock_bh();
572 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
573 if (neigh) {
574 if (neigh->nud_state & NUD_VALID)
575 goto out;
576
577 work = NULL;
578 write_lock(&neigh->lock);
579 if (!(neigh->nud_state & NUD_VALID) &&
580 time_after(jiffies,
581 neigh->updated +
582 rt->rt6i_idev->cnf.rtr_probe_interval)) {
583 work = kmalloc(sizeof(*work), GFP_ATOMIC);
584 if (work)
585 __neigh_set_probe_once(neigh);
586 }
587 write_unlock(&neigh->lock);
588 } else {
589 work = kmalloc(sizeof(*work), GFP_ATOMIC);
590 }
591
592 if (work) {
593 INIT_WORK(&work->work, rt6_probe_deferred);
594 work->target = rt->rt6i_gateway;
595 dev_hold(rt->dst.dev);
596 work->dev = rt->dst.dev;
597 schedule_work(&work->work);
598 }
599
600out:
601 rcu_read_unlock_bh();
602}
603#else
604static inline void rt6_probe(struct rt6_info *rt)
605{
606}
607#endif
608
/*
 * Default Router Selection (RFC 4861 6.3.6)
 */
612static inline int rt6_check_dev(struct rt6_info *rt, int oif)
613{
614 struct net_device *dev = rt->dst.dev;
615 if (!oif || dev->ifindex == oif)
616 return 2;
617 if ((dev->flags & IFF_LOOPBACK) &&
618 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
619 return 1;
620 return 0;
621}
622
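/* Map the neighbour (NUD) state of a route's gateway onto the rt6_nud_state
 * values used by rt6_score_route(): routes without a gateway always
 * succeed, a NUD_FAILED neighbour requests a probe, and a missing neighbour
 * entry either succeeds (CONFIG_IPV6_ROUTER_PREF) or asks the caller to
 * round-robin to the next router (RT6_NUD_FAIL_DO_RR).
 */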
623static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
624{
625 struct neighbour *neigh;
626 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
627
628 if (rt->rt6i_flags & RTF_NONEXTHOP ||
629 !(rt->rt6i_flags & RTF_GATEWAY))
630 return RT6_NUD_SUCCEED;
631
632 rcu_read_lock_bh();
633 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
634 if (neigh) {
635 read_lock(&neigh->lock);
636 if (neigh->nud_state & NUD_VALID)
637 ret = RT6_NUD_SUCCEED;
638#ifdef CONFIG_IPV6_ROUTER_PREF
639 else if (!(neigh->nud_state & NUD_FAILED))
640 ret = RT6_NUD_SUCCEED;
641 else
642 ret = RT6_NUD_FAIL_PROBE;
643#endif
644 read_unlock(&neigh->lock);
645 } else {
646 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
647 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
648 }
649 rcu_read_unlock_bh();
650
651 return ret;
652}
653
654static int rt6_score_route(struct rt6_info *rt, int oif,
655 int strict)
656{
657 int m;
658
659 m = rt6_check_dev(rt, oif);
660 if (!m && (strict & RT6_LOOKUP_F_IFACE))
661 return RT6_NUD_FAIL_HARD;
662#ifdef CONFIG_IPV6_ROUTER_PREF
663 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
664#endif
665 if (strict & RT6_LOOKUP_F_REACHABLE) {
666 int n = rt6_check_neigh(rt);
667 if (n < 0)
668 return n;
669 }
670 return m;
671}
672
673static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
674 int *mpri, struct rt6_info *match,
675 bool *do_rr)
676{
677 int m;
678 bool match_do_rr = false;
679 struct inet6_dev *idev = rt->rt6i_idev;
680 struct net_device *dev = rt->dst.dev;
681
682 if (dev && !netif_carrier_ok(dev) &&
683 idev->cnf.ignore_routes_with_linkdown &&
684 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
685 goto out;
686
687 if (rt6_check_expired(rt))
688 goto out;
689
690 m = rt6_score_route(rt, oif, strict);
691 if (m == RT6_NUD_FAIL_DO_RR) {
692 match_do_rr = true;
693 m = 0;
694 } else if (m == RT6_NUD_FAIL_HARD) {
695 goto out;
696 }
697
698 if (strict & RT6_LOOKUP_F_REACHABLE)
699 rt6_probe(rt);
700
	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
702 if (m > *mpri) {
703 *do_rr = match_do_rr;
704 *mpri = m;
705 match = rt;
706 }
707out:
708 return match;
709}
710
711static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
712 struct rt6_info *rr_head,
713 u32 metric, int oif, int strict,
714 bool *do_rr)
715{
716 struct rt6_info *rt, *match, *cont;
717 int mpri = -1;
718
719 match = NULL;
720 cont = NULL;
721 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
722 if (rt->rt6i_metric != metric) {
723 cont = rt;
724 break;
725 }
726
727 match = find_match(rt, oif, strict, &mpri, match, do_rr);
728 }
729
730 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
731 if (rt->rt6i_metric != metric) {
732 cont = rt;
733 break;
734 }
735
736 match = find_match(rt, oif, strict, &mpri, match, do_rr);
737 }
738
739 if (match || !cont)
740 return match;
741
742 for (rt = cont; rt; rt = rt->dst.rt6_next)
743 match = find_match(rt, oif, strict, &mpri, match, do_rr);
744
745 return match;
746}
747
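/* Select the best route at this fib6 node: score the equal-metric routes
 * starting at fn->rr_ptr with find_match(), and advance rr_ptr
 * (round-robin) when the previously preferred route is no longer a usable
 * next hop.
 */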
748static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
749{
750 struct rt6_info *match, *rt0;
751 struct net *net;
752 bool do_rr = false;
753
754 rt0 = fn->rr_ptr;
755 if (!rt0)
756 fn->rr_ptr = rt0 = fn->leaf;
757
758 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
759 &do_rr);
760
761 if (do_rr) {
762 struct rt6_info *next = rt0->dst.rt6_next;
763
		/* no entries matched; do round-robin */
765 if (!next || next->rt6i_metric != rt0->rt6i_metric)
766 next = fn->leaf;
767
768 if (next != rt0)
769 fn->rr_ptr = next;
770 }
771
772 net = dev_net(rt0->dst.dev);
773 return match ? match : net->ipv6.ip6_null_entry;
774}
775
776static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
777{
778 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
779}
780
781#ifdef CONFIG_IPV6_ROUTE_INFO
782int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
783 const struct in6_addr *gwaddr)
784{
785 struct net *net = dev_net(dev);
786 struct route_info *rinfo = (struct route_info *) opt;
787 struct in6_addr prefix_buf, *prefix;
788 unsigned int pref;
789 unsigned long lifetime;
790 struct rt6_info *rt;
791
792 if (len < sizeof(struct route_info)) {
793 return -EINVAL;
794 }
795
	/* Sanity check for prefix_len and length */
797 if (rinfo->length > 3) {
798 return -EINVAL;
799 } else if (rinfo->prefix_len > 128) {
800 return -EINVAL;
801 } else if (rinfo->prefix_len > 64) {
802 if (rinfo->length < 2) {
803 return -EINVAL;
804 }
805 } else if (rinfo->prefix_len > 0) {
806 if (rinfo->length < 1) {
807 return -EINVAL;
808 }
809 }
810
811 pref = rinfo->route_pref;
812 if (pref == ICMPV6_ROUTER_PREF_INVALID)
813 return -EINVAL;
814
815 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
816
817 if (rinfo->length == 3)
818 prefix = (struct in6_addr *)rinfo->prefix;
819 else {
		/* ipv6_addr_prefix() copies only prefix_len bits, so it is
		 * safe even when the option carries a truncated prefix.
		 */
821 ipv6_addr_prefix(&prefix_buf,
822 (struct in6_addr *)rinfo->prefix,
823 rinfo->prefix_len);
824 prefix = &prefix_buf;
825 }
826
827 if (rinfo->prefix_len == 0)
828 rt = rt6_get_dflt_router(gwaddr, dev);
829 else
830 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
831 gwaddr, dev);
832
833 if (rt && !lifetime) {
834 ip6_del_rt(rt);
835 rt = NULL;
836 }
837
838 if (!rt && lifetime)
839 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
840 dev, pref);
841 else if (rt)
842 rt->rt6i_flags = RTF_ROUTEINFO |
843 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
844
845 if (rt) {
846 if (!addrconf_finite_timeout(lifetime))
847 rt6_clean_expires(rt);
848 else
849 rt6_set_expires(rt, jiffies + HZ * lifetime);
850
851 ip6_rt_put(rt);
852 }
853 return 0;
854}
855#endif
856
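/* Walk back up the fib6 tree (descending into source-address subtrees where
 * they exist) until a node carrying route information is found, or NULL
 * once the top-level root has been reached.
 */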
857static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
858 struct in6_addr *saddr)
859{
860 struct fib6_node *pn;
861 while (1) {
862 if (fn->fn_flags & RTN_TL_ROOT)
863 return NULL;
864 pn = fn->parent;
865 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
866 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
867 else
868 fn = pn;
869 if (fn->fn_flags & RTN_RTINFO)
870 return fn;
871 }
872}
873
874static struct rt6_info *ip6_pol_route_lookup(struct net *net,
875 struct fib6_table *table,
876 struct flowi6 *fl6, int flags)
877{
878 struct fib6_node *fn;
879 struct rt6_info *rt;
880
881 read_lock_bh(&table->tb6_lock);
882 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
883restart:
884 rt = fn->leaf;
885 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
886 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
887 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
888 if (rt == net->ipv6.ip6_null_entry) {
889 fn = fib6_backtrack(fn, &fl6->saddr);
890 if (fn)
891 goto restart;
892 }
893 dst_use(&rt->dst, jiffies);
894 read_unlock_bh(&table->tb6_lock);
895
896 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
897
898 return rt;
899
900}
901
902struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
903 int flags)
904{
905 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
906}
907EXPORT_SYMBOL_GPL(ip6_route_lookup);
908
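/* Convenience wrapper around fib6_rule_lookup() for callers that only have
 * addresses and an optional interface.  A minimal usage sketch:
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *	if (rt)
 *		ip6_rt_put(rt);
 *
 * Returns a held route on success, or NULL if the lookup resolved to an
 * error dst; the caller releases the reference with ip6_rt_put().
 */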
909struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
910 const struct in6_addr *saddr, int oif, int strict)
911{
912 struct flowi6 fl6 = {
913 .flowi6_oif = oif,
914 .daddr = *daddr,
915 };
916 struct dst_entry *dst;
917 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
918
919 if (saddr) {
920 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
921 flags |= RT6_LOOKUP_F_HAS_SADDR;
922 }
923
924 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
925 if (dst->error == 0)
926 return (struct rt6_info *) dst;
927
928 dst_release(dst);
929
930 return NULL;
931}
932EXPORT_SYMBOL(rt6_lookup);
933
/* Insert a route into its FIB table.  Called without table->tb6_lock held;
 * the lock is taken here around fib6_add().
 */
940static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
941 struct mx6_config *mxc)
942{
943 int err;
944 struct fib6_table *table;
945
946 table = rt->rt6i_table;
947 write_lock_bh(&table->tb6_lock);
948 err = fib6_add(&table->tb6_root, rt, info, mxc);
949 write_unlock_bh(&table->tb6_lock);
950
951 return err;
952}
953
954int ip6_ins_rt(struct rt6_info *rt)
955{
956 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
957 struct mx6_config mxc = { .mx = NULL, };
958
959 return __ip6_ins_rt(rt, &info, &mxc);
960}
961
962static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
963 const struct in6_addr *daddr,
964 const struct in6_addr *saddr)
965{
966 struct rt6_info *rt;
967
	/*
	 *	Clone the route.
	 */
972 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
973 ort = (struct rt6_info *)ort->dst.from;
974
975 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
976
977 if (!rt)
978 return NULL;
979
980 ip6_rt_copy_init(rt, ort);
981 rt->rt6i_flags |= RTF_CACHE;
982 rt->rt6i_metric = 0;
983 rt->dst.flags |= DST_HOST;
984 rt->rt6i_dst.addr = *daddr;
985 rt->rt6i_dst.plen = 128;
986
987 if (!rt6_is_gw_or_nonexthop(ort)) {
988 if (ort->rt6i_dst.plen != 128 &&
989 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
990 rt->rt6i_flags |= RTF_ANYCAST;
991#ifdef CONFIG_IPV6_SUBTREES
992 if (rt->rt6i_src.plen && saddr) {
993 rt->rt6i_src.addr = *saddr;
994 rt->rt6i_src.plen = 128;
995 }
996#endif
997 }
998
999 return rt;
1000}
1001
1002static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1003{
1004 struct rt6_info *pcpu_rt;
1005
1006 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
1007 rt->dst.dev, rt->dst.flags);
1008
1009 if (!pcpu_rt)
1010 return NULL;
1011 ip6_rt_copy_init(pcpu_rt, rt);
1012 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1013 pcpu_rt->rt6i_flags |= RTF_PCPU;
1014 return pcpu_rt;
1015}
1016
/* It should be called with read_lock_bh(&tb6_lock) acquired */
1018static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1019{
1020 struct rt6_info *pcpu_rt, **p;
1021
1022 p = this_cpu_ptr(rt->rt6i_pcpu);
1023 pcpu_rt = *p;
1024
1025 if (pcpu_rt) {
1026 dst_hold(&pcpu_rt->dst);
1027 rt6_dst_from_metrics_check(pcpu_rt);
1028 }
1029 return pcpu_rt;
1030}
1031
1032static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1033{
1034 struct fib6_table *table = rt->rt6i_table;
1035 struct rt6_info *pcpu_rt, *prev, **p;
1036
1037 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1038 if (!pcpu_rt) {
1039 struct net *net = dev_net(rt->dst.dev);
1040
1041 dst_hold(&net->ipv6.ip6_null_entry->dst);
1042 return net->ipv6.ip6_null_entry;
1043 }
1044
1045 read_lock_bh(&table->tb6_lock);
1046 if (rt->rt6i_pcpu) {
1047 p = this_cpu_ptr(rt->rt6i_pcpu);
1048 prev = cmpxchg(p, NULL, pcpu_rt);
1049 if (prev) {
			/* this cpu's slot was already populated; use that copy */
1051 dst_destroy(&pcpu_rt->dst);
1052 pcpu_rt = prev;
1053 }
1054 } else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
1061 dst_destroy(&pcpu_rt->dst);
1062 pcpu_rt = rt;
1063 }
1064 dst_hold(&pcpu_rt->dst);
1065 rt6_dst_from_metrics_check(pcpu_rt);
1066 read_unlock_bh(&table->tb6_lock);
1067 return pcpu_rt;
1068}
1069
1070struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1071 int oif, struct flowi6 *fl6, int flags)
1072{
1073 struct fib6_node *fn, *saved_fn;
1074 struct rt6_info *rt;
1075 int strict = 0;
1076
1077 strict |= flags & RT6_LOOKUP_F_IFACE;
1078 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1079 if (net->ipv6.devconf_all->forwarding == 0)
1080 strict |= RT6_LOOKUP_F_REACHABLE;
1081
1082 read_lock_bh(&table->tb6_lock);
1083
1084 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1085 saved_fn = fn;
1086
1087 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1088 oif = 0;
1089
1090redo_rt6_select:
1091 rt = rt6_select(fn, oif, strict);
1092 if (rt->rt6i_nsiblings)
1093 rt = rt6_multipath_select(rt, fl6, oif, strict);
1094 if (rt == net->ipv6.ip6_null_entry) {
1095 fn = fib6_backtrack(fn, &fl6->saddr);
1096 if (fn)
1097 goto redo_rt6_select;
1098 else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
1100 strict &= ~RT6_LOOKUP_F_REACHABLE;
1101 fn = saved_fn;
1102 goto redo_rt6_select;
1103 }
1104 }
1105
	/* The null route and RTF_CACHE clones are returned as-is; everything
	 * else is handed back as an uncached clone (FLOWI_FLAG_KNOWN_NH) or
	 * as a per-cpu copy below.
	 */
1107 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1108 dst_use(&rt->dst, jiffies);
1109 read_unlock_bh(&table->tb6_lock);
1110
1111 rt6_dst_from_metrics_check(rt);
1112
1113 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1114 return rt;
1115 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1116 !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

1123 struct rt6_info *uncached_rt;
1124
1125 dst_use(&rt->dst, jiffies);
1126 read_unlock_bh(&table->tb6_lock);
1127
1128 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1129 dst_release(&rt->dst);
1130
1131 if (uncached_rt)
1132 rt6_uncached_list_add(uncached_rt);
1133 else
1134 uncached_rt = net->ipv6.ip6_null_entry;
1135
1136 dst_hold(&uncached_rt->dst);
1137
1138 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1139 return uncached_rt;
1140
1141 } else {
		/* Get a percpu copy */

1144 struct rt6_info *pcpu_rt;
1145
1146 rt->dst.lastuse = jiffies;
1147 rt->dst.__use++;
1148 pcpu_rt = rt6_get_pcpu_route(rt);
1149
1150 if (pcpu_rt) {
1151 read_unlock_bh(&table->tb6_lock);
1152 } else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
1157 dst_hold(&rt->dst);
1158 read_unlock_bh(&table->tb6_lock);
1159 pcpu_rt = rt6_make_pcpu_route(rt);
1160 dst_release(&rt->dst);
1161 }
1162
1163 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1164 return pcpu_rt;
1165
1166 }
1167}
1168EXPORT_SYMBOL_GPL(ip6_pol_route);
1169
1170static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1171 struct flowi6 *fl6, int flags)
1172{
1173 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1174}
1175
1176struct dst_entry *ip6_route_input_lookup(struct net *net,
1177 struct net_device *dev,
1178 struct flowi6 *fl6, int flags)
1179{
1180 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1181 flags |= RT6_LOOKUP_F_IFACE;
1182
1183 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1184}
1185EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1186
1187void ip6_route_input(struct sk_buff *skb)
1188{
1189 const struct ipv6hdr *iph = ipv6_hdr(skb);
1190 struct net *net = dev_net(skb->dev);
1191 int flags = RT6_LOOKUP_F_HAS_SADDR;
1192 struct ip_tunnel_info *tun_info;
1193 struct flowi6 fl6 = {
1194 .flowi6_iif = skb->dev->ifindex,
1195 .daddr = iph->daddr,
1196 .saddr = iph->saddr,
1197 .flowlabel = ip6_flowinfo(iph),
1198 .flowi6_mark = skb->mark,
1199 .flowi6_proto = iph->nexthdr,
1200 };
1201
1202 tun_info = skb_tunnel_info(skb);
1203 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1204 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1205 skb_dst_drop(skb);
1206 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1207}
1208
1209static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1210 struct flowi6 *fl6, int flags)
1211{
1212 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1213}
1214
1215struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1216 struct flowi6 *fl6, int flags)
1217{
1218 bool any_src;
1219
1220 if (rt6_need_strict(&fl6->daddr)) {
1221 struct dst_entry *dst;
1222
1223 dst = l3mdev_link_scope_lookup(net, fl6);
1224 if (dst)
1225 return dst;
1226 }
1227
1228 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1229
1230 any_src = ipv6_addr_any(&fl6->saddr);
1231 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1232 (fl6->flowi6_oif && any_src))
1233 flags |= RT6_LOOKUP_F_IFACE;
1234
1235 if (!any_src)
1236 flags |= RT6_LOOKUP_F_HAS_SADDR;
1237 else if (sk)
1238 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1239
1240 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1241}
1242EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1243
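/* Clone a dst into a standalone blackhole entry: it keeps the metrics and
 * addresses of the original route but discards all traffic and ignores
 * PMTU updates and redirects (see ip6_dst_blackhole_ops).
 */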
1244struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1245{
1246 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1247 struct dst_entry *new = NULL;
1248
1249 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1250 if (rt) {
1251 rt6_info_init(rt);
1252
1253 new = &rt->dst;
1254 new->__use = 1;
1255 new->input = dst_discard;
1256 new->output = dst_discard_out;
1257
1258 dst_copy_metrics(new, &ort->dst);
1259 rt->rt6i_idev = ort->rt6i_idev;
1260 if (rt->rt6i_idev)
1261 in6_dev_hold(rt->rt6i_idev);
1262
1263 rt->rt6i_gateway = ort->rt6i_gateway;
1264 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1265 rt->rt6i_metric = 0;
1266
1267 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1268#ifdef CONFIG_IPV6_SUBTREES
1269 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1270#endif
1271
1272 dst_free(new);
1273 }
1274
1275 dst_release(dst_orig);
1276 return new ? new : ERR_PTR(-ENOMEM);
1277}
1278
/*
 *	Destination cache support functions
 */

1283static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1284{
1285 if (rt->dst.from &&
1286 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1287 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1288}
1289
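/* A cached dst is only valid while the fib6 node it was generated from still
 * carries the serial number (cookie) recorded at lookup time and the route
 * has not expired; otherwise the caller must perform a fresh lookup.
 */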
1290static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1291{
1292 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1293 return NULL;
1294
1295 if (rt6_check_expired(rt))
1296 return NULL;
1297
1298 return &rt->dst;
1299}
1300
1301static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1302{
1303 if (!__rt6_check_expired(rt) &&
1304 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1305 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1306 return &rt->dst;
1307 else
1308 return NULL;
1309}
1310
1311static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1312{
1313 struct rt6_info *rt;
1314
1315 rt = (struct rt6_info *) dst;
1316
	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

1322 rt6_dst_from_metrics_check(rt);
1323
1324 if (rt->rt6i_flags & RTF_PCPU ||
1325 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1326 return rt6_dst_from_check(rt, cookie);
1327 else
1328 return rt6_check(rt, cookie);
1329}
1330
1331static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1332{
1333 struct rt6_info *rt = (struct rt6_info *) dst;
1334
1335 if (rt) {
1336 if (rt->rt6i_flags & RTF_CACHE) {
1337 if (rt6_check_expired(rt)) {
1338 ip6_del_rt(rt);
1339 dst = NULL;
1340 }
1341 } else {
1342 dst_release(dst);
1343 dst = NULL;
1344 }
1345 }
1346 return dst;
1347}
1348
1349static void ip6_link_failure(struct sk_buff *skb)
1350{
1351 struct rt6_info *rt;
1352
1353 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1354
1355 rt = (struct rt6_info *) skb_dst(skb);
1356 if (rt) {
1357 if (rt->rt6i_flags & RTF_CACHE) {
1358 dst_hold(&rt->dst);
1359 ip6_del_rt(rt);
1360 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1361 rt->rt6i_node->fn_sernum = -1;
1362 }
1363 }
1364}
1365
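/* Record a freshly learned path MTU on a route and (re)arm its expiry so
 * the clamped value times out after ip6_rt_mtu_expires.
 */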
1366static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1367{
1368 struct net *net = dev_net(rt->dst.dev);
1369
1370 rt->rt6i_flags |= RTF_MODIFIED;
1371 rt->rt6i_pmtu = mtu;
1372 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1373}
1374
1375static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1376{
1377 return !(rt->rt6i_flags & RTF_CACHE) &&
1378 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1379}
1380
1381static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1382 const struct ipv6hdr *iph, u32 mtu)
1383{
1384 const struct in6_addr *daddr, *saddr;
1385 struct rt6_info *rt6 = (struct rt6_info *)dst;
1386
1387 if (rt6->rt6i_flags & RTF_LOCAL)
1388 return;
1389
1390 if (dst_metric_locked(dst, RTAX_MTU))
1391 return;
1392
1393 if (iph) {
1394 daddr = &iph->daddr;
1395 saddr = &iph->saddr;
1396 } else if (sk) {
1397 daddr = &sk->sk_v6_daddr;
1398 saddr = &inet6_sk(sk)->saddr;
1399 } else {
1400 daddr = NULL;
1401 saddr = NULL;
1402 }
1403 dst_confirm_neigh(dst, daddr);
1404 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1405 if (mtu >= dst_mtu(dst))
1406 return;
1407
1408 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1409 rt6_do_update_pmtu(rt6, mtu);
1410 } else if (daddr) {
1411 struct rt6_info *nrt6;
1412
1413 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1414 if (nrt6) {
1415 rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
1422 ip6_ins_rt(nrt6);
1423 }
1424 }
1425}
1426
1427static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1428 struct sk_buff *skb, u32 mtu)
1429{
1430 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1431}
1432
1433void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1434 int oif, u32 mark, kuid_t uid)
1435{
1436 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1437 struct dst_entry *dst;
1438 struct flowi6 fl6;
1439
1440 memset(&fl6, 0, sizeof(fl6));
1441 fl6.flowi6_oif = oif;
1442 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1443 fl6.daddr = iph->daddr;
1444 fl6.saddr = iph->saddr;
1445 fl6.flowlabel = ip6_flowinfo(iph);
1446 fl6.flowi6_uid = uid;
1447
1448 dst = ip6_route_output(net, NULL, &fl6);
1449 if (!dst->error)
1450 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1451 dst_release(dst);
1452}
1453EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1454
1455void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1456{
1457 struct dst_entry *dst;
1458
1459 ip6_update_pmtu(skb, sock_net(sk), mtu,
1460 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1461
1462 dst = __sk_dst_get(sk);
1463 if (!dst || !dst->obsolete ||
1464 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1465 return;
1466
1467 bh_lock_sock(sk);
1468 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1469 ip6_datagram_dst_update(sk, false);
1470 bh_unlock_sock(sk);
1471}
1472EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1473
/* Handle redirects */
1475struct ip6rd_flowi {
1476 struct flowi6 fl6;
1477 struct in6_addr gateway;
1478};
1479
1480static struct rt6_info *__ip6_route_redirect(struct net *net,
1481 struct fib6_table *table,
1482 struct flowi6 *fl6,
1483 int flags)
1484{
1485 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1486 struct rt6_info *rt;
1487 struct fib6_node *fn;
1488
	/* Get the "current" route for this destination and
	 * check if the redirect has come from an appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

1499 read_lock_bh(&table->tb6_lock);
1500 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1501restart:
1502 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1503 if (rt6_check_expired(rt))
1504 continue;
1505 if (rt->dst.error)
1506 break;
1507 if (!(rt->rt6i_flags & RTF_GATEWAY))
1508 continue;
1509 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1510 continue;
1511 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1512 continue;
1513 break;
1514 }
1515
1516 if (!rt)
1517 rt = net->ipv6.ip6_null_entry;
1518 else if (rt->dst.error) {
1519 rt = net->ipv6.ip6_null_entry;
1520 goto out;
1521 }
1522
1523 if (rt == net->ipv6.ip6_null_entry) {
1524 fn = fib6_backtrack(fn, &fl6->saddr);
1525 if (fn)
1526 goto restart;
1527 }
1528
1529out:
1530 dst_hold(&rt->dst);
1531
1532 read_unlock_bh(&table->tb6_lock);
1533
1534 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1535 return rt;
1536};
1537
1538static struct dst_entry *ip6_route_redirect(struct net *net,
1539 const struct flowi6 *fl6,
1540 const struct in6_addr *gateway)
1541{
1542 int flags = RT6_LOOKUP_F_HAS_SADDR;
1543 struct ip6rd_flowi rdfl;
1544
1545 rdfl.fl6 = *fl6;
1546 rdfl.gateway = *gateway;
1547
1548 return fib6_rule_lookup(net, &rdfl.fl6,
1549 flags, __ip6_route_redirect);
1550}
1551
1552void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1553 kuid_t uid)
1554{
1555 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1556 struct dst_entry *dst;
1557 struct flowi6 fl6;
1558
1559 memset(&fl6, 0, sizeof(fl6));
1560 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1561 fl6.flowi6_oif = oif;
1562 fl6.flowi6_mark = mark;
1563 fl6.daddr = iph->daddr;
1564 fl6.saddr = iph->saddr;
1565 fl6.flowlabel = ip6_flowinfo(iph);
1566 fl6.flowi6_uid = uid;
1567
1568 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1569 rt6_do_redirect(dst, NULL, skb);
1570 dst_release(dst);
1571}
1572EXPORT_SYMBOL_GPL(ip6_redirect);
1573
1574void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1575 u32 mark)
1576{
1577 const struct ipv6hdr *iph = ipv6_hdr(skb);
1578 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1579 struct dst_entry *dst;
1580 struct flowi6 fl6;
1581
1582 memset(&fl6, 0, sizeof(fl6));
1583 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1584 fl6.flowi6_oif = oif;
1585 fl6.flowi6_mark = mark;
1586 fl6.daddr = msg->dest;
1587 fl6.saddr = iph->daddr;
1588 fl6.flowi6_uid = sock_net_uid(net, NULL);
1589
1590 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1591 rt6_do_redirect(dst, NULL, skb);
1592 dst_release(dst);
1593}
1594
1595void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1596{
1597 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1598 sk->sk_uid);
1599}
1600EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1601
1602static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1603{
1604 struct net_device *dev = dst->dev;
1605 unsigned int mtu = dst_mtu(dst);
1606 struct net *net = dev_net(dev);
1607
1608 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1609
1610 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1611 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1612
	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
1619 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1620 mtu = IPV6_MAXPLEN;
1621 return mtu;
1622}
1623
1624static unsigned int ip6_mtu(const struct dst_entry *dst)
1625{
1626 const struct rt6_info *rt = (const struct rt6_info *)dst;
1627 unsigned int mtu = rt->rt6i_pmtu;
1628 struct inet6_dev *idev;
1629
1630 if (mtu)
1631 goto out;
1632
1633 mtu = dst_metric_raw(dst, RTAX_MTU);
1634 if (mtu)
1635 goto out;
1636
1637 mtu = IPV6_MIN_MTU;
1638
1639 rcu_read_lock();
1640 idev = __in6_dev_get(dst->dev);
1641 if (idev)
1642 mtu = idev->cnf.mtu6;
1643 rcu_read_unlock();
1644
1645out:
1646 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1647
1648 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1649}
1650
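/* dst entries allocated for ICMPv6/ndisc are never inserted into the FIB,
 * so they are chained on this private list and reaped by icmp6_dst_gc()
 * once their refcount drops to zero.
 */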
1651static struct dst_entry *icmp6_dst_gc_list;
1652static DEFINE_SPINLOCK(icmp6_dst_lock);
1653
1654struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1655 struct flowi6 *fl6)
1656{
1657 struct dst_entry *dst;
1658 struct rt6_info *rt;
1659 struct inet6_dev *idev = in6_dev_get(dev);
1660 struct net *net = dev_net(dev);
1661
1662 if (unlikely(!idev))
1663 return ERR_PTR(-ENODEV);
1664
1665 rt = ip6_dst_alloc(net, dev, 0);
1666 if (unlikely(!rt)) {
1667 in6_dev_put(idev);
1668 dst = ERR_PTR(-ENOMEM);
1669 goto out;
1670 }
1671
1672 rt->dst.flags |= DST_HOST;
1673 rt->dst.output = ip6_output;
1674 atomic_set(&rt->dst.__refcnt, 1);
1675 rt->rt6i_gateway = fl6->daddr;
1676 rt->rt6i_dst.addr = fl6->daddr;
1677 rt->rt6i_dst.plen = 128;
1678 rt->rt6i_idev = idev;
1679 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1680
1681 spin_lock_bh(&icmp6_dst_lock);
1682 rt->dst.next = icmp6_dst_gc_list;
1683 icmp6_dst_gc_list = &rt->dst;
1684 spin_unlock_bh(&icmp6_dst_lock);
1685
1686 fib6_force_start_gc(net);
1687
1688 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1689
1690out:
1691 return dst;
1692}
1693
1694int icmp6_dst_gc(void)
1695{
1696 struct dst_entry *dst, **pprev;
1697 int more = 0;
1698
1699 spin_lock_bh(&icmp6_dst_lock);
1700 pprev = &icmp6_dst_gc_list;
1701
1702 while ((dst = *pprev) != NULL) {
1703 if (!atomic_read(&dst->__refcnt)) {
1704 *pprev = dst->next;
1705 dst_free(dst);
1706 } else {
1707 pprev = &dst->next;
1708 ++more;
1709 }
1710 }
1711
1712 spin_unlock_bh(&icmp6_dst_lock);
1713
1714 return more;
1715}
1716
1717static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1718 void *arg)
1719{
1720 struct dst_entry *dst, **pprev;
1721
1722 spin_lock_bh(&icmp6_dst_lock);
1723 pprev = &icmp6_dst_gc_list;
1724 while ((dst = *pprev) != NULL) {
1725 struct rt6_info *rt = (struct rt6_info *) dst;
1726 if (func(rt, arg)) {
1727 *pprev = dst->next;
1728 dst_free(dst);
1729 } else {
1730 pprev = &dst->next;
1731 }
1732 }
1733 spin_unlock_bh(&icmp6_dst_lock);
1734}
1735
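/* dst garbage collection, invoked from dst_alloc() when the number of
 * entries exceeds gc_thresh.  ip6_rt_gc_expire grows by one on every forced
 * run, is reset to half of ip6_rt_gc_timeout once the table drops below
 * gc_thresh again, and decays by 1/2^ip6_rt_gc_elasticity on each call.
 */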
1736static int ip6_dst_gc(struct dst_ops *ops)
1737{
1738 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1739 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1740 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1741 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1742 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1743 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1744 int entries;
1745
1746 entries = dst_entries_get_fast(ops);
1747 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1748 entries <= rt_max_size)
1749 goto out;
1750
1751 net->ipv6.ip6_rt_gc_expire++;
1752 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1753 entries = dst_entries_get_slow(ops);
1754 if (entries < ops->gc_thresh)
1755 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1756out:
1757 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1758 return entries > rt_max_size;
1759}
1760
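/* Translate the RTAX_* netlink attributes supplied with a route into the
 * mx6_config array consumed by fib6_add(), validating attribute types,
 * clamping RTAX_HOPLIMIT and resolving RTAX_CC_ALGO names to keys.
 */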
1761static int ip6_convert_metrics(struct mx6_config *mxc,
1762 const struct fib6_config *cfg)
1763{
1764 bool ecn_ca = false;
1765 struct nlattr *nla;
1766 int remaining;
1767 u32 *mp;
1768
1769 if (!cfg->fc_mx)
1770 return 0;
1771
1772 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1773 if (unlikely(!mp))
1774 return -ENOMEM;
1775
1776 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1777 int type = nla_type(nla);
1778 u32 val;
1779
1780 if (!type)
1781 continue;
1782 if (unlikely(type > RTAX_MAX))
1783 goto err;
1784
1785 if (type == RTAX_CC_ALGO) {
1786 char tmp[TCP_CA_NAME_MAX];
1787
1788 nla_strlcpy(tmp, nla, sizeof(tmp));
1789 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1790 if (val == TCP_CA_UNSPEC)
1791 goto err;
1792 } else {
1793 val = nla_get_u32(nla);
1794 }
1795 if (type == RTAX_HOPLIMIT && val > 255)
1796 val = 255;
1797 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1798 goto err;
1799
1800 mp[type - 1] = val;
1801 __set_bit(type - 1, mxc->mx_valid);
1802 }
1803
1804 if (ecn_ca) {
1805 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1806 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1807 }
1808
1809 mxc->mx = mp;
1810 return 0;
1811 err:
1812 kfree(mp);
1813 return -EINVAL;
1814}
1815
1816static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1817 struct fib6_config *cfg,
1818 const struct in6_addr *gw_addr)
1819{
1820 struct flowi6 fl6 = {
1821 .flowi6_oif = cfg->fc_ifindex,
1822 .daddr = *gw_addr,
1823 .saddr = cfg->fc_prefsrc,
1824 };
1825 struct fib6_table *table;
1826 struct rt6_info *rt;
1827 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1828
1829 table = fib6_get_table(net, cfg->fc_table);
1830 if (!table)
1831 return NULL;
1832
1833 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1834 flags |= RT6_LOOKUP_F_HAS_SADDR;
1835
1836 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1837
	/* if table lookup failed, fall back to full lookup */
1839 if (rt == net->ipv6.ip6_null_entry) {
1840 ip6_rt_put(rt);
1841 rt = NULL;
1842 }
1843
1844 return rt;
1845}
1846
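/* Build (but do not insert) a rt6_info from a fib6_config: resolve the
 * output device and gateway, set up reject/blackhole behaviour and
 * lightweight-tunnel state, and leave the actual insertion to the caller.
 */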
1847static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1848{
1849 struct net *net = cfg->fc_nlinfo.nl_net;
1850 struct rt6_info *rt = NULL;
1851 struct net_device *dev = NULL;
1852 struct inet6_dev *idev = NULL;
1853 struct fib6_table *table;
1854 int addr_type;
1855 int err = -EINVAL;
1856
	/* RTF_PCPU is an internal flag; can not be set by userspace */
1858 if (cfg->fc_flags & RTF_PCPU)
1859 goto out;
1860
1861 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1862 goto out;
1863#ifndef CONFIG_IPV6_SUBTREES
1864 if (cfg->fc_src_len)
1865 goto out;
1866#endif
1867 if (cfg->fc_ifindex) {
1868 err = -ENODEV;
1869 dev = dev_get_by_index(net, cfg->fc_ifindex);
1870 if (!dev)
1871 goto out;
1872 idev = in6_dev_get(dev);
1873 if (!idev)
1874 goto out;
1875 }
1876
1877 if (cfg->fc_metric == 0)
1878 cfg->fc_metric = IP6_RT_PRIO_USER;
1879
1880 err = -ENOBUFS;
1881 if (cfg->fc_nlinfo.nlh &&
1882 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1883 table = fib6_get_table(net, cfg->fc_table);
1884 if (!table) {
1885 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1886 table = fib6_new_table(net, cfg->fc_table);
1887 }
1888 } else {
1889 table = fib6_new_table(net, cfg->fc_table);
1890 }
1891
1892 if (!table)
1893 goto out;
1894
1895 rt = ip6_dst_alloc(net, NULL,
1896 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1897
1898 if (!rt) {
1899 err = -ENOMEM;
1900 goto out;
1901 }
1902
1903 if (cfg->fc_flags & RTF_EXPIRES)
1904 rt6_set_expires(rt, jiffies +
1905 clock_t_to_jiffies(cfg->fc_expires));
1906 else
1907 rt6_clean_expires(rt);
1908
1909 if (cfg->fc_protocol == RTPROT_UNSPEC)
1910 cfg->fc_protocol = RTPROT_BOOT;
1911 rt->rt6i_protocol = cfg->fc_protocol;
1912
1913 addr_type = ipv6_addr_type(&cfg->fc_dst);
1914
1915 if (addr_type & IPV6_ADDR_MULTICAST)
1916 rt->dst.input = ip6_mc_input;
1917 else if (cfg->fc_flags & RTF_LOCAL)
1918 rt->dst.input = ip6_input;
1919 else
1920 rt->dst.input = ip6_forward;
1921
1922 rt->dst.output = ip6_output;
1923
1924 if (cfg->fc_encap) {
1925 struct lwtunnel_state *lwtstate;
1926
1927 err = lwtunnel_build_state(cfg->fc_encap_type,
1928 cfg->fc_encap, AF_INET6, cfg,
1929 &lwtstate);
1930 if (err)
1931 goto out;
1932 rt->dst.lwtstate = lwtstate_get(lwtstate);
1933 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1934 rt->dst.lwtstate->orig_output = rt->dst.output;
1935 rt->dst.output = lwtunnel_output;
1936 }
1937 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1938 rt->dst.lwtstate->orig_input = rt->dst.input;
1939 rt->dst.input = lwtunnel_input;
1940 }
1941 }
1942
1943 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1944 rt->rt6i_dst.plen = cfg->fc_dst_len;
1945 if (rt->rt6i_dst.plen == 128)
1946 rt->dst.flags |= DST_HOST;
1947
1948#ifdef CONFIG_IPV6_SUBTREES
1949 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1950 rt->rt6i_src.plen = cfg->fc_src_len;
1951#endif
1952
1953 rt->rt6i_metric = cfg->fc_metric;
1954

	/* We cannot add true routes via loopback here, they would result
	 * in kernel looping; promote them to reject routes.
	 */
1958 if ((cfg->fc_flags & RTF_REJECT) ||
1959 (dev && (dev->flags & IFF_LOOPBACK) &&
1960 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1961 !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
1963 if (dev != net->loopback_dev) {
1964 if (dev) {
1965 dev_put(dev);
1966 in6_dev_put(idev);
1967 }
1968 dev = net->loopback_dev;
1969 dev_hold(dev);
1970 idev = in6_dev_get(dev);
1971 if (!idev) {
1972 err = -ENODEV;
1973 goto out;
1974 }
1975 }
1976 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1977 switch (cfg->fc_type) {
1978 case RTN_BLACKHOLE:
1979 rt->dst.error = -EINVAL;
1980 rt->dst.output = dst_discard_out;
1981 rt->dst.input = dst_discard;
1982 break;
1983 case RTN_PROHIBIT:
1984 rt->dst.error = -EACCES;
1985 rt->dst.output = ip6_pkt_prohibit_out;
1986 rt->dst.input = ip6_pkt_prohibit;
1987 break;
1988 case RTN_THROW:
1989 case RTN_UNREACHABLE:
1990 default:
1991 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1992 : (cfg->fc_type == RTN_UNREACHABLE)
1993 ? -EHOSTUNREACH : -ENETUNREACH;
1994 rt->dst.output = ip6_pkt_discard_out;
1995 rt->dst.input = ip6_pkt_discard;
1996 break;
1997 }
1998 goto install_route;
1999 }
2000
2001 if (cfg->fc_flags & RTF_GATEWAY) {
2002 const struct in6_addr *gw_addr;
2003 int gwa_type;
2004
2005 gw_addr = &cfg->fc_gateway;
2006 gwa_type = ipv6_addr_type(gw_addr);
2007
		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
2013 err = -EINVAL;
2014 if (ipv6_chk_addr_and_flags(net, gw_addr,
2015 gwa_type & IPV6_ADDR_LINKLOCAL ?
2016 dev : NULL, 0, 0))
2017 goto out;
2018
2019 rt->rt6i_gateway = *gw_addr;
2020
2021 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
2022 struct rt6_info *grt = NULL;
2023
			/* IPv6 strictly inhibits using not link-local
			 * addresses as nexthop address.
			 * Otherwise, router will not able to send redirects.
			 * It is very good, but in some (rare!) circumstances
			 * (SIT, PtP, NBMA NOARP links) it is handy to allow
			 * some exceptions. --ANK
			 * We allow IPv4-mapped nexthops to support RFC4798-type
			 * addressing
			 */
2033 if (!(gwa_type & (IPV6_ADDR_UNICAST |
2034 IPV6_ADDR_MAPPED)))
2035 goto out;
2036
2037 if (cfg->fc_table) {
2038 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2039
2040 if (grt) {
2041 if (grt->rt6i_flags & RTF_GATEWAY ||
2042 (dev && dev != grt->dst.dev)) {
2043 ip6_rt_put(grt);
2044 grt = NULL;
2045 }
2046 }
2047 }
2048
2049 if (!grt)
2050 grt = rt6_lookup(net, gw_addr, NULL,
2051 cfg->fc_ifindex, 1);
2052
2053 err = -EHOSTUNREACH;
2054 if (!grt)
2055 goto out;
2056 if (dev) {
2057 if (dev != grt->dst.dev) {
2058 ip6_rt_put(grt);
2059 goto out;
2060 }
2061 } else {
2062 dev = grt->dst.dev;
2063 idev = grt->rt6i_idev;
2064 dev_hold(dev);
2065 in6_dev_hold(grt->rt6i_idev);
2066 }
2067 if (!(grt->rt6i_flags & RTF_GATEWAY))
2068 err = 0;
2069 ip6_rt_put(grt);
2070
2071 if (err)
2072 goto out;
2073 }
2074 err = -EINVAL;
2075 if (!dev || (dev->flags & IFF_LOOPBACK))
2076 goto out;
2077 }
2078
2079 err = -ENODEV;
2080 if (!dev)
2081 goto out;
2082
2083 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2084 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2085 err = -EINVAL;
2086 goto out;
2087 }
2088 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2089 rt->rt6i_prefsrc.plen = 128;
2090 } else
2091 rt->rt6i_prefsrc.plen = 0;
2092
2093 rt->rt6i_flags = cfg->fc_flags;
2094
2095install_route:
2096 rt->dst.dev = dev;
2097 rt->rt6i_idev = idev;
2098 rt->rt6i_table = table;
2099
2100 cfg->fc_nlinfo.nl_net = dev_net(dev);
2101
2102 return rt;
2103out:
2104 if (dev)
2105 dev_put(dev);
2106 if (idev)
2107 in6_dev_put(idev);
2108 if (rt)
2109 dst_free(&rt->dst);
2110
2111 return ERR_PTR(err);
2112}
2113
2114int ip6_route_add(struct fib6_config *cfg)
2115{
2116 struct mx6_config mxc = { .mx = NULL, };
2117 struct rt6_info *rt;
2118 int err;
2119
2120 rt = ip6_route_info_create(cfg);
2121 if (IS_ERR(rt)) {
2122 err = PTR_ERR(rt);
2123 rt = NULL;
2124 goto out;
2125 }
2126
2127 err = ip6_convert_metrics(&mxc, cfg);
2128 if (err)
2129 goto out;
2130
2131 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2132
2133 kfree(mxc.mx);
2134
2135 return err;
2136out:
2137 if (rt)
2138 dst_free(&rt->dst);
2139
2140 return err;
2141}
2142
2143static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2144{
2145 int err;
2146 struct fib6_table *table;
2147 struct net *net = dev_net(rt->dst.dev);
2148
2149 if (rt == net->ipv6.ip6_null_entry ||
2150 rt->dst.flags & DST_NOCACHE) {
2151 err = -ENOENT;
2152 goto out;
2153 }
2154
2155 table = rt->rt6i_table;
2156 write_lock_bh(&table->tb6_lock);
2157 err = fib6_del(rt, info);
2158 write_unlock_bh(&table->tb6_lock);
2159
2160out:
2161 ip6_rt_put(rt);
2162 return err;
2163}
2164
2165int ip6_del_rt(struct rt6_info *rt)
2166{
2167 struct nl_info info = {
2168 .nl_net = dev_net(rt->dst.dev),
2169 };
2170 return __ip6_del_rt(rt, &info);
2171}
2172
2173static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2174{
2175 struct nl_info *info = &cfg->fc_nlinfo;
2176 struct net *net = info->nl_net;
2177 struct sk_buff *skb = NULL;
2178 struct fib6_table *table;
2179 int err = -ENOENT;
2180
2181 if (rt == net->ipv6.ip6_null_entry)
2182 goto out_put;
2183 table = rt->rt6i_table;
2184 write_lock_bh(&table->tb6_lock);
2185
2186 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2187 struct rt6_info *sibling, *next_sibling;
2188
		/* prefer to send a single notification with all hops */
2190 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2191 if (skb) {
2192 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2193
2194 if (rt6_fill_node(net, skb, rt,
2195 NULL, NULL, 0, RTM_DELROUTE,
2196 info->portid, seq, 0) < 0) {
2197 kfree_skb(skb);
2198 skb = NULL;
2199 } else
2200 info->skip_notify = 1;
2201 }
2202
2203 list_for_each_entry_safe(sibling, next_sibling,
2204 &rt->rt6i_siblings,
2205 rt6i_siblings) {
2206 err = fib6_del(sibling, info);
2207 if (err)
2208 goto out_unlock;
2209 }
2210 }
2211
2212 err = fib6_del(rt, info);
2213out_unlock:
2214 write_unlock_bh(&table->tb6_lock);
2215out_put:
2216 ip6_rt_put(rt);
2217
2218 if (skb) {
2219 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2220 info->nlh, gfp_any());
2221 }
2222 return err;
2223}
2224
2225static int ip6_route_del(struct fib6_config *cfg)
2226{
2227 struct fib6_table *table;
2228 struct fib6_node *fn;
2229 struct rt6_info *rt;
2230 int err = -ESRCH;
2231
2232 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2233 if (!table)
2234 return err;
2235
2236 read_lock_bh(&table->tb6_lock);
2237
2238 fn = fib6_locate(&table->tb6_root,
2239 &cfg->fc_dst, cfg->fc_dst_len,
2240 &cfg->fc_src, cfg->fc_src_len);
2241
2242 if (fn) {
2243 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2244 if ((rt->rt6i_flags & RTF_CACHE) &&
2245 !(cfg->fc_flags & RTF_CACHE))
2246 continue;
2247 if (cfg->fc_ifindex &&
2248 (!rt->dst.dev ||
2249 rt->dst.dev->ifindex != cfg->fc_ifindex))
2250 continue;
2251 if (cfg->fc_flags & RTF_GATEWAY &&
2252 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2253 continue;
2254 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2255 continue;
2256 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2257 continue;
2258 dst_hold(&rt->dst);
2259 read_unlock_bh(&table->tb6_lock);
2260
			/* if gateway was specified only delete the one hop */
2262 if (cfg->fc_flags & RTF_GATEWAY)
2263 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2264
2265 return __ip6_del_rt_siblings(rt, cfg);
2266 }
2267 }
2268 read_unlock_bh(&table->tb6_lock);
2269
2270 return err;
2271}
2272
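/* Handle an ICMPv6 Redirect: validate it (RFC 4861), update the neighbour
 * cache for the new first hop, and install an RTF_CACHE clone of the
 * affected route that points at the redirect target.
 */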
2273static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2274{
2275 struct netevent_redirect netevent;
2276 struct rt6_info *rt, *nrt = NULL;
2277 struct ndisc_options ndopts;
2278 struct inet6_dev *in6_dev;
2279 struct neighbour *neigh;
2280 struct rd_msg *msg;
2281 int optlen, on_link;
2282 u8 *lladdr;
2283
2284 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2285 optlen -= sizeof(*msg);
2286
2287 if (optlen < 0) {
2288 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2289 return;
2290 }
2291
2292 msg = (struct rd_msg *)icmp6_hdr(skb);
2293
2294 if (ipv6_addr_is_multicast(&msg->dest)) {
2295 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2296 return;
2297 }
2298
2299 on_link = 0;
2300 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2301 on_link = 1;
2302 } else if (ipv6_addr_type(&msg->target) !=
2303 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2304 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2305 return;
2306 }
2307
2308 in6_dev = __in6_dev_get(skb->dev);
2309 if (!in6_dev)
2310 return;
2311 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2312 return;
2313
	/* A redirect must carry well-formed ND options; reject the
	 * message if they cannot be parsed.
	 */
2319 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2320 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2321 return;
2322 }
2323
2324 lladdr = NULL;
2325 if (ndopts.nd_opts_tgt_lladdr) {
2326 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2327 skb->dev);
2328 if (!lladdr) {
2329 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2330 return;
2331 }
2332 }
2333
2334 rt = (struct rt6_info *) dst;
2335 if (rt->rt6i_flags & RTF_REJECT) {
2336 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2337 return;
2338 }
2339
	/* Redirects are only sent in response to traffic we originated,
	 * so the current next hop is evidently reachable; confirm the
	 * neighbour entry before switching to the new one.
	 */
2344 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
2345
2346 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2347 if (!neigh)
2348 return;
2349
	/* Accept the redirect: update the neighbour cache entry for the
	 * target with the advertised link-layer address.
	 */
2354 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2355 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2356 NEIGH_UPDATE_F_OVERRIDE|
2357 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2358 NEIGH_UPDATE_F_ISROUTER)),
2359 NDISC_REDIRECT, &ndopts);
2360
2361 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2362 if (!nrt)
2363 goto out;
2364
2365 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2366 if (on_link)
2367 nrt->rt6i_flags &= ~RTF_GATEWAY;
2368
2369 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2370
2371 if (ip6_ins_rt(nrt))
2372 goto out;
2373
2374 netevent.old = &rt->dst;
2375 netevent.new = &nrt->dst;
2376 netevent.daddr = &msg->dest;
2377 netevent.neigh = neigh;
2378 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2379
2380 if (rt->rt6i_flags & RTF_CACHE) {
2381 rt = (struct rt6_info *) dst_clone(&rt->dst);
2382 ip6_del_rt(rt);
2383 }
2384
2385out:
2386 neigh_release(neigh);
2387}
2388
/*
 *	Misc support functions
 */

2393static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2394{
2395 BUG_ON(from->dst.from);
2396
2397 rt->rt6i_flags &= ~RTF_EXPIRES;
2398 dst_hold(&from->dst);
2399 rt->dst.from = &from->dst;
2400 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2401}
2402
2403static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2404{
2405 rt->dst.input = ort->dst.input;
2406 rt->dst.output = ort->dst.output;
2407 rt->rt6i_dst = ort->rt6i_dst;
2408 rt->dst.error = ort->dst.error;
2409 rt->rt6i_idev = ort->rt6i_idev;
2410 if (rt->rt6i_idev)
2411 in6_dev_hold(rt->rt6i_idev);
2412 rt->dst.lastuse = jiffies;
2413 rt->rt6i_gateway = ort->rt6i_gateway;
2414 rt->rt6i_flags = ort->rt6i_flags;
2415 rt6_set_from(rt, ort);
2416 rt->rt6i_metric = ort->rt6i_metric;
2417#ifdef CONFIG_IPV6_SUBTREES
2418 rt->rt6i_src = ort->rt6i_src;
2419#endif
2420 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2421 rt->rt6i_table = ort->rt6i_table;
2422 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2423}
2424
2425#ifdef CONFIG_IPV6_ROUTE_INFO
2426static struct rt6_info *rt6_get_route_info(struct net *net,
2427 const struct in6_addr *prefix, int prefixlen,
2428 const struct in6_addr *gwaddr,
2429 struct net_device *dev)
2430{
2431 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2432 int ifindex = dev->ifindex;
2433 struct fib6_node *fn;
2434 struct rt6_info *rt = NULL;
2435 struct fib6_table *table;
2436
2437 table = fib6_get_table(net, tb_id);
2438 if (!table)
2439 return NULL;
2440
2441 read_lock_bh(&table->tb6_lock);
2442 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2443 if (!fn)
2444 goto out;
2445
2446 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2447 if (rt->dst.dev->ifindex != ifindex)
2448 continue;
2449 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2450 continue;
2451 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2452 continue;
2453 dst_hold(&rt->dst);
2454 break;
2455 }
2456out:
2457 read_unlock_bh(&table->tb6_lock);
2458 return rt;
2459}
2460
2461static struct rt6_info *rt6_add_route_info(struct net *net,
2462 const struct in6_addr *prefix, int prefixlen,
2463 const struct in6_addr *gwaddr,
2464 struct net_device *dev,
2465 unsigned int pref)
2466{
2467 struct fib6_config cfg = {
2468 .fc_metric = IP6_RT_PRIO_USER,
2469 .fc_ifindex = dev->ifindex,
2470 .fc_dst_len = prefixlen,
2471 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2472 RTF_UP | RTF_PREF(pref),
2473 .fc_nlinfo.portid = 0,
2474 .fc_nlinfo.nlh = NULL,
2475 .fc_nlinfo.nl_net = net,
2476 };
2477
	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2479 cfg.fc_dst = *prefix;
2480 cfg.fc_gateway = *gwaddr;
2481
2482
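	/* A prefix length of zero advertises a default route. */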
2483 if (!prefixlen)
2484 cfg.fc_flags |= RTF_DEFAULT;
2485
2486 ip6_route_add(&cfg);
2487
2488 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2489}
2490#endif
2491
2492struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2493{
2494 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2495 struct rt6_info *rt;
2496 struct fib6_table *table;
2497
2498 table = fib6_get_table(dev_net(dev), tb_id);
2499 if (!table)
2500 return NULL;
2501
2502 read_lock_bh(&table->tb6_lock);
2503 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2504 if (dev == rt->dst.dev &&
2505 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2506 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2507 break;
2508 }
2509 if (rt)
2510 dst_hold(&rt->dst);
2511 read_unlock_bh(&table->tb6_lock);
2512 return rt;
2513}
2514
2515struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2516 struct net_device *dev,
2517 unsigned int pref)
2518{
2519 struct fib6_config cfg = {
2520 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2521 .fc_metric = IP6_RT_PRIO_USER,
2522 .fc_ifindex = dev->ifindex,
2523 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2524 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2525 .fc_nlinfo.portid = 0,
2526 .fc_nlinfo.nlh = NULL,
2527 .fc_nlinfo.nl_net = dev_net(dev),
2528 };
2529
2530 cfg.fc_gateway = *gwaddr;
2531
2532 if (!ip6_route_add(&cfg)) {
2533 struct fib6_table *table;
2534
2535 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2536 if (table)
2537 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2538 }
2539
2540 return rt6_get_dflt_router(gwaddr, dev);
2541}
2542
2543static void __rt6_purge_dflt_routers(struct fib6_table *table)
2544{
2545 struct rt6_info *rt;
2546
2547restart:
2548 read_lock_bh(&table->tb6_lock);
2549 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2550 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2551 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2552 dst_hold(&rt->dst);
2553 read_unlock_bh(&table->tb6_lock);
2554 ip6_del_rt(rt);
2555 goto restart;
2556 }
2557 }
2558 read_unlock_bh(&table->tb6_lock);
2559
2560 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2561}
2562
2563void rt6_purge_dflt_routers(struct net *net)
2564{
2565 struct fib6_table *table;
2566 struct hlist_head *head;
2567 unsigned int h;
2568
2569 rcu_read_lock();
2570
2571 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2572 head = &net->ipv6.fib_table_hash[h];
2573 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2574 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2575 __rt6_purge_dflt_routers(table);
2576 }
2577 }
2578
2579 rcu_read_unlock();
2580}
2581
2582static void rtmsg_to_fib6_config(struct net *net,
2583 struct in6_rtmsg *rtmsg,
2584 struct fib6_config *cfg)
2585{
2586 memset(cfg, 0, sizeof(*cfg));
2587
2588 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2589 : RT6_TABLE_MAIN;
2590 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2591 cfg->fc_metric = rtmsg->rtmsg_metric;
2592 cfg->fc_expires = rtmsg->rtmsg_info;
2593 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2594 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2595 cfg->fc_flags = rtmsg->rtmsg_flags;
2596
2597 cfg->fc_nlinfo.nl_net = net;
2598
2599 cfg->fc_dst = rtmsg->rtmsg_dst;
2600 cfg->fc_src = rtmsg->rtmsg_src;
2601 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2602}
2603
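/* Handle the legacy SIOCADDRT/SIOCDELRT ioctls on AF_INET6 sockets.  The
 * caller passes a struct in6_rtmsg, which rtmsg_to_fib6_config() above
 * converts into a fib6_config.  A minimal userspace sketch (illustrative
 * only; "prefix", "ifindex" and "fd" are placeholders, not from this file):
 *
 *	struct in6_rtmsg rtm = {
 *		.rtmsg_dst	= prefix,
 *		.rtmsg_dst_len	= 64,
 *		.rtmsg_ifindex	= ifindex,
 *		.rtmsg_flags	= RTF_UP,
 *	};
 *	ioctl(fd, SIOCADDRT, &rtm);
 */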
2604int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2605{
2606 struct fib6_config cfg;
2607 struct in6_rtmsg rtmsg;
2608 int err;
2609
2610 switch (cmd) {
2611 case SIOCADDRT:
2612 case SIOCDELRT:
2613 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2614 return -EPERM;
2615 err = copy_from_user(&rtmsg, arg,
2616 sizeof(struct in6_rtmsg));
2617 if (err)
2618 return -EFAULT;
2619
2620 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2621
2622 rtnl_lock();
2623 switch (cmd) {
2624 case SIOCADDRT:
2625 err = ip6_route_add(&cfg);
2626 break;
2627 case SIOCDELRT:
2628 err = ip6_route_del(&cfg);
2629 break;
2630 default:
2631 err = -EINVAL;
2632 }
2633 rtnl_unlock();
2634
2635 return err;
2636 }
2637
2638 return -EINVAL;
2639}
2640
/*
 *	Packet-dropping route handlers: update the relevant MIB counter,
 *	send an ICMPv6 destination unreachable and free the skb.
 */

2645static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2646{
2647 int type;
2648 struct dst_entry *dst = skb_dst(skb);
2649 switch (ipstats_mib_noroutes) {
2650 case IPSTATS_MIB_INNOROUTES:
2651 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2652 if (type == IPV6_ADDR_ANY) {
2653 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2654 IPSTATS_MIB_INADDRERRORS);
2655 break;
2656 }
2657
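		/* FALLTHROUGH: any other destination counts as no route */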
2658 case IPSTATS_MIB_OUTNOROUTES:
2659 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2660 ipstats_mib_noroutes);
2661 break;
2662 }
2663 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2664 kfree_skb(skb);
2665 return 0;
2666}
2667
2668static int ip6_pkt_discard(struct sk_buff *skb)
2669{
2670 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2671}
2672
2673static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2674{
2675 skb->dev = skb_dst(skb)->dev;
2676 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2677}
2678
2679static int ip6_pkt_prohibit(struct sk_buff *skb)
2680{
2681 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2682}
2683
2684static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2685{
2686 skb->dev = skb_dst(skb)->dev;
2687 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2688}
2689
2690
/*
 *	Allocate a dst for a local (anycast or host) address.
 */

2694struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2695 const struct in6_addr *addr,
2696 bool anycast)
2697{
2698 u32 tb_id;
2699 struct net *net = dev_net(idev->dev);
2700 struct net_device *dev = net->loopback_dev;
2701 struct rt6_info *rt;
2702
	/* If the device is enslaved to an L3 master device and the address
	 * does not need a strict device match, anchor the route on the
	 * master device instead of the loopback device.
	 */
2706 if (!rt6_need_strict(addr))
2707 dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2708
2709 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2710 if (!rt)
2711 return ERR_PTR(-ENOMEM);
2712
2713 in6_dev_hold(idev);
2714
2715 rt->dst.flags |= DST_HOST;
2716 rt->dst.input = ip6_input;
2717 rt->dst.output = ip6_output;
2718 rt->rt6i_idev = idev;
2719
2720 rt->rt6i_protocol = RTPROT_KERNEL;
2721 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2722 if (anycast)
2723 rt->rt6i_flags |= RTF_ANYCAST;
2724 else
2725 rt->rt6i_flags |= RTF_LOCAL;
2726
2727 rt->rt6i_gateway = *addr;
2728 rt->rt6i_dst.addr = *addr;
2729 rt->rt6i_dst.plen = 128;
2730 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2731 rt->rt6i_table = fib6_get_table(net, tb_id);
2732 rt->dst.flags |= DST_NOCACHE;
2733
2734 atomic_set(&rt->dst.__refcnt, 1);
2735
2736 return rt;
2737}
2738
2739
2740struct arg_dev_net_ip {
2741 struct net_device *dev;
2742 struct net *net;
2743 struct in6_addr *addr;
2744};
2745
2746static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2747{
2748 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2749 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2750 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2751
2752 if (((void *)rt->dst.dev == dev || !dev) &&
2753 rt != net->ipv6.ip6_null_entry &&
2754 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2755
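		/* clear the preferred-source setting on this route */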
2756 rt->rt6i_prefsrc.plen = 0;
2757 }
2758 return 0;
2759}
2760
2761void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2762{
2763 struct net *net = dev_net(ifp->idev->dev);
2764 struct arg_dev_net_ip adni = {
2765 .dev = ifp->idev->dev,
2766 .net = net,
2767 .addr = &ifp->addr,
2768 };
2769 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2770}
2771
2772#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2773#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2774
2775
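/* Flag RA default-router routes and cached gateway routes that use the given
 * gateway; routes for which this callback returns -1 are removed by the
 * fib6 walker.
 */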
2776static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2777{
2778 struct in6_addr *gateway = (struct in6_addr *)arg;
2779
2780 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2781 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2782 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2783 return -1;
2784 }
2785 return 0;
2786}
2787
2788void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2789{
2790 fib6_clean_all(net, fib6_clean_tohost, gateway);
2791}
2792
2793struct arg_dev_net {
2794 struct net_device *dev;
2795 struct net *net;
2796};
2797
2798
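/* fib6_clean_all()/icmp6_clean_all() callback run for every route when a
 * device goes down; a route is removed when this returns -1.
 */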
2799static int fib6_ifdown(struct rt6_info *rt, void *arg)
2800{
2801 const struct arg_dev_net *adn = arg;
2802 const struct net_device *dev = adn->dev;
2803
2804 if ((rt->dst.dev == dev || !dev) &&
2805 rt != adn->net->ipv6.ip6_null_entry &&
2806 (rt->rt6i_nsiblings == 0 ||
2807 (dev && netdev_unregistering(dev)) ||
2808 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
2809 return -1;
2810
2811 return 0;
2812}
2813
2814void rt6_ifdown(struct net *net, struct net_device *dev)
2815{
2816 struct arg_dev_net adn = {
2817 .dev = dev,
2818 .net = net,
2819 };
2820
2821 fib6_clean_all(net, fib6_ifdown, &adn);
2822 icmp6_clean_all(fib6_ifdown, &adn);
2823 if (dev)
2824 rt6_uncached_list_flush_dev(net, dev);
2825}
2826
2827struct rt6_mtu_change_arg {
2828 struct net_device *dev;
2829 unsigned int mtu;
2830};
2831
2832static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2833{
2834 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2835 struct inet6_dev *idev;

	/* Only routes through the changed device that already carry an MTU
	 * metric are updated, and a locked RTAX_MTU metric is never
	 * overridden.
	 */
2843 idev = __in6_dev_get(arg->dev);
2844 if (!idev)
2845 return 0;

	/* For cached routes only lower the stored path MTU.  For other
	 * routes, lower the MTU metric when the device MTU shrinks, and
	 * raise it only when the old metric was simply tracking the
	 * device MTU (idev->cnf.mtu6).
	 */
2861 if (rt->dst.dev == arg->dev &&
2862 dst_metric_raw(&rt->dst, RTAX_MTU) &&
2863 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2864 if (rt->rt6i_flags & RTF_CACHE) {
			/* Cached clones keep the path MTU in rt6i_pmtu;
			 * never raise it from here.
			 */
2870 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2871 rt->rt6i_pmtu = arg->mtu;
2872 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2873 (dst_mtu(&rt->dst) < arg->mtu &&
2874 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2875 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2876 }
2877 }
2878 return 0;
2879}
2880
2881void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2882{
2883 struct rt6_mtu_change_arg arg = {
2884 .dev = dev,
2885 .mtu = mtu,
2886 };
2887
2888 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2889}
2890
2891static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2892 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2893 [RTA_OIF] = { .type = NLA_U32 },
2894 [RTA_IIF] = { .type = NLA_U32 },
2895 [RTA_PRIORITY] = { .type = NLA_U32 },
2896 [RTA_METRICS] = { .type = NLA_NESTED },
2897 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2898 [RTA_PREF] = { .type = NLA_U8 },
2899 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2900 [RTA_ENCAP] = { .type = NLA_NESTED },
2901 [RTA_EXPIRES] = { .type = NLA_U32 },
2902 [RTA_UID] = { .type = NLA_U32 },
2903 [RTA_MARK] = { .type = NLA_U32 },
2904};
2905
2906static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2907 struct fib6_config *cfg)
2908{
2909 struct rtmsg *rtm;
2910 struct nlattr *tb[RTA_MAX+1];
2911 unsigned int pref;
2912 int err;
2913
2914 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
2915 NULL);
2916 if (err < 0)
2917 goto errout;
2918
2919 err = -EINVAL;
2920 rtm = nlmsg_data(nlh);
2921 memset(cfg, 0, sizeof(*cfg));
2922
2923 cfg->fc_table = rtm->rtm_table;
2924 cfg->fc_dst_len = rtm->rtm_dst_len;
2925 cfg->fc_src_len = rtm->rtm_src_len;
2926 cfg->fc_flags = RTF_UP;
2927 cfg->fc_protocol = rtm->rtm_protocol;
2928 cfg->fc_type = rtm->rtm_type;
2929
2930 if (rtm->rtm_type == RTN_UNREACHABLE ||
2931 rtm->rtm_type == RTN_BLACKHOLE ||
2932 rtm->rtm_type == RTN_PROHIBIT ||
2933 rtm->rtm_type == RTN_THROW)
2934 cfg->fc_flags |= RTF_REJECT;
2935
2936 if (rtm->rtm_type == RTN_LOCAL)
2937 cfg->fc_flags |= RTF_LOCAL;
2938
2939 if (rtm->rtm_flags & RTM_F_CLONED)
2940 cfg->fc_flags |= RTF_CACHE;
2941
2942 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2943 cfg->fc_nlinfo.nlh = nlh;
2944 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2945
2946 if (tb[RTA_GATEWAY]) {
2947 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2948 cfg->fc_flags |= RTF_GATEWAY;
2949 }
2950
2951 if (tb[RTA_DST]) {
2952 int plen = (rtm->rtm_dst_len + 7) >> 3;
2953
2954 if (nla_len(tb[RTA_DST]) < plen)
2955 goto errout;
2956
2957 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2958 }
2959
2960 if (tb[RTA_SRC]) {
2961 int plen = (rtm->rtm_src_len + 7) >> 3;
2962
2963 if (nla_len(tb[RTA_SRC]) < plen)
2964 goto errout;
2965
2966 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2967 }
2968
2969 if (tb[RTA_PREFSRC])
2970 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2971
2972 if (tb[RTA_OIF])
2973 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2974
2975 if (tb[RTA_PRIORITY])
2976 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2977
2978 if (tb[RTA_METRICS]) {
2979 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2980 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2981 }
2982
2983 if (tb[RTA_TABLE])
2984 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2985
2986 if (tb[RTA_MULTIPATH]) {
2987 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2988 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2989
2990 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
2991 cfg->fc_mp_len);
2992 if (err < 0)
2993 goto errout;
2994 }
2995
2996 if (tb[RTA_PREF]) {
2997 pref = nla_get_u8(tb[RTA_PREF]);
2998 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2999 pref != ICMPV6_ROUTER_PREF_HIGH)
3000 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3001 cfg->fc_flags |= RTF_PREF(pref);
3002 }
3003
3004 if (tb[RTA_ENCAP])
3005 cfg->fc_encap = tb[RTA_ENCAP];
3006
3007 if (tb[RTA_ENCAP_TYPE]) {
3008 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3009
3010 err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
3011 if (err < 0)
3012 goto errout;
3013 }
3014
3015 if (tb[RTA_EXPIRES]) {
3016 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3017
3018 if (addrconf_finite_timeout(timeout)) {
3019 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3020 cfg->fc_flags |= RTF_EXPIRES;
3021 }
3022 }
3023
3024 err = 0;
3025errout:
3026 return err;
3027}
3028
3029struct rt6_nh {
3030 struct rt6_info *rt6_info;
3031 struct fib6_config r_cfg;
3032 struct mx6_config mxc;
3033 struct list_head next;
3034};
3035
3036static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3037{
3038 struct rt6_nh *nh;
3039
3040 list_for_each_entry(nh, rt6_nh_list, next) {
3041 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3042 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3043 nh->r_cfg.fc_ifindex);
3044 }
3045}
3046
3047static int ip6_route_info_append(struct list_head *rt6_nh_list,
3048 struct rt6_info *rt, struct fib6_config *r_cfg)
3049{
3050 struct rt6_nh *nh;
3051 struct rt6_info *rtnh;
3052 int err = -EEXIST;
3053
3054 list_for_each_entry(nh, rt6_nh_list, next) {
3055
3056 rtnh = nh->rt6_info;
3057
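		/* do not queue the same nexthop (device + gateway) twice */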
3058 if (rtnh->dst.dev == rt->dst.dev &&
3059 rtnh->rt6i_idev == rt->rt6i_idev &&
3060 ipv6_addr_equal(&rtnh->rt6i_gateway,
3061 &rt->rt6i_gateway))
3062 return err;
3063 }
3064
3065 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3066 if (!nh)
3067 return -ENOMEM;
3068 nh->rt6_info = rt;
3069 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3070 if (err) {
3071 kfree(nh);
3072 return err;
3073 }
3074 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3075 list_add_tail(&nh->next, rt6_nh_list);
3076
3077 return 0;
3078}
3079
3080static void ip6_route_mpath_notify(struct rt6_info *rt,
3081 struct rt6_info *rt_last,
3082 struct nl_info *info,
3083 __u16 nlflags)
3084{
	/* When nexthops were appended, rt_last points at the last route
	 * that was inserted.  Userspace expects a notification describing
	 * the whole multipath route starting at its first nexthop, and
	 * siblings are linked in insertion order, so rewind to the first
	 * entry of the last route's sibling list.
	 */
3091 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3092 rt = list_first_entry(&rt_last->rt6i_siblings,
3093 struct rt6_info,
3094 rt6i_siblings);
3095 }
3096
3097 if (rt)
3098 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3099}
3100
3101static int ip6_route_multipath_add(struct fib6_config *cfg)
3102{
3103 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3104 struct nl_info *info = &cfg->fc_nlinfo;
3105 struct fib6_config r_cfg;
3106 struct rtnexthop *rtnh;
3107 struct rt6_info *rt;
3108 struct rt6_nh *err_nh;
3109 struct rt6_nh *nh, *nh_safe;
3110 __u16 nlflags;
3111 int remaining;
3112 int attrlen;
3113 int err = 1;
3114 int nhn = 0;
3115 int replace = (cfg->fc_nlinfo.nlh &&
3116 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3117 LIST_HEAD(rt6_nh_list);
3118
3119 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3120 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3121 nlflags |= NLM_F_APPEND;
3122
3123 remaining = cfg->fc_mp_len;
3124 rtnh = (struct rtnexthop *)cfg->fc_mp;
3125
	/* Walk each RTA_MULTIPATH nexthop entry and build a route for it. */
3129 while (rtnh_ok(rtnh, remaining)) {
3130 memcpy(&r_cfg, cfg, sizeof(*cfg));
3131 if (rtnh->rtnh_ifindex)
3132 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3133
3134 attrlen = rtnh_attrlen(rtnh);
3135 if (attrlen > 0) {
3136 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3137
3138 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3139 if (nla) {
3140 r_cfg.fc_gateway = nla_get_in6_addr(nla);
3141 r_cfg.fc_flags |= RTF_GATEWAY;
3142 }
3143 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3144 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3145 if (nla)
3146 r_cfg.fc_encap_type = nla_get_u16(nla);
3147 }
3148
3149 rt = ip6_route_info_create(&r_cfg);
3150 if (IS_ERR(rt)) {
3151 err = PTR_ERR(rt);
3152 rt = NULL;
3153 goto cleanup;
3154 }
3155
3156 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3157 if (err) {
3158 dst_free(&rt->dst);
3159 goto cleanup;
3160 }
3161
3162 rtnh = rtnh_next(rtnh, &remaining);
3163 }
3164
	/* For add and replace, suppress the per-nexthop notifications and
	 * send a single one covering the full route once every nexthop
	 * has been inserted.
	 */
3169 info->skip_notify = 1;
3170
3171 err_nh = NULL;
3172 list_for_each_entry(nh, &rt6_nh_list, next) {
3173 rt_last = nh->rt6_info;
3174 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc);
3175
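		/* remember the first route inserted successfully; it anchors
		 * the notification sent below
		 */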
3176 if (!rt_notif && !err)
3177 rt_notif = nh->rt6_info;
3178
3179
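		/* the route is no longer ours after __ip6_ins_rt(); clear the
		 * pointer so the cleanup path below does not free it again
		 */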
3180 nh->rt6_info = NULL;
3181 if (err) {
3182 if (replace && nhn)
3183 ip6_print_replace_route_err(&rt6_nh_list);
3184 err_nh = nh;
3185 goto add_errout;
3186 }
3187
		/* After the first nexthop has been inserted, drop NLM_F_EXCL
		 * and NLM_F_REPLACE so the remaining nexthops are appended
		 * to the new route instead of being rejected as duplicates
		 * or replacing what was just added.
		 */
3195 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3196 NLM_F_REPLACE);
3197 nhn++;
3198 }
3199
3200
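	/* success: send one notification describing the whole multipath route */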
3201 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3202 goto cleanup;
3203
3204add_errout:
	/* Notify about the nexthops that were installed so that the
	 * delete notifications generated below stay coherent.
	 */
3209 if (rt_notif)
3210 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3211
3212
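	/* delete the nexthops that had already been installed */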
3213 list_for_each_entry(nh, &rt6_nh_list, next) {
3214 if (err_nh == nh)
3215 break;
3216 ip6_route_del(&nh->r_cfg);
3217 }
3218
3219cleanup:
3220 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3221 if (nh->rt6_info)
3222 dst_free(&nh->rt6_info->dst);
3223 kfree(nh->mxc.mx);
3224 list_del(&nh->next);
3225 kfree(nh);
3226 }
3227
3228 return err;
3229}
3230
3231static int ip6_route_multipath_del(struct fib6_config *cfg)
3232{
3233 struct fib6_config r_cfg;
3234 struct rtnexthop *rtnh;
3235 int remaining;
3236 int attrlen;
3237 int err = 1, last_err = 0;
3238
3239 remaining = cfg->fc_mp_len;
3240 rtnh = (struct rtnexthop *)cfg->fc_mp;
3241
3242
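	/* walk each RTA_MULTIPATH entry and delete the corresponding nexthop */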
3243 while (rtnh_ok(rtnh, remaining)) {
3244 memcpy(&r_cfg, cfg, sizeof(*cfg));
3245 if (rtnh->rtnh_ifindex)
3246 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3247
3248 attrlen = rtnh_attrlen(rtnh);
3249 if (attrlen > 0) {
3250 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3251
3252 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3253 if (nla) {
3254 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3255 r_cfg.fc_flags |= RTF_GATEWAY;
3256 }
3257 }
3258 err = ip6_route_del(&r_cfg);
3259 if (err)
3260 last_err = err;
3261
3262 rtnh = rtnh_next(rtnh, &remaining);
3263 }
3264
3265 return last_err;
3266}
3267
3268static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3269 struct netlink_ext_ack *extack)
3270{
3271 struct fib6_config cfg;
3272 int err;
3273
3274 err = rtm_to_fib6_config(skb, nlh, &cfg);
3275 if (err < 0)
3276 return err;
3277
3278 if (cfg.fc_mp)
3279 return ip6_route_multipath_del(&cfg);
3280 else {
3281 cfg.fc_delete_all_nh = 1;
3282 return ip6_route_del(&cfg);
3283 }
3284}
3285
3286static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3287 struct netlink_ext_ack *extack)
3288{
3289 struct fib6_config cfg;
3290 int err;
3291
3292 err = rtm_to_fib6_config(skb, nlh, &cfg);
3293 if (err < 0)
3294 return err;
3295
3296 if (cfg.fc_mp)
3297 return ip6_route_multipath_add(&cfg);
3298 else
3299 return ip6_route_add(&cfg);
3300}
3301
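/* Upper bound on the netlink message size needed by rt6_fill_node() for this
 * route: the rtmsg header, the fixed per-route attributes and, for multipath
 * routes, one RTA_MULTIPATH nexthop entry per sibling.
 */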
3302static size_t rt6_nlmsg_size(struct rt6_info *rt)
3303{
3304 int nexthop_len = 0;
3305
3306 if (rt->rt6i_nsiblings) {
3307 nexthop_len = nla_total_size(0)
3308 + NLA_ALIGN(sizeof(struct rtnexthop))
3309 + nla_total_size(16)
3310 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3311
3312 nexthop_len *= rt->rt6i_nsiblings;
3313 }
3314
3315 return NLMSG_ALIGN(sizeof(struct rtmsg))
3316 + nla_total_size(16)
3317 + nla_total_size(16)
3318 + nla_total_size(16)
3319 + nla_total_size(16)
3320 + nla_total_size(4)
3321 + nla_total_size(4)
3322 + nla_total_size(4)
3323 + nla_total_size(4)
3324 + RTAX_MAX * nla_total_size(4)
3325 + nla_total_size(sizeof(struct rta_cacheinfo))
3326 + nla_total_size(TCP_CA_NAME_MAX)
3327 + nla_total_size(1)
3328 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3329 + nexthop_len;
3330}
3331
3332static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3333 unsigned int *flags, bool skip_oif)
3334{
3335 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3336 *flags |= RTNH_F_LINKDOWN;
3337 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3338 *flags |= RTNH_F_DEAD;
3339 }
3340
3341 if (rt->rt6i_flags & RTF_GATEWAY) {
3342 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3343 goto nla_put_failure;
3344 }
3345
3346
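	/* RTA_OIF is omitted for multipath nexthops; struct rtnexthop
	 * already carries the ifindex.
	 */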
3347 if (!skip_oif && rt->dst.dev &&
3348 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3349 goto nla_put_failure;
3350
3351 if (rt->dst.lwtstate &&
3352 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3353 goto nla_put_failure;
3354
3355 return 0;
3356
3357nla_put_failure:
3358 return -EMSGSIZE;
3359}
3360
3361
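/* Emit one nexthop as an entry inside an RTA_MULTIPATH attribute. */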
3362static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3363{
3364 struct rtnexthop *rtnh;
3365 unsigned int flags = 0;
3366
3367 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3368 if (!rtnh)
3369 goto nla_put_failure;
3370
3371 rtnh->rtnh_hops = 0;
3372 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3373
3374 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
3375 goto nla_put_failure;
3376
3377 rtnh->rtnh_flags = flags;
3378
3379
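	/* length of the rtnexthop header plus all attributes added after it */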
3380 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3381
3382 return 0;
3383
3384nla_put_failure:
3385 return -EMSGSIZE;
3386}
3387
3388static int rt6_fill_node(struct net *net,
3389 struct sk_buff *skb, struct rt6_info *rt,
3390 struct in6_addr *dst, struct in6_addr *src,
3391 int iif, int type, u32 portid, u32 seq,
3392 unsigned int flags)
3393{
3394 u32 metrics[RTAX_MAX];
3395 struct rtmsg *rtm;
3396 struct nlmsghdr *nlh;
3397 long expires;
3398 u32 table;
3399
3400 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3401 if (!nlh)
3402 return -EMSGSIZE;
3403
3404 rtm = nlmsg_data(nlh);
3405 rtm->rtm_family = AF_INET6;
3406 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3407 rtm->rtm_src_len = rt->rt6i_src.plen;
3408 rtm->rtm_tos = 0;
3409 if (rt->rt6i_table)
3410 table = rt->rt6i_table->tb6_id;
3411 else
3412 table = RT6_TABLE_UNSPEC;
3413 rtm->rtm_table = table;
3414 if (nla_put_u32(skb, RTA_TABLE, table))
3415 goto nla_put_failure;
3416 if (rt->rt6i_flags & RTF_REJECT) {
3417 switch (rt->dst.error) {
3418 case -EINVAL:
3419 rtm->rtm_type = RTN_BLACKHOLE;
3420 break;
3421 case -EACCES:
3422 rtm->rtm_type = RTN_PROHIBIT;
3423 break;
3424 case -EAGAIN:
3425 rtm->rtm_type = RTN_THROW;
3426 break;
3427 default:
3428 rtm->rtm_type = RTN_UNREACHABLE;
3429 break;
3430 }
	} else if (rt->rt6i_flags & RTF_LOCAL)
3433 rtm->rtm_type = RTN_LOCAL;
3434 else if (rt->rt6i_flags & RTF_ANYCAST)
3435 rtm->rtm_type = RTN_ANYCAST;
3436 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3437 rtm->rtm_type = RTN_LOCAL;
3438 else
3439 rtm->rtm_type = RTN_UNICAST;
3440 rtm->rtm_flags = 0;
3441 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3442 rtm->rtm_protocol = rt->rt6i_protocol;
3443 if (rt->rt6i_flags & RTF_DYNAMIC)
3444 rtm->rtm_protocol = RTPROT_REDIRECT;
3445 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3446 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3447 rtm->rtm_protocol = RTPROT_RA;
3448 else
3449 rtm->rtm_protocol = RTPROT_KERNEL;
3450 }
3451
3452 if (rt->rt6i_flags & RTF_CACHE)
3453 rtm->rtm_flags |= RTM_F_CLONED;
3454
3455 if (dst) {
3456 if (nla_put_in6_addr(skb, RTA_DST, dst))
3457 goto nla_put_failure;
3458 rtm->rtm_dst_len = 128;
3459 } else if (rtm->rtm_dst_len)
3460 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3461 goto nla_put_failure;
3462#ifdef CONFIG_IPV6_SUBTREES
3463 if (src) {
3464 if (nla_put_in6_addr(skb, RTA_SRC, src))
3465 goto nla_put_failure;
3466 rtm->rtm_src_len = 128;
3467 } else if (rtm->rtm_src_len &&
3468 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3469 goto nla_put_failure;
3470#endif
3471 if (iif) {
3472#ifdef CONFIG_IPV6_MROUTE
3473 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3474 int err = ip6mr_get_route(net, skb, rtm, portid);
3475
3476 if (err == 0)
3477 return 0;
3478 if (err < 0)
3479 goto nla_put_failure;
3480 } else
3481#endif
3482 if (nla_put_u32(skb, RTA_IIF, iif))
3483 goto nla_put_failure;
3484 } else if (dst) {
3485 struct in6_addr saddr_buf;
3486 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3487 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3488 goto nla_put_failure;
3489 }
3490
3491 if (rt->rt6i_prefsrc.plen) {
3492 struct in6_addr saddr_buf;
3493 saddr_buf = rt->rt6i_prefsrc.addr;
3494 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3495 goto nla_put_failure;
3496 }
3497
3498 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3499 if (rt->rt6i_pmtu)
3500 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3501 if (rtnetlink_put_metrics(skb, metrics) < 0)
3502 goto nla_put_failure;
3503
3504 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3505 goto nla_put_failure;
3506
	/* For multipath routes, emit every sibling as a nexthop inside an
	 * RTA_MULTIPATH attribute; otherwise put the nexthop attributes
	 * directly into the message.
	 */
3510 if (rt->rt6i_nsiblings) {
3511 struct rt6_info *sibling, *next_sibling;
3512 struct nlattr *mp;
3513
3514 mp = nla_nest_start(skb, RTA_MULTIPATH);
3515 if (!mp)
3516 goto nla_put_failure;
3517
3518 if (rt6_add_nexthop(skb, rt) < 0)
3519 goto nla_put_failure;
3520
3521 list_for_each_entry_safe(sibling, next_sibling,
3522 &rt->rt6i_siblings, rt6i_siblings) {
3523 if (rt6_add_nexthop(skb, sibling) < 0)
3524 goto nla_put_failure;
3525 }
3526
3527 nla_nest_end(skb, mp);
3528 } else {
3529 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
3530 goto nla_put_failure;
3531 }
3532
3533 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3534
3535 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3536 goto nla_put_failure;
3537
3538 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3539 goto nla_put_failure;
3540
3541
3542 nlmsg_end(skb, nlh);
3543 return 0;
3544
3545nla_put_failure:
3546 nlmsg_cancel(skb, nlh);
3547 return -EMSGSIZE;
3548}
3549
3550int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3551{
3552 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3553 struct net *net = arg->net;
3554
3555 if (rt == net->ipv6.ip6_null_entry)
3556 return 0;
3557
3558 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3559 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3560
3561
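		/* the dump was restricted to prefix routes (RTM_F_PREFIX);
		 * skip everything else
		 */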
3562 if (rtm->rtm_flags & RTM_F_PREFIX &&
3563 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3564
3565 return 1;
3566 }
3567 }
3568
3569 return rt6_fill_node(net,
3570 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3571 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3572 NLM_F_MULTI);
3573}
3574
3575static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3576 struct netlink_ext_ack *extack)
3577{
3578 struct net *net = sock_net(in_skb->sk);
3579 struct nlattr *tb[RTA_MAX+1];
3580 struct rt6_info *rt;
3581 struct sk_buff *skb;
3582 struct rtmsg *rtm;
3583 struct flowi6 fl6;
3584 int err, iif = 0, oif = 0;
3585
3586 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3587 extack);
3588 if (err < 0)
3589 goto errout;
3590
3591 err = -EINVAL;
3592 memset(&fl6, 0, sizeof(fl6));
3593 rtm = nlmsg_data(nlh);
3594 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3595
3596 if (tb[RTA_SRC]) {
3597 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3598 goto errout;
3599
3600 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3601 }
3602
3603 if (tb[RTA_DST]) {
3604 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3605 goto errout;
3606
3607 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3608 }
3609
3610 if (tb[RTA_IIF])
3611 iif = nla_get_u32(tb[RTA_IIF]);
3612
3613 if (tb[RTA_OIF])
3614 oif = nla_get_u32(tb[RTA_OIF]);
3615
3616 if (tb[RTA_MARK])
3617 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3618
3619 if (tb[RTA_UID])
3620 fl6.flowi6_uid = make_kuid(current_user_ns(),
3621 nla_get_u32(tb[RTA_UID]));
3622 else
3623 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3624
3625 if (iif) {
3626 struct net_device *dev;
3627 int flags = 0;
3628
3629 dev = __dev_get_by_index(net, iif);
3630 if (!dev) {
3631 err = -ENODEV;
3632 goto errout;
3633 }
3634
3635 fl6.flowi6_iif = iif;
3636
3637 if (!ipv6_addr_any(&fl6.saddr))
3638 flags |= RT6_LOOKUP_F_HAS_SADDR;
3639
3640 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3641 flags);
3642 } else {
3643 fl6.flowi6_oif = oif;
3644
3645 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3646 }
3647
3648 if (rt == net->ipv6.ip6_null_entry) {
3649 err = rt->dst.error;
3650 ip6_rt_put(rt);
3651 goto errout;
3652 }
3653
3654 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3655 if (!skb) {
3656 ip6_rt_put(rt);
3657 err = -ENOBUFS;
3658 goto errout;
3659 }
3660
3661 skb_dst_set(skb, &rt->dst);
3662
3663 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3664 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3665 nlh->nlmsg_seq, 0);
3666 if (err < 0) {
3667 kfree_skb(skb);
3668 goto errout;
3669 }
3670
3671 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3672errout:
3673 return err;
3674}
3675
3676void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3677 unsigned int nlm_flags)
3678{
3679 struct sk_buff *skb;
3680 struct net *net = info->nl_net;
3681 u32 seq;
3682 int err;
3683
3684 err = -ENOBUFS;
3685 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3686
3687 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3688 if (!skb)
3689 goto errout;
3690
3691 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3692 event, info->portid, seq, nlm_flags);
3693 if (err < 0) {
3694
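		/* -EMSGSIZE would mean rt6_nlmsg_size() underestimated the message */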
3695 WARN_ON(err == -EMSGSIZE);
3696 kfree_skb(skb);
3697 goto errout;
3698 }
3699 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3700 info->nlh, gfp_any());
3701 return;
3702errout:
3703 if (err < 0)
3704 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3705}
3706
3707static int ip6_route_dev_notify(struct notifier_block *this,
3708 unsigned long event, void *ptr)
3709{
3710 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3711 struct net *net = dev_net(dev);
3712
3713 if (!(dev->flags & IFF_LOOPBACK))
3714 return NOTIFY_OK;
3715
3716 if (event == NETDEV_REGISTER) {
3717 net->ipv6.ip6_null_entry->dst.dev = dev;
3718 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3719#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3720 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3721 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3722 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3723 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3724#endif
3725 } else if (event == NETDEV_UNREGISTER &&
3726 dev->reg_state != NETREG_UNREGISTERED) {
		/* The unregister notification can be seen more than once
		 * while the loopback device is dismantled; drop the idev
		 * references taken at NETDEV_REGISTER time only while the
		 * device has not reached NETREG_UNREGISTERED.
		 */
3730 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3731#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3732 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3733 in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3734#endif
3735 }
3736
3737 return NOTIFY_OK;
3738}
3739
/*
 *	/proc
 */

3744#ifdef CONFIG_PROC_FS
3745
3746static const struct file_operations ipv6_route_proc_fops = {
3747 .owner = THIS_MODULE,
3748 .open = ipv6_route_open,
3749 .read = seq_read,
3750 .llseek = seq_lseek,
3751 .release = seq_release_net,
3752};
3753
3754static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3755{
3756 struct net *net = (struct net *)seq->private;
3757 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3758 net->ipv6.rt6_stats->fib_nodes,
3759 net->ipv6.rt6_stats->fib_route_nodes,
3760 net->ipv6.rt6_stats->fib_rt_alloc,
3761 net->ipv6.rt6_stats->fib_rt_entries,
3762 net->ipv6.rt6_stats->fib_rt_cache,
3763 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3764 net->ipv6.rt6_stats->fib_discarded_routes);
3765
3766 return 0;
3767}
3768
3769static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3770{
3771 return single_open_net(inode, file, rt6_stats_seq_show);
3772}
3773
3774static const struct file_operations rt6_stats_seq_fops = {
3775 .owner = THIS_MODULE,
3776 .open = rt6_stats_seq_open,
3777 .read = seq_read,
3778 .llseek = seq_lseek,
3779 .release = single_release_net,
3780};
3781#endif
3782
3783#ifdef CONFIG_SYSCTL
3784
3785static
3786int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3787 void __user *buffer, size_t *lenp, loff_t *ppos)
3788{
3789 struct net *net;
3790 int delay;
3791 if (!write)
3792 return -EINVAL;
3793
3794 net = (struct net *)ctl->extra1;
3795 delay = net->ipv6.sysctl.flush_delay;
3796 proc_dointvec(ctl, write, buffer, lenp, ppos);
3797 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3798 return 0;
3799}
3800
3801struct ctl_table ipv6_route_table_template[] = {
3802 {
3803 .procname = "flush",
3804 .data = &init_net.ipv6.sysctl.flush_delay,
3805 .maxlen = sizeof(int),
3806 .mode = 0200,
3807 .proc_handler = ipv6_sysctl_rtcache_flush
3808 },
3809 {
3810 .procname = "gc_thresh",
3811 .data = &ip6_dst_ops_template.gc_thresh,
3812 .maxlen = sizeof(int),
3813 .mode = 0644,
3814 .proc_handler = proc_dointvec,
3815 },
3816 {
3817 .procname = "max_size",
3818 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
3819 .maxlen = sizeof(int),
3820 .mode = 0644,
3821 .proc_handler = proc_dointvec,
3822 },
3823 {
3824 .procname = "gc_min_interval",
3825 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3826 .maxlen = sizeof(int),
3827 .mode = 0644,
3828 .proc_handler = proc_dointvec_jiffies,
3829 },
3830 {
3831 .procname = "gc_timeout",
3832 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3833 .maxlen = sizeof(int),
3834 .mode = 0644,
3835 .proc_handler = proc_dointvec_jiffies,
3836 },
3837 {
3838 .procname = "gc_interval",
3839 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3840 .maxlen = sizeof(int),
3841 .mode = 0644,
3842 .proc_handler = proc_dointvec_jiffies,
3843 },
3844 {
3845 .procname = "gc_elasticity",
3846 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3847 .maxlen = sizeof(int),
3848 .mode = 0644,
3849 .proc_handler = proc_dointvec,
3850 },
3851 {
3852 .procname = "mtu_expires",
3853 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3854 .maxlen = sizeof(int),
3855 .mode = 0644,
3856 .proc_handler = proc_dointvec_jiffies,
3857 },
3858 {
3859 .procname = "min_adv_mss",
3860 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3861 .maxlen = sizeof(int),
3862 .mode = 0644,
3863 .proc_handler = proc_dointvec,
3864 },
3865 {
3866 .procname = "gc_min_interval_ms",
3867 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3868 .maxlen = sizeof(int),
3869 .mode = 0644,
3870 .proc_handler = proc_dointvec_ms_jiffies,
3871 },
3872 { }
3873};
3874
3875struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3876{
3877 struct ctl_table *table;
3878
3879 table = kmemdup(ipv6_route_table_template,
3880 sizeof(ipv6_route_table_template),
3881 GFP_KERNEL);
3882
3883 if (table) {
3884 table[0].data = &net->ipv6.sysctl.flush_delay;
3885 table[0].extra1 = net;
3886 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3887 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3888 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3889 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3890 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3891 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3892 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3893 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3894 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3895
3896
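		/* Don't expose the flush sysctl to unprivileged user namespaces */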
3897 if (net->user_ns != &init_user_ns)
3898 table[0].procname = NULL;
3899 }
3900
3901 return table;
3902}
3903#endif
3904
3905static int __net_init ip6_route_net_init(struct net *net)
3906{
3907 int ret = -ENOMEM;
3908
3909 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3910 sizeof(net->ipv6.ip6_dst_ops));
3911
3912 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3913 goto out_ip6_dst_ops;
3914
3915 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3916 sizeof(*net->ipv6.ip6_null_entry),
3917 GFP_KERNEL);
3918 if (!net->ipv6.ip6_null_entry)
3919 goto out_ip6_dst_entries;
3920 net->ipv6.ip6_null_entry->dst.path =
3921 (struct dst_entry *)net->ipv6.ip6_null_entry;
3922 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3923 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3924 ip6_template_metrics, true);
3925
3926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3927 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3928 sizeof(*net->ipv6.ip6_prohibit_entry),
3929 GFP_KERNEL);
3930 if (!net->ipv6.ip6_prohibit_entry)
3931 goto out_ip6_null_entry;
3932 net->ipv6.ip6_prohibit_entry->dst.path =
3933 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3934 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3935 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3936 ip6_template_metrics, true);
3937
3938 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3939 sizeof(*net->ipv6.ip6_blk_hole_entry),
3940 GFP_KERNEL);
3941 if (!net->ipv6.ip6_blk_hole_entry)
3942 goto out_ip6_prohibit_entry;
3943 net->ipv6.ip6_blk_hole_entry->dst.path =
3944 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3945 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3946 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3947 ip6_template_metrics, true);
3948#endif
3949
3950 net->ipv6.sysctl.flush_delay = 0;
3951 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3952 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3953 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3954 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3955 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3956 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3957 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3958
3959 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3960
3961 ret = 0;
3962out:
3963 return ret;
3964
3965#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3966out_ip6_prohibit_entry:
3967 kfree(net->ipv6.ip6_prohibit_entry);
3968out_ip6_null_entry:
3969 kfree(net->ipv6.ip6_null_entry);
3970#endif
3971out_ip6_dst_entries:
3972 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3973out_ip6_dst_ops:
3974 goto out;
3975}
3976
3977static void __net_exit ip6_route_net_exit(struct net *net)
3978{
3979 kfree(net->ipv6.ip6_null_entry);
3980#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3981 kfree(net->ipv6.ip6_prohibit_entry);
3982 kfree(net->ipv6.ip6_blk_hole_entry);
3983#endif
3984 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3985}
3986
3987static int __net_init ip6_route_net_init_late(struct net *net)
3988{
3989#ifdef CONFIG_PROC_FS
3990 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3991 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3992#endif
3993 return 0;
3994}
3995
3996static void __net_exit ip6_route_net_exit_late(struct net *net)
3997{
3998#ifdef CONFIG_PROC_FS
3999 remove_proc_entry("ipv6_route", net->proc_net);
4000 remove_proc_entry("rt6_stats", net->proc_net);
4001#endif
4002}
4003
4004static struct pernet_operations ip6_route_net_ops = {
4005 .init = ip6_route_net_init,
4006 .exit = ip6_route_net_exit,
4007};
4008
4009static int __net_init ipv6_inetpeer_init(struct net *net)
4010{
4011 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4012
4013 if (!bp)
4014 return -ENOMEM;
4015 inet_peer_base_init(bp);
4016 net->ipv6.peers = bp;
4017 return 0;
4018}
4019
4020static void __net_exit ipv6_inetpeer_exit(struct net *net)
4021{
4022 struct inet_peer_base *bp = net->ipv6.peers;
4023
4024 net->ipv6.peers = NULL;
4025 inetpeer_invalidate_tree(bp);
4026 kfree(bp);
4027}
4028
4029static struct pernet_operations ipv6_inetpeer_ops = {
4030 .init = ipv6_inetpeer_init,
4031 .exit = ipv6_inetpeer_exit,
4032};
4033
4034static struct pernet_operations ip6_route_net_late_ops = {
4035 .init = ip6_route_net_init_late,
4036 .exit = ip6_route_net_exit_late,
4037};
4038
4039static struct notifier_block ip6_route_dev_notifier = {
4040 .notifier_call = ip6_route_dev_notify,
4041 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
4042};
4043
4044void __init ip6_route_init_special_entries(void)
4045{
	/* The loopback device is registered before this code runs, so the
	 * template entries never took a device reference; wire up dev and
	 * idev for init_net by hand.
	 */
4049 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4050 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4052 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4053 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4054 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4055 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4056 #endif
4057}
4058
4059int __init ip6_route_init(void)
4060{
4061 int ret;
4062 int cpu;
4063
4064 ret = -ENOMEM;
4065 ip6_dst_ops_template.kmem_cachep =
4066 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4067 SLAB_HWCACHE_ALIGN, NULL);
4068 if (!ip6_dst_ops_template.kmem_cachep)
4069 goto out;
4070
4071 ret = dst_entries_init(&ip6_dst_blackhole_ops);
4072 if (ret)
4073 goto out_kmem_cache;
4074
4075 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4076 if (ret)
4077 goto out_dst_entries;
4078
4079 ret = register_pernet_subsys(&ip6_route_net_ops);
4080 if (ret)
4081 goto out_register_inetpeer;
4082
4083 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4084
4085 ret = fib6_init();
4086 if (ret)
4087 goto out_register_subsys;
4088
4089 ret = xfrm6_init();
4090 if (ret)
4091 goto out_fib6_init;
4092
4093 ret = fib6_rules_init();
4094 if (ret)
4095 goto xfrm6_init;
4096
4097 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4098 if (ret)
4099 goto fib6_rules_init;
4100
4101 ret = -ENOBUFS;
4102 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
4103 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
4104 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
4105 goto out_register_late_subsys;
4106
4107 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
4108 if (ret)
4109 goto out_register_late_subsys;
4110
4111 for_each_possible_cpu(cpu) {
4112 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4113
4114 INIT_LIST_HEAD(&ul->head);
4115 spin_lock_init(&ul->lock);
4116 }
4117
4118out:
4119 return ret;
4120
4121out_register_late_subsys:
4122 unregister_pernet_subsys(&ip6_route_net_late_ops);
4123fib6_rules_init:
4124 fib6_rules_cleanup();
4125xfrm6_init:
4126 xfrm6_fini();
4127out_fib6_init:
4128 fib6_gc_cleanup();
4129out_register_subsys:
4130 unregister_pernet_subsys(&ip6_route_net_ops);
4131out_register_inetpeer:
4132 unregister_pernet_subsys(&ipv6_inetpeer_ops);
4133out_dst_entries:
4134 dst_entries_destroy(&ip6_dst_blackhole_ops);
4135out_kmem_cache:
4136 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4137 goto out;
4138}
4139
4140void ip6_route_cleanup(void)
4141{
4142 unregister_netdevice_notifier(&ip6_route_dev_notifier);
4143 unregister_pernet_subsys(&ip6_route_net_late_ops);
4144 fib6_rules_cleanup();
4145 xfrm6_fini();
4146 fib6_gc_cleanup();
4147 unregister_pernet_subsys(&ipv6_inetpeer_ops);
4148 unregister_pernet_subsys(&ip6_route_net_ops);
4149 dst_entries_destroy(&ip6_dst_blackhole_ops);
4150 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4151}
4152