1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <linux/capability.h>
30#include <linux/errno.h>
31#include <linux/export.h>
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
40#include <linux/mroute6.h>
41#include <linux/init.h>
42#include <linux/if_arp.h>
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#include <linux/nsproxy.h>
46#include <linux/slab.h>
47#include <net/net_namespace.h>
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
58#include <net/netevent.h>
59#include <net/netlink.h>
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71static unsigned int ip6_mtu(const struct dst_entry *dst);
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76static int ip6_dst_gc(struct dst_ops *ops);
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83#ifdef CONFIG_IPV6_ROUTE_INFO
84static struct rt6_info *rt6_add_route_info(struct net *net,
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
87 unsigned int pref);
88static struct rt6_info *rt6_get_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
91#endif
92
93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94{
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
102 if (!rt->rt6i_peer)
103 rt6_bind_peer(rt, 1);
104
105 peer = rt->rt6i_peer;
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
126static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
127{
128 struct in6_addr *p = &rt->rt6i_gateway;
129
130 if (!ipv6_addr_any(p))
131 return (const void *) p;
132 return daddr;
133}
134
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 struct rt6_info *rt = (struct rt6_info *) dst;
138 struct neighbour *n;
139
140 daddr = choose_neigh_daddr(rt, daddr);
141 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
142 if (n)
143 return n;
144 return neigh_create(&nd_tbl, daddr, dst->dev);
145}
146
147static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
148{
149 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
150 if (!n) {
151 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
152 if (IS_ERR(n))
153 return PTR_ERR(n);
154 }
155 dst_set_neighbour(&rt->dst, n);
156
157 return 0;
158}
159
160static struct dst_ops ip6_dst_ops_template = {
161 .family = AF_INET6,
162 .protocol = cpu_to_be16(ETH_P_IPV6),
163 .gc = ip6_dst_gc,
164 .gc_thresh = 1024,
165 .check = ip6_dst_check,
166 .default_advmss = ip6_default_advmss,
167 .mtu = ip6_mtu,
168 .cow_metrics = ipv6_cow_metrics,
169 .destroy = ip6_dst_destroy,
170 .ifdown = ip6_dst_ifdown,
171 .negative_advice = ip6_negative_advice,
172 .link_failure = ip6_link_failure,
173 .update_pmtu = ip6_rt_update_pmtu,
174 .local_out = __ip6_local_out,
175 .neigh_lookup = ip6_neigh_lookup,
176};
177
178static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
179{
180 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
181
182 return mtu ? : dst->dev->mtu;
183}
184
185static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
186{
187}
188
189static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
190 unsigned long old)
191{
192 return NULL;
193}
194
195static struct dst_ops ip6_dst_blackhole_ops = {
196 .family = AF_INET6,
197 .protocol = cpu_to_be16(ETH_P_IPV6),
198 .destroy = ip6_dst_destroy,
199 .check = ip6_dst_check,
200 .mtu = ip6_blackhole_mtu,
201 .default_advmss = ip6_default_advmss,
202 .update_pmtu = ip6_rt_blackhole_update_pmtu,
203 .cow_metrics = ip6_rt_blackhole_cow_metrics,
204 .neigh_lookup = ip6_neigh_lookup,
205};
206
207static const u32 ip6_template_metrics[RTAX_MAX] = {
208 [RTAX_HOPLIMIT - 1] = 255,
209};
210
211static struct rt6_info ip6_null_entry_template = {
212 .dst = {
213 .__refcnt = ATOMIC_INIT(1),
214 .__use = 1,
215 .obsolete = -1,
216 .error = -ENETUNREACH,
217 .input = ip6_pkt_discard,
218 .output = ip6_pkt_discard_out,
219 },
220 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
221 .rt6i_protocol = RTPROT_KERNEL,
222 .rt6i_metric = ~(u32) 0,
223 .rt6i_ref = ATOMIC_INIT(1),
224};
225
226#ifdef CONFIG_IPV6_MULTIPLE_TABLES
227
228static int ip6_pkt_prohibit(struct sk_buff *skb);
229static int ip6_pkt_prohibit_out(struct sk_buff *skb);
230
231static struct rt6_info ip6_prohibit_entry_template = {
232 .dst = {
233 .__refcnt = ATOMIC_INIT(1),
234 .__use = 1,
235 .obsolete = -1,
236 .error = -EACCES,
237 .input = ip6_pkt_prohibit,
238 .output = ip6_pkt_prohibit_out,
239 },
240 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
241 .rt6i_protocol = RTPROT_KERNEL,
242 .rt6i_metric = ~(u32) 0,
243 .rt6i_ref = ATOMIC_INIT(1),
244};
245
246static struct rt6_info ip6_blk_hole_entry_template = {
247 .dst = {
248 .__refcnt = ATOMIC_INIT(1),
249 .__use = 1,
250 .obsolete = -1,
251 .error = -EINVAL,
252 .input = dst_discard,
253 .output = dst_discard,
254 },
255 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
256 .rt6i_protocol = RTPROT_KERNEL,
257 .rt6i_metric = ~(u32) 0,
258 .rt6i_ref = ATOMIC_INIT(1),
259};
260
261#endif
262
263
264static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
265 struct net_device *dev,
266 int flags)
267{
268 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
269
270 if (rt)
271 memset(&rt->rt6i_table, 0,
272 sizeof(*rt) - sizeof(struct dst_entry));
273
274 return rt;
275}
276
277static void ip6_dst_destroy(struct dst_entry *dst)
278{
279 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev;
281 struct inet_peer *peer = rt->rt6i_peer;
282
283 if (!(rt->dst.flags & DST_HOST))
284 dst_destroy_metrics_generic(dst);
285
286 if (idev) {
287 rt->rt6i_idev = NULL;
288 in6_dev_put(idev);
289 }
290
291 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
292 dst_release(dst->from);
293
294 if (peer) {
295 rt->rt6i_peer = NULL;
296 inet_putpeer(peer);
297 }
298}
299
300static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
301
302static u32 rt6_peer_genid(void)
303{
304 return atomic_read(&__rt6_peer_genid);
305}
306
307void rt6_bind_peer(struct rt6_info *rt, int create)
308{
309 struct inet_peer *peer;
310
311 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
312 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
313 inet_putpeer(peer);
314 else
315 rt->rt6i_peer_genid = rt6_peer_genid();
316}
317
318static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
319 int how)
320{
321 struct rt6_info *rt = (struct rt6_info *)dst;
322 struct inet6_dev *idev = rt->rt6i_idev;
323 struct net_device *loopback_dev =
324 dev_net(dev)->loopback_dev;
325
326 if (dev != loopback_dev && idev && idev->dev == dev) {
327 struct inet6_dev *loopback_idev =
328 in6_dev_get(loopback_dev);
329 if (loopback_idev) {
330 rt->rt6i_idev = loopback_idev;
331 in6_dev_put(idev);
332 }
333 }
334}
335
336static bool rt6_check_expired(const struct rt6_info *rt)
337{
338 struct rt6_info *ort = NULL;
339
340 if (rt->rt6i_flags & RTF_EXPIRES) {
341 if (time_after(jiffies, rt->dst.expires))
342 return true;
343 } else if (rt->dst.from) {
344 ort = (struct rt6_info *) rt->dst.from;
345 return (ort->rt6i_flags & RTF_EXPIRES) &&
346 time_after(jiffies, ort->dst.expires);
347 }
348 return false;
349}
350
351static bool rt6_need_strict(const struct in6_addr *daddr)
352{
353 return ipv6_addr_type(daddr) &
354 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
355}
356
357
358
359
360
361static inline struct rt6_info *rt6_device_match(struct net *net,
362 struct rt6_info *rt,
363 const struct in6_addr *saddr,
364 int oif,
365 int flags)
366{
367 struct rt6_info *local = NULL;
368 struct rt6_info *sprt;
369
370 if (!oif && ipv6_addr_any(saddr))
371 goto out;
372
373 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
374 struct net_device *dev = sprt->dst.dev;
375
376 if (oif) {
377 if (dev->ifindex == oif)
378 return sprt;
379 if (dev->flags & IFF_LOOPBACK) {
380 if (!sprt->rt6i_idev ||
381 sprt->rt6i_idev->dev->ifindex != oif) {
382 if (flags & RT6_LOOKUP_F_IFACE && oif)
383 continue;
384 if (local && (!oif ||
385 local->rt6i_idev->dev->ifindex == oif))
386 continue;
387 }
388 local = sprt;
389 }
390 } else {
391 if (ipv6_chk_addr(net, saddr, dev,
392 flags & RT6_LOOKUP_F_IFACE))
393 return sprt;
394 }
395 }
396
397 if (oif) {
398 if (local)
399 return local;
400
401 if (flags & RT6_LOOKUP_F_IFACE)
402 return net->ipv6.ip6_null_entry;
403 }
404out:
405 return rt;
406}
407
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation for the route's neighbour if its state
 * is not NUD_VALID and the last update is older than the interface's
 * rtr_probe_interval.
 *
 * @rt may be NULL, in which case nothing is done.  The neighbour's
 * "updated" timestamp is reset to jiffies before the solicitation goes
 * out to the solicited-node multicast address of the neighbour's key.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held — confirm that this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr *target;
	struct in6_addr mcaddr;
	bool probe_due = false;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	/* Re-check the state under the lock before touching the timestamp. */
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		probe_due = true;
	}
	read_unlock_bh(&neigh->lock);

	if (probe_due) {
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
447
448
449
450
451static inline int rt6_check_dev(struct rt6_info *rt, int oif)
452{
453 struct net_device *dev = rt->dst.dev;
454 if (!oif || dev->ifindex == oif)
455 return 2;
456 if ((dev->flags & IFF_LOOPBACK) &&
457 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
458 return 1;
459 return 0;
460}
461
462static inline int rt6_check_neigh(struct rt6_info *rt)
463{
464 struct neighbour *neigh;
465 int m;
466
467 rcu_read_lock();
468 neigh = dst_get_neighbour_noref(&rt->dst);
469 if (rt->rt6i_flags & RTF_NONEXTHOP ||
470 !(rt->rt6i_flags & RTF_GATEWAY))
471 m = 1;
472 else if (neigh) {
473 read_lock_bh(&neigh->lock);
474 if (neigh->nud_state & NUD_VALID)
475 m = 2;
476#ifdef CONFIG_IPV6_ROUTER_PREF
477 else if (neigh->nud_state & NUD_FAILED)
478 m = 0;
479#endif
480 else
481 m = 1;
482 read_unlock_bh(&neigh->lock);
483 } else
484 m = 0;
485 rcu_read_unlock();
486 return m;
487}
488
489static int rt6_score_route(struct rt6_info *rt, int oif,
490 int strict)
491{
492 int m, n;
493
494 m = rt6_check_dev(rt, oif);
495 if (!m && (strict & RT6_LOOKUP_F_IFACE))
496 return -1;
497#ifdef CONFIG_IPV6_ROUTER_PREF
498 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
499#endif
500 n = rt6_check_neigh(rt);
501 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
502 return -1;
503 return m;
504}
505
506static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
507 int *mpri, struct rt6_info *match)
508{
509 int m;
510
511 if (rt6_check_expired(rt))
512 goto out;
513
514 m = rt6_score_route(rt, oif, strict);
515 if (m < 0)
516 goto out;
517
518 if (m > *mpri) {
519 if (strict & RT6_LOOKUP_F_REACHABLE)
520 rt6_probe(match);
521 *mpri = m;
522 match = rt;
523 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
524 rt6_probe(rt);
525 }
526
527out:
528 return match;
529}
530
531static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
532 struct rt6_info *rr_head,
533 u32 metric, int oif, int strict)
534{
535 struct rt6_info *rt, *match;
536 int mpri = -1;
537
538 match = NULL;
539 for (rt = rr_head; rt && rt->rt6i_metric == metric;
540 rt = rt->dst.rt6_next)
541 match = find_match(rt, oif, strict, &mpri, match);
542 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
543 rt = rt->dst.rt6_next)
544 match = find_match(rt, oif, strict, &mpri, match);
545
546 return match;
547}
548
549static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
550{
551 struct rt6_info *match, *rt0;
552 struct net *net;
553
554 rt0 = fn->rr_ptr;
555 if (!rt0)
556 fn->rr_ptr = rt0 = fn->leaf;
557
558 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
559
560 if (!match &&
561 (strict & RT6_LOOKUP_F_REACHABLE)) {
562 struct rt6_info *next = rt0->dst.rt6_next;
563
564
565 if (!next || next->rt6i_metric != rt0->rt6i_metric)
566 next = fn->leaf;
567
568 if (next != rt0)
569 fn->rr_ptr = next;
570 }
571
572 net = dev_net(rt0->dst.dev);
573 return match ? match : net->ipv6.ip6_null_entry;
574}
575
576#ifdef CONFIG_IPV6_ROUTE_INFO
577int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
578 const struct in6_addr *gwaddr)
579{
580 struct net *net = dev_net(dev);
581 struct route_info *rinfo = (struct route_info *) opt;
582 struct in6_addr prefix_buf, *prefix;
583 unsigned int pref;
584 unsigned long lifetime;
585 struct rt6_info *rt;
586
587 if (len < sizeof(struct route_info)) {
588 return -EINVAL;
589 }
590
591
592 if (rinfo->length > 3) {
593 return -EINVAL;
594 } else if (rinfo->prefix_len > 128) {
595 return -EINVAL;
596 } else if (rinfo->prefix_len > 64) {
597 if (rinfo->length < 2) {
598 return -EINVAL;
599 }
600 } else if (rinfo->prefix_len > 0) {
601 if (rinfo->length < 1) {
602 return -EINVAL;
603 }
604 }
605
606 pref = rinfo->route_pref;
607 if (pref == ICMPV6_ROUTER_PREF_INVALID)
608 return -EINVAL;
609
610 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
611
612 if (rinfo->length == 3)
613 prefix = (struct in6_addr *)rinfo->prefix;
614 else {
615
616 ipv6_addr_prefix(&prefix_buf,
617 (struct in6_addr *)rinfo->prefix,
618 rinfo->prefix_len);
619 prefix = &prefix_buf;
620 }
621
622 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
623 dev->ifindex);
624
625 if (rt && !lifetime) {
626 ip6_del_rt(rt);
627 rt = NULL;
628 }
629
630 if (!rt && lifetime)
631 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
632 pref);
633 else if (rt)
634 rt->rt6i_flags = RTF_ROUTEINFO |
635 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
636
637 if (rt) {
638 if (!addrconf_finite_timeout(lifetime))
639 rt6_clean_expires(rt);
640 else
641 rt6_set_expires(rt, jiffies + HZ * lifetime);
642
643 dst_release(&rt->dst);
644 }
645 return 0;
646}
647#endif
648
/*
 * BACKTRACK - climb the fib6 tree when the current node produced no route.
 *
 * Relies on the expanding function providing:
 *   - locals "rt" (current result) and "fn" (current fib6 node)
 *   - labels "out" and "restart"
 *
 * Does nothing unless rt is the namespace's null entry.  Otherwise it walks
 * towards the root: at the top-level root it jumps to "out"; at each step
 * it either descends into the parent's subtree (looked up by @saddr) when
 * that subtree exists and is not where we came from, or moves to the
 * parent; as soon as the new node has RTN_RTINFO it jumps to "restart".
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	struct fib6_node *pn;						\
									\
	if (rt != __net->ipv6.ip6_null_entry)				\
		break;							\
	for (;;) {							\
		if (fn->fn_flags & RTN_TL_ROOT)				\
			goto out;					\
		pn = fn->parent;					\
		if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn)	\
			fn = pn;					\
		else							\
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);	\
		if (fn->fn_flags & RTN_RTINFO)				\
			goto restart;					\
	}								\
} while (0)
666
667static struct rt6_info *ip6_pol_route_lookup(struct net *net,
668 struct fib6_table *table,
669 struct flowi6 *fl6, int flags)
670{
671 struct fib6_node *fn;
672 struct rt6_info *rt;
673
674 read_lock_bh(&table->tb6_lock);
675 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
676restart:
677 rt = fn->leaf;
678 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
679 BACKTRACK(net, &fl6->saddr);
680out:
681 dst_use(&rt->dst, jiffies);
682 read_unlock_bh(&table->tb6_lock);
683 return rt;
684
685}
686
687struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
688 int flags)
689{
690 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
691}
692EXPORT_SYMBOL_GPL(ip6_route_lookup);
693
694struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
695 const struct in6_addr *saddr, int oif, int strict)
696{
697 struct flowi6 fl6 = {
698 .flowi6_oif = oif,
699 .daddr = *daddr,
700 };
701 struct dst_entry *dst;
702 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
703
704 if (saddr) {
705 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
706 flags |= RT6_LOOKUP_F_HAS_SADDR;
707 }
708
709 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
710 if (dst->error == 0)
711 return (struct rt6_info *) dst;
712
713 dst_release(dst);
714
715 return NULL;
716}
717
718EXPORT_SYMBOL(rt6_lookup);
719
720
721
722
723
724
725
726static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
727{
728 int err;
729 struct fib6_table *table;
730
731 table = rt->rt6i_table;
732 write_lock_bh(&table->tb6_lock);
733 err = fib6_add(&table->tb6_root, rt, info);
734 write_unlock_bh(&table->tb6_lock);
735
736 return err;
737}
738
739int ip6_ins_rt(struct rt6_info *rt)
740{
741 struct nl_info info = {
742 .nl_net = dev_net(rt->dst.dev),
743 };
744 return __ip6_ins_rt(rt, &info);
745}
746
747static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
748 const struct in6_addr *daddr,
749 const struct in6_addr *saddr)
750{
751 struct rt6_info *rt;
752
753
754
755
756
757 rt = ip6_rt_copy(ort, daddr);
758
759 if (rt) {
760 int attempts = !in_softirq();
761
762 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
763 if (ort->rt6i_dst.plen != 128 &&
764 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
765 rt->rt6i_flags |= RTF_ANYCAST;
766 rt->rt6i_gateway = *daddr;
767 }
768
769 rt->rt6i_flags |= RTF_CACHE;
770
771#ifdef CONFIG_IPV6_SUBTREES
772 if (rt->rt6i_src.plen && saddr) {
773 rt->rt6i_src.addr = *saddr;
774 rt->rt6i_src.plen = 128;
775 }
776#endif
777
778 retry:
779 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
780 struct net *net = dev_net(rt->dst.dev);
781 int saved_rt_min_interval =
782 net->ipv6.sysctl.ip6_rt_gc_min_interval;
783 int saved_rt_elasticity =
784 net->ipv6.sysctl.ip6_rt_gc_elasticity;
785
786 if (attempts-- > 0) {
787 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
788 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
789
790 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
791
792 net->ipv6.sysctl.ip6_rt_gc_elasticity =
793 saved_rt_elasticity;
794 net->ipv6.sysctl.ip6_rt_gc_min_interval =
795 saved_rt_min_interval;
796 goto retry;
797 }
798
799 net_warn_ratelimited("Neighbour table overflow\n");
800 dst_free(&rt->dst);
801 return NULL;
802 }
803 }
804
805 return rt;
806}
807
808static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
809 const struct in6_addr *daddr)
810{
811 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
812
813 if (rt) {
814 rt->rt6i_flags |= RTF_CACHE;
815 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
816 }
817 return rt;
818}
819
820static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
821 struct flowi6 *fl6, int flags)
822{
823 struct fib6_node *fn;
824 struct rt6_info *rt, *nrt;
825 int strict = 0;
826 int attempts = 3;
827 int err;
828 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
829
830 strict |= flags & RT6_LOOKUP_F_IFACE;
831
832relookup:
833 read_lock_bh(&table->tb6_lock);
834
835restart_2:
836 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
837
838restart:
839 rt = rt6_select(fn, oif, strict | reachable);
840
841 BACKTRACK(net, &fl6->saddr);
842 if (rt == net->ipv6.ip6_null_entry ||
843 rt->rt6i_flags & RTF_CACHE)
844 goto out;
845
846 dst_hold(&rt->dst);
847 read_unlock_bh(&table->tb6_lock);
848
849 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
850 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
851 else if (!(rt->dst.flags & DST_HOST))
852 nrt = rt6_alloc_clone(rt, &fl6->daddr);
853 else
854 goto out2;
855
856 dst_release(&rt->dst);
857 rt = nrt ? : net->ipv6.ip6_null_entry;
858
859 dst_hold(&rt->dst);
860 if (nrt) {
861 err = ip6_ins_rt(nrt);
862 if (!err)
863 goto out2;
864 }
865
866 if (--attempts <= 0)
867 goto out2;
868
869
870
871
872
873 dst_release(&rt->dst);
874 goto relookup;
875
876out:
877 if (reachable) {
878 reachable = 0;
879 goto restart_2;
880 }
881 dst_hold(&rt->dst);
882 read_unlock_bh(&table->tb6_lock);
883out2:
884 rt->dst.lastuse = jiffies;
885 rt->dst.__use++;
886
887 return rt;
888}
889
890static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
891 struct flowi6 *fl6, int flags)
892{
893 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
894}
895
896static struct dst_entry *ip6_route_input_lookup(struct net *net,
897 struct net_device *dev,
898 struct flowi6 *fl6, int flags)
899{
900 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
901 flags |= RT6_LOOKUP_F_IFACE;
902
903 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
904}
905
906void ip6_route_input(struct sk_buff *skb)
907{
908 const struct ipv6hdr *iph = ipv6_hdr(skb);
909 struct net *net = dev_net(skb->dev);
910 int flags = RT6_LOOKUP_F_HAS_SADDR;
911 struct flowi6 fl6 = {
912 .flowi6_iif = skb->dev->ifindex,
913 .daddr = iph->daddr,
914 .saddr = iph->saddr,
915 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
916 .flowi6_mark = skb->mark,
917 .flowi6_proto = iph->nexthdr,
918 };
919
920 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
921}
922
923static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
924 struct flowi6 *fl6, int flags)
925{
926 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
927}
928
929struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
930 struct flowi6 *fl6)
931{
932 int flags = 0;
933
934 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
935 flags |= RT6_LOOKUP_F_IFACE;
936
937 if (!ipv6_addr_any(&fl6->saddr))
938 flags |= RT6_LOOKUP_F_HAS_SADDR;
939 else if (sk)
940 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
941
942 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
943}
944
945EXPORT_SYMBOL(ip6_route_output);
946
947struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
948{
949 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
950 struct dst_entry *new = NULL;
951
952 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
953 if (rt) {
954 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
955
956 new = &rt->dst;
957
958 new->__use = 1;
959 new->input = dst_discard;
960 new->output = dst_discard;
961
962 if (dst_metrics_read_only(&ort->dst))
963 new->_metrics = ort->dst._metrics;
964 else
965 dst_copy_metrics(new, &ort->dst);
966 rt->rt6i_idev = ort->rt6i_idev;
967 if (rt->rt6i_idev)
968 in6_dev_hold(rt->rt6i_idev);
969
970 rt->rt6i_gateway = ort->rt6i_gateway;
971 rt->rt6i_flags = ort->rt6i_flags;
972 rt6_clean_expires(rt);
973 rt->rt6i_metric = 0;
974
975 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
976#ifdef CONFIG_IPV6_SUBTREES
977 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
978#endif
979
980 dst_free(new);
981 }
982
983 dst_release(dst_orig);
984 return new ? new : ERR_PTR(-ENOMEM);
985}
986
987
988
989
990
991static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
992{
993 struct rt6_info *rt;
994
995 rt = (struct rt6_info *) dst;
996
997 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
998 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
999 if (!rt->rt6i_peer)
1000 rt6_bind_peer(rt, 0);
1001 rt->rt6i_peer_genid = rt6_peer_genid();
1002 }
1003 return dst;
1004 }
1005 return NULL;
1006}
1007
1008static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1009{
1010 struct rt6_info *rt = (struct rt6_info *) dst;
1011
1012 if (rt) {
1013 if (rt->rt6i_flags & RTF_CACHE) {
1014 if (rt6_check_expired(rt)) {
1015 ip6_del_rt(rt);
1016 dst = NULL;
1017 }
1018 } else {
1019 dst_release(dst);
1020 dst = NULL;
1021 }
1022 }
1023 return dst;
1024}
1025
1026static void ip6_link_failure(struct sk_buff *skb)
1027{
1028 struct rt6_info *rt;
1029
1030 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1031
1032 rt = (struct rt6_info *) skb_dst(skb);
1033 if (rt) {
1034 if (rt->rt6i_flags & RTF_CACHE)
1035 rt6_update_expires(rt, 0);
1036 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1037 rt->rt6i_node->fn_sernum = -1;
1038 }
1039}
1040
1041static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1042{
1043 struct rt6_info *rt6 = (struct rt6_info*)dst;
1044
1045 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1046 rt6->rt6i_flags |= RTF_MODIFIED;
1047 if (mtu < IPV6_MIN_MTU) {
1048 u32 features = dst_metric(dst, RTAX_FEATURES);
1049 mtu = IPV6_MIN_MTU;
1050 features |= RTAX_FEATURE_ALLFRAG;
1051 dst_metric_set(dst, RTAX_FEATURES, features);
1052 }
1053 dst_metric_set(dst, RTAX_MTU, mtu);
1054 }
1055}
1056
1057static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1058{
1059 struct net_device *dev = dst->dev;
1060 unsigned int mtu = dst_mtu(dst);
1061 struct net *net = dev_net(dev);
1062
1063 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1064
1065 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1066 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1067
1068
1069
1070
1071
1072
1073
1074 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1075 mtu = IPV6_MAXPLEN;
1076 return mtu;
1077}
1078
1079static unsigned int ip6_mtu(const struct dst_entry *dst)
1080{
1081 struct inet6_dev *idev;
1082 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1083
1084 if (mtu)
1085 return mtu;
1086
1087 mtu = IPV6_MIN_MTU;
1088
1089 rcu_read_lock();
1090 idev = __in6_dev_get(dst->dev);
1091 if (idev)
1092 mtu = idev->cnf.mtu6;
1093 rcu_read_unlock();
1094
1095 return mtu;
1096}
1097
/*
 * Singly linked list (through dst_entry::next) of the dst entries created
 * by icmp6_dst_alloc(); every access in this file holds icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1100
1101struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1102 struct neighbour *neigh,
1103 struct flowi6 *fl6)
1104{
1105 struct dst_entry *dst;
1106 struct rt6_info *rt;
1107 struct inet6_dev *idev = in6_dev_get(dev);
1108 struct net *net = dev_net(dev);
1109
1110 if (unlikely(!idev))
1111 return ERR_PTR(-ENODEV);
1112
1113 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1114 if (unlikely(!rt)) {
1115 in6_dev_put(idev);
1116 dst = ERR_PTR(-ENOMEM);
1117 goto out;
1118 }
1119
1120 if (neigh)
1121 neigh_hold(neigh);
1122 else {
1123 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1124 if (IS_ERR(neigh)) {
1125 in6_dev_put(idev);
1126 dst_free(&rt->dst);
1127 return ERR_CAST(neigh);
1128 }
1129 }
1130
1131 rt->dst.flags |= DST_HOST;
1132 rt->dst.output = ip6_output;
1133 dst_set_neighbour(&rt->dst, neigh);
1134 atomic_set(&rt->dst.__refcnt, 1);
1135 rt->rt6i_dst.addr = fl6->daddr;
1136 rt->rt6i_dst.plen = 128;
1137 rt->rt6i_idev = idev;
1138 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1139
1140 spin_lock_bh(&icmp6_dst_lock);
1141 rt->dst.next = icmp6_dst_gc_list;
1142 icmp6_dst_gc_list = &rt->dst;
1143 spin_unlock_bh(&icmp6_dst_lock);
1144
1145 fib6_force_start_gc(net);
1146
1147 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1148
1149out:
1150 return dst;
1151}
1152
1153int icmp6_dst_gc(void)
1154{
1155 struct dst_entry *dst, **pprev;
1156 int more = 0;
1157
1158 spin_lock_bh(&icmp6_dst_lock);
1159 pprev = &icmp6_dst_gc_list;
1160
1161 while ((dst = *pprev) != NULL) {
1162 if (!atomic_read(&dst->__refcnt)) {
1163 *pprev = dst->next;
1164 dst_free(dst);
1165 } else {
1166 pprev = &dst->next;
1167 ++more;
1168 }
1169 }
1170
1171 spin_unlock_bh(&icmp6_dst_lock);
1172
1173 return more;
1174}
1175
1176static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1177 void *arg)
1178{
1179 struct dst_entry *dst, **pprev;
1180
1181 spin_lock_bh(&icmp6_dst_lock);
1182 pprev = &icmp6_dst_gc_list;
1183 while ((dst = *pprev) != NULL) {
1184 struct rt6_info *rt = (struct rt6_info *) dst;
1185 if (func(rt, arg)) {
1186 *pprev = dst->next;
1187 dst_free(dst);
1188 } else {
1189 pprev = &dst->next;
1190 }
1191 }
1192 spin_unlock_bh(&icmp6_dst_lock);
1193}
1194
1195static int ip6_dst_gc(struct dst_ops *ops)
1196{
1197 unsigned long now = jiffies;
1198 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1199 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1200 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1201 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1202 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1203 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1204 int entries;
1205
1206 entries = dst_entries_get_fast(ops);
1207 if (time_after(rt_last_gc + rt_min_interval, now) &&
1208 entries <= rt_max_size)
1209 goto out;
1210
1211 net->ipv6.ip6_rt_gc_expire++;
1212 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1213 net->ipv6.ip6_rt_last_gc = now;
1214 entries = dst_entries_get_slow(ops);
1215 if (entries < ops->gc_thresh)
1216 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1217out:
1218 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1219 return entries > rt_max_size;
1220}
1221
1222
1223
1224
1225
1226
1227
1228int ip6_dst_hoplimit(struct dst_entry *dst)
1229{
1230 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1231 if (hoplimit == 0) {
1232 struct net_device *dev = dst->dev;
1233 struct inet6_dev *idev;
1234
1235 rcu_read_lock();
1236 idev = __in6_dev_get(dev);
1237 if (idev)
1238 hoplimit = idev->cnf.hop_limit;
1239 else
1240 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1241 rcu_read_unlock();
1242 }
1243 return hoplimit;
1244}
1245EXPORT_SYMBOL(ip6_dst_hoplimit);
1246
1247
1248
1249
1250
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * Validates the prefix lengths, resolves the output device (when an ifindex
 * is given), selects or creates the target table, allocates and fills a
 * rt6_info, and finally hands it to __ip6_ins_rt().
 *
 * Note that @cfg is modified: a zero metric becomes IP6_RT_PRIO_USER, an
 * unspecified protocol becomes RTPROT_BOOT, and fc_nlinfo.nl_net is
 * overwritten with the namespace of the chosen device.
 *
 * Returns the result of __ip6_ins_rt() on the install path.  On any earlier
 * failure a negative errno is returned after dropping the device/inet6_dev
 * references taken here and freeing the partially built route.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* source prefixes are only accepted when subtrees are compiled in */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		/* both lookups take a reference; "out" drops whichever were taken */
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/*
	 * A netlink request without NLM_F_CREATE first tries an existing
	 * table; if there is none the table is created anyway, with a warning.
	 * Every other request goes straight to fib6_new_table().
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;

	/* fc_expires is given in clock_t units and converted to jiffies */
	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* pick the input handler from the destination type / RTF_LOCAL */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

	/* non-host routes carrying metrics get their own zeroed metrics array */
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/*
	 * Explicit reject routes, and non-local routes to a non-loopback
	 * destination through a loopback device, are installed as reject
	 * routes bound to the namespace's loopback device.
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* swap the held device/idev for the loopback ones */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		/*
		 * A gateway that is not a link-local unicast address must
		 * itself be a unicast address reachable through an existing
		 * route that is not a gateway route.
		 */
		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				/* requested device must match the gateway's route */
				if (dev != grt->dst.dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* no device requested: adopt (and hold) the gateway route's */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* err stays -EHOSTUNREACH when the gateway is only reachable via another gateway */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* a preferred source address must be configured on the output device */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		err = rt6_bind_neighbour(rt, dev);
		if (err)
			goto out;
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* copy user supplied metrics; attribute type 0 is skipped, > RTAX_MAX is rejected */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	/* the device and inet6_dev references held here now belong to the route */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* error path: undo every reference/allocation made above */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1481
1482static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1483{
1484 int err;
1485 struct fib6_table *table;
1486 struct net *net = dev_net(rt->dst.dev);
1487
1488 if (rt == net->ipv6.ip6_null_entry)
1489 return -ENOENT;
1490
1491 table = rt->rt6i_table;
1492 write_lock_bh(&table->tb6_lock);
1493
1494 err = fib6_del(rt, info);
1495 dst_release(&rt->dst);
1496
1497 write_unlock_bh(&table->tb6_lock);
1498
1499 return err;
1500}
1501
1502int ip6_del_rt(struct rt6_info *rt)
1503{
1504 struct nl_info info = {
1505 .nl_net = dev_net(rt->dst.dev),
1506 };
1507 return __ip6_del_rt(rt, &info);
1508}
1509
1510static int ip6_route_del(struct fib6_config *cfg)
1511{
1512 struct fib6_table *table;
1513 struct fib6_node *fn;
1514 struct rt6_info *rt;
1515 int err = -ESRCH;
1516
1517 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1518 if (!table)
1519 return err;
1520
1521 read_lock_bh(&table->tb6_lock);
1522
1523 fn = fib6_locate(&table->tb6_root,
1524 &cfg->fc_dst, cfg->fc_dst_len,
1525 &cfg->fc_src, cfg->fc_src_len);
1526
1527 if (fn) {
1528 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1529 if (cfg->fc_ifindex &&
1530 (!rt->dst.dev ||
1531 rt->dst.dev->ifindex != cfg->fc_ifindex))
1532 continue;
1533 if (cfg->fc_flags & RTF_GATEWAY &&
1534 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1535 continue;
1536 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1537 continue;
1538 dst_hold(&rt->dst);
1539 read_unlock_bh(&table->tb6_lock);
1540
1541 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1542 }
1543 }
1544 read_unlock_bh(&table->tb6_lock);
1545
1546 return err;
1547}
1548
1549
1550
1551
/*
 * Flow key used for redirect lookups: a regular flowi6 extended with the
 * address of the router that sent the redirect.
 *
 * fl6 must stay the first member: __ip6_route_redirect() receives a
 * struct flowi6 pointer and casts it back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1556
1557static struct rt6_info *__ip6_route_redirect(struct net *net,
1558 struct fib6_table *table,
1559 struct flowi6 *fl6,
1560 int flags)
1561{
1562 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1563 struct rt6_info *rt;
1564 struct fib6_node *fn;
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577 read_lock_bh(&table->tb6_lock);
1578 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1579restart:
1580 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1581
1582
1583
1584
1585
1586
1587
1588
1589 if (rt6_check_expired(rt))
1590 continue;
1591 if (!(rt->rt6i_flags & RTF_GATEWAY))
1592 continue;
1593 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1594 continue;
1595 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1596 continue;
1597 break;
1598 }
1599
1600 if (!rt)
1601 rt = net->ipv6.ip6_null_entry;
1602 BACKTRACK(net, &fl6->saddr);
1603out:
1604 dst_hold(&rt->dst);
1605
1606 read_unlock_bh(&table->tb6_lock);
1607
1608 return rt;
1609};
1610
1611static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1612 const struct in6_addr *src,
1613 const struct in6_addr *gateway,
1614 struct net_device *dev)
1615{
1616 int flags = RT6_LOOKUP_F_HAS_SADDR;
1617 struct net *net = dev_net(dev);
1618 struct ip6rd_flowi rdfl = {
1619 .fl6 = {
1620 .flowi6_oif = dev->ifindex,
1621 .daddr = *dest,
1622 .saddr = *src,
1623 },
1624 };
1625
1626 rdfl.gateway = *gateway;
1627
1628 if (rt6_need_strict(dest))
1629 flags |= RT6_LOOKUP_F_IFACE;
1630
1631 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1632 flags, __ip6_route_redirect);
1633}
1634
1635void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1636 const struct in6_addr *saddr,
1637 struct neighbour *neigh, u8 *lladdr, int on_link)
1638{
1639 struct rt6_info *rt, *nrt = NULL;
1640 struct netevent_redirect netevent;
1641 struct net *net = dev_net(neigh->dev);
1642
1643 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1644
1645 if (rt == net->ipv6.ip6_null_entry) {
1646 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1647 goto out;
1648 }
1649
1650
1651
1652
1653
1654 neigh_update(neigh, lladdr, NUD_STALE,
1655 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1656 NEIGH_UPDATE_F_OVERRIDE|
1657 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1658 NEIGH_UPDATE_F_ISROUTER))
1659 );
1660
1661
1662
1663
1664
1665
1666 dst_confirm(&rt->dst);
1667
1668
1669 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1670 goto out;
1671
1672 nrt = ip6_rt_copy(rt, dest);
1673 if (!nrt)
1674 goto out;
1675
1676 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1677 if (on_link)
1678 nrt->rt6i_flags &= ~RTF_GATEWAY;
1679
1680 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1681 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1682
1683 if (ip6_ins_rt(nrt))
1684 goto out;
1685
1686 netevent.old = &rt->dst;
1687 netevent.new = &nrt->dst;
1688 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1689
1690 if (rt->rt6i_flags & RTF_CACHE) {
1691 ip6_del_rt(rt);
1692 return;
1693 }
1694
1695out:
1696 dst_release(&rt->dst);
1697}
1698
1699
1700
1701
1702
1703
1704static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1705 struct net *net, u32 pmtu, int ifindex)
1706{
1707 struct rt6_info *rt, *nrt;
1708 int allfrag = 0;
1709again:
1710 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1711 if (!rt)
1712 return;
1713
1714 if (rt6_check_expired(rt)) {
1715 ip6_del_rt(rt);
1716 goto again;
1717 }
1718
1719 if (pmtu >= dst_mtu(&rt->dst))
1720 goto out;
1721
1722 if (pmtu < IPV6_MIN_MTU) {
1723
1724
1725
1726
1727
1728
1729 pmtu = IPV6_MIN_MTU;
1730 allfrag = 1;
1731 }
1732
1733
1734
1735
1736
1737 dst_confirm(&rt->dst);
1738
1739
1740
1741
1742
1743
1744 if (rt->rt6i_flags & RTF_CACHE) {
1745 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1746 if (allfrag) {
1747 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1748 features |= RTAX_FEATURE_ALLFRAG;
1749 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1750 }
1751 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1752 rt->rt6i_flags |= RTF_MODIFIED;
1753 goto out;
1754 }
1755
1756
1757
1758
1759
1760
1761 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1762 nrt = rt6_alloc_cow(rt, daddr, saddr);
1763 else
1764 nrt = rt6_alloc_clone(rt, daddr);
1765
1766 if (nrt) {
1767 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1768 if (allfrag) {
1769 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1770 features |= RTAX_FEATURE_ALLFRAG;
1771 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1772 }
1773
1774
1775
1776
1777
1778
1779
1780 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1781 nrt->rt6i_flags |= RTF_DYNAMIC;
1782 ip6_ins_rt(nrt);
1783 }
1784out:
1785 dst_release(&rt->dst);
1786}
1787
1788void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1789 struct net_device *dev, u32 pmtu)
1790{
1791 struct net *net = dev_net(dev);
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1806 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1807}
1808
1809
1810
1811
1812
1813static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1814 const struct in6_addr *dest)
1815{
1816 struct net *net = dev_net(ort->dst.dev);
1817 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1818 ort->dst.dev, 0);
1819
1820 if (rt) {
1821 rt->dst.input = ort->dst.input;
1822 rt->dst.output = ort->dst.output;
1823 rt->dst.flags |= DST_HOST;
1824
1825 rt->rt6i_dst.addr = *dest;
1826 rt->rt6i_dst.plen = 128;
1827 dst_copy_metrics(&rt->dst, &ort->dst);
1828 rt->dst.error = ort->dst.error;
1829 rt->rt6i_idev = ort->rt6i_idev;
1830 if (rt->rt6i_idev)
1831 in6_dev_hold(rt->rt6i_idev);
1832 rt->dst.lastuse = jiffies;
1833
1834 rt->rt6i_gateway = ort->rt6i_gateway;
1835 rt->rt6i_flags = ort->rt6i_flags;
1836 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1837 (RTF_DEFAULT | RTF_ADDRCONF))
1838 rt6_set_from(rt, ort);
1839 else
1840 rt6_clean_expires(rt);
1841 rt->rt6i_metric = 0;
1842
1843#ifdef CONFIG_IPV6_SUBTREES
1844 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1845#endif
1846 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1847 rt->rt6i_table = ort->rt6i_table;
1848 }
1849 return rt;
1850}
1851
1852#ifdef CONFIG_IPV6_ROUTE_INFO
1853static struct rt6_info *rt6_get_route_info(struct net *net,
1854 const struct in6_addr *prefix, int prefixlen,
1855 const struct in6_addr *gwaddr, int ifindex)
1856{
1857 struct fib6_node *fn;
1858 struct rt6_info *rt = NULL;
1859 struct fib6_table *table;
1860
1861 table = fib6_get_table(net, RT6_TABLE_INFO);
1862 if (!table)
1863 return NULL;
1864
1865 write_lock_bh(&table->tb6_lock);
1866 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1867 if (!fn)
1868 goto out;
1869
1870 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1871 if (rt->dst.dev->ifindex != ifindex)
1872 continue;
1873 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1874 continue;
1875 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1876 continue;
1877 dst_hold(&rt->dst);
1878 break;
1879 }
1880out:
1881 write_unlock_bh(&table->tb6_lock);
1882 return rt;
1883}
1884
1885static struct rt6_info *rt6_add_route_info(struct net *net,
1886 const struct in6_addr *prefix, int prefixlen,
1887 const struct in6_addr *gwaddr, int ifindex,
1888 unsigned int pref)
1889{
1890 struct fib6_config cfg = {
1891 .fc_table = RT6_TABLE_INFO,
1892 .fc_metric = IP6_RT_PRIO_USER,
1893 .fc_ifindex = ifindex,
1894 .fc_dst_len = prefixlen,
1895 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1896 RTF_UP | RTF_PREF(pref),
1897 .fc_nlinfo.pid = 0,
1898 .fc_nlinfo.nlh = NULL,
1899 .fc_nlinfo.nl_net = net,
1900 };
1901
1902 cfg.fc_dst = *prefix;
1903 cfg.fc_gateway = *gwaddr;
1904
1905
1906 if (!prefixlen)
1907 cfg.fc_flags |= RTF_DEFAULT;
1908
1909 ip6_route_add(&cfg);
1910
1911 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1912}
1913#endif
1914
1915struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1916{
1917 struct rt6_info *rt;
1918 struct fib6_table *table;
1919
1920 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1921 if (!table)
1922 return NULL;
1923
1924 write_lock_bh(&table->tb6_lock);
1925 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1926 if (dev == rt->dst.dev &&
1927 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1928 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1929 break;
1930 }
1931 if (rt)
1932 dst_hold(&rt->dst);
1933 write_unlock_bh(&table->tb6_lock);
1934 return rt;
1935}
1936
1937struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1938 struct net_device *dev,
1939 unsigned int pref)
1940{
1941 struct fib6_config cfg = {
1942 .fc_table = RT6_TABLE_DFLT,
1943 .fc_metric = IP6_RT_PRIO_USER,
1944 .fc_ifindex = dev->ifindex,
1945 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1946 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1947 .fc_nlinfo.pid = 0,
1948 .fc_nlinfo.nlh = NULL,
1949 .fc_nlinfo.nl_net = dev_net(dev),
1950 };
1951
1952 cfg.fc_gateway = *gwaddr;
1953
1954 ip6_route_add(&cfg);
1955
1956 return rt6_get_dflt_router(gwaddr, dev);
1957}
1958
1959void rt6_purge_dflt_routers(struct net *net)
1960{
1961 struct rt6_info *rt;
1962 struct fib6_table *table;
1963
1964
1965 table = fib6_get_table(net, RT6_TABLE_DFLT);
1966 if (!table)
1967 return;
1968
1969restart:
1970 read_lock_bh(&table->tb6_lock);
1971 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1972 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1973 dst_hold(&rt->dst);
1974 read_unlock_bh(&table->tb6_lock);
1975 ip6_del_rt(rt);
1976 goto restart;
1977 }
1978 }
1979 read_unlock_bh(&table->tb6_lock);
1980}
1981
1982static void rtmsg_to_fib6_config(struct net *net,
1983 struct in6_rtmsg *rtmsg,
1984 struct fib6_config *cfg)
1985{
1986 memset(cfg, 0, sizeof(*cfg));
1987
1988 cfg->fc_table = RT6_TABLE_MAIN;
1989 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1990 cfg->fc_metric = rtmsg->rtmsg_metric;
1991 cfg->fc_expires = rtmsg->rtmsg_info;
1992 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1993 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1994 cfg->fc_flags = rtmsg->rtmsg_flags;
1995
1996 cfg->fc_nlinfo.nl_net = net;
1997
1998 cfg->fc_dst = rtmsg->rtmsg_dst;
1999 cfg->fc_src = rtmsg->rtmsg_src;
2000 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2001}
2002
2003int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2004{
2005 struct fib6_config cfg;
2006 struct in6_rtmsg rtmsg;
2007 int err;
2008
2009 switch(cmd) {
2010 case SIOCADDRT:
2011 case SIOCDELRT:
2012 if (!capable(CAP_NET_ADMIN))
2013 return -EPERM;
2014 err = copy_from_user(&rtmsg, arg,
2015 sizeof(struct in6_rtmsg));
2016 if (err)
2017 return -EFAULT;
2018
2019 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2020
2021 rtnl_lock();
2022 switch (cmd) {
2023 case SIOCADDRT:
2024 err = ip6_route_add(&cfg);
2025 break;
2026 case SIOCDELRT:
2027 err = ip6_route_del(&cfg);
2028 break;
2029 default:
2030 err = -EINVAL;
2031 }
2032 rtnl_unlock();
2033
2034 return err;
2035 }
2036
2037 return -EINVAL;
2038}
2039
2040
2041
2042
2043
2044static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2045{
2046 int type;
2047 struct dst_entry *dst = skb_dst(skb);
2048 switch (ipstats_mib_noroutes) {
2049 case IPSTATS_MIB_INNOROUTES:
2050 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2051 if (type == IPV6_ADDR_ANY) {
2052 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2053 IPSTATS_MIB_INADDRERRORS);
2054 break;
2055 }
2056
2057 case IPSTATS_MIB_OUTNOROUTES:
2058 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2059 ipstats_mib_noroutes);
2060 break;
2061 }
2062 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2063 kfree_skb(skb);
2064 return 0;
2065}
2066
2067static int ip6_pkt_discard(struct sk_buff *skb)
2068{
2069 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2070}
2071
2072static int ip6_pkt_discard_out(struct sk_buff *skb)
2073{
2074 skb->dev = skb_dst(skb)->dev;
2075 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2076}
2077
2078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2079
2080static int ip6_pkt_prohibit(struct sk_buff *skb)
2081{
2082 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2083}
2084
2085static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2086{
2087 skb->dev = skb_dst(skb)->dev;
2088 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2089}
2090
2091#endif
2092
2093
2094
2095
2096
2097struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2098 const struct in6_addr *addr,
2099 bool anycast)
2100{
2101 struct net *net = dev_net(idev->dev);
2102 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2103 net->loopback_dev, 0);
2104 int err;
2105
2106 if (!rt) {
2107 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2108 return ERR_PTR(-ENOMEM);
2109 }
2110
2111 in6_dev_hold(idev);
2112
2113 rt->dst.flags |= DST_HOST;
2114 rt->dst.input = ip6_input;
2115 rt->dst.output = ip6_output;
2116 rt->rt6i_idev = idev;
2117 rt->dst.obsolete = -1;
2118
2119 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2120 if (anycast)
2121 rt->rt6i_flags |= RTF_ANYCAST;
2122 else
2123 rt->rt6i_flags |= RTF_LOCAL;
2124 err = rt6_bind_neighbour(rt, rt->dst.dev);
2125 if (err) {
2126 dst_free(&rt->dst);
2127 return ERR_PTR(err);
2128 }
2129
2130 rt->rt6i_dst.addr = *addr;
2131 rt->rt6i_dst.plen = 128;
2132 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2133
2134 atomic_set(&rt->dst.__refcnt, 1);
2135
2136 return rt;
2137}
2138
2139int ip6_route_get_saddr(struct net *net,
2140 struct rt6_info *rt,
2141 const struct in6_addr *daddr,
2142 unsigned int prefs,
2143 struct in6_addr *saddr)
2144{
2145 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2146 int err = 0;
2147 if (rt->rt6i_prefsrc.plen)
2148 *saddr = rt->rt6i_prefsrc.addr;
2149 else
2150 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2151 daddr, prefs, saddr);
2152 return err;
2153}
2154
2155
/*
 * Argument bundle handed to fib6_remove_prefsrc() through
 * fib6_clean_all(): which device, namespace and address to match.
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace, used to recognise its null entry */
	struct in6_addr *addr;	/* preferred-source address being removed */
};
2161
2162static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2163{
2164 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2165 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2166 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2167
2168 if (((void *)rt->dst.dev == dev || !dev) &&
2169 rt != net->ipv6.ip6_null_entry &&
2170 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2171
2172 rt->rt6i_prefsrc.plen = 0;
2173 }
2174 return 0;
2175}
2176
2177void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2178{
2179 struct net *net = dev_net(ifp->idev->dev);
2180 struct arg_dev_net_ip adni = {
2181 .dev = ifp->idev->dev,
2182 .net = net,
2183 .addr = &ifp->addr,
2184 };
2185 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2186}
2187
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every device */
	struct net *net;	/* namespace, used to recognise its null entry */
};
2192
2193static int fib6_ifdown(struct rt6_info *rt, void *arg)
2194{
2195 const struct arg_dev_net *adn = arg;
2196 const struct net_device *dev = adn->dev;
2197
2198 if ((rt->dst.dev == dev || !dev) &&
2199 rt != adn->net->ipv6.ip6_null_entry)
2200 return -1;
2201
2202 return 0;
2203}
2204
2205void rt6_ifdown(struct net *net, struct net_device *dev)
2206{
2207 struct arg_dev_net adn = {
2208 .dev = dev,
2209 .net = net,
2210 };
2211
2212 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2213 icmp6_clean_all(fib6_ifdown, &adn);
2214}
2215
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
2220
2221static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2222{
2223 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2224 struct inet6_dev *idev;
2225
2226
2227
2228
2229
2230
2231
2232 idev = __in6_dev_get(arg->dev);
2233 if (!idev)
2234 return 0;
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250 if (rt->dst.dev == arg->dev &&
2251 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2252 (dst_mtu(&rt->dst) >= arg->mtu ||
2253 (dst_mtu(&rt->dst) < arg->mtu &&
2254 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2255 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2256 }
2257 return 0;
2258}
2259
2260void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2261{
2262 struct rt6_mtu_change_arg arg = {
2263 .dev = dev,
2264 .mtu = mtu,
2265 };
2266
2267 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2268}
2269
2270static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2271 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2272 [RTA_OIF] = { .type = NLA_U32 },
2273 [RTA_IIF] = { .type = NLA_U32 },
2274 [RTA_PRIORITY] = { .type = NLA_U32 },
2275 [RTA_METRICS] = { .type = NLA_NESTED },
2276};
2277
2278static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2279 struct fib6_config *cfg)
2280{
2281 struct rtmsg *rtm;
2282 struct nlattr *tb[RTA_MAX+1];
2283 int err;
2284
2285 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2286 if (err < 0)
2287 goto errout;
2288
2289 err = -EINVAL;
2290 rtm = nlmsg_data(nlh);
2291 memset(cfg, 0, sizeof(*cfg));
2292
2293 cfg->fc_table = rtm->rtm_table;
2294 cfg->fc_dst_len = rtm->rtm_dst_len;
2295 cfg->fc_src_len = rtm->rtm_src_len;
2296 cfg->fc_flags = RTF_UP;
2297 cfg->fc_protocol = rtm->rtm_protocol;
2298
2299 if (rtm->rtm_type == RTN_UNREACHABLE)
2300 cfg->fc_flags |= RTF_REJECT;
2301
2302 if (rtm->rtm_type == RTN_LOCAL)
2303 cfg->fc_flags |= RTF_LOCAL;
2304
2305 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2306 cfg->fc_nlinfo.nlh = nlh;
2307 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2308
2309 if (tb[RTA_GATEWAY]) {
2310 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2311 cfg->fc_flags |= RTF_GATEWAY;
2312 }
2313
2314 if (tb[RTA_DST]) {
2315 int plen = (rtm->rtm_dst_len + 7) >> 3;
2316
2317 if (nla_len(tb[RTA_DST]) < plen)
2318 goto errout;
2319
2320 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2321 }
2322
2323 if (tb[RTA_SRC]) {
2324 int plen = (rtm->rtm_src_len + 7) >> 3;
2325
2326 if (nla_len(tb[RTA_SRC]) < plen)
2327 goto errout;
2328
2329 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2330 }
2331
2332 if (tb[RTA_PREFSRC])
2333 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2334
2335 if (tb[RTA_OIF])
2336 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2337
2338 if (tb[RTA_PRIORITY])
2339 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2340
2341 if (tb[RTA_METRICS]) {
2342 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2343 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2344 }
2345
2346 if (tb[RTA_TABLE])
2347 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2348
2349 err = 0;
2350errout:
2351 return err;
2352}
2353
2354static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2355{
2356 struct fib6_config cfg;
2357 int err;
2358
2359 err = rtm_to_fib6_config(skb, nlh, &cfg);
2360 if (err < 0)
2361 return err;
2362
2363 return ip6_route_del(&cfg);
2364}
2365
2366static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2367{
2368 struct fib6_config cfg;
2369 int err;
2370
2371 err = rtm_to_fib6_config(skb, nlh, &cfg);
2372 if (err < 0)
2373 return err;
2374
2375 return ip6_route_add(&cfg);
2376}
2377
2378static inline size_t rt6_nlmsg_size(void)
2379{
2380 return NLMSG_ALIGN(sizeof(struct rtmsg))
2381 + nla_total_size(16)
2382 + nla_total_size(16)
2383 + nla_total_size(16)
2384 + nla_total_size(16)
2385 + nla_total_size(4)
2386 + nla_total_size(4)
2387 + nla_total_size(4)
2388 + nla_total_size(4)
2389 + RTAX_MAX * nla_total_size(4)
2390 + nla_total_size(sizeof(struct rta_cacheinfo));
2391}
2392
/*
 * rt6_fill_node - append one route message describing @rt to @skb.
 * @net:    network namespace (used for multicast and source lookups)
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if set, reported as a /128 destination instead of the route's
 *          own prefix
 * @src:    if set, reported as a /128 source (CONFIG_IPV6_SUBTREES only)
 * @iif:    input interface to report, 0 for none
 * @type:   netlink message type
 * @pid:    netlink pid placed in the message header
 * @seq:    netlink sequence number placed in the message header
 * @prefix: non-zero restricts output to RTF_PREFIX_RT routes
 * @nowait: passed through to ip6mr_get_route()
 * @flags:  netlink message flags
 *
 * Returns 1 when @prefix is set and @rt is not a prefix route (nothing is
 * written), the result of nlmsg_end() on success, 0 when a blocking
 * ip6mr_get_route() call returned 0, or -EMSGSIZE after cancelling the
 * partially written message.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	const struct inet_peer *peer;
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;
	u32 ts, tsage;

	if (prefix) {	/* caller wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* not a prefix route: skip it, which is not an error */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* the table id goes into the header field and into a u32 attribute */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* derive the route type from the flags / loopback device */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* the stored protocol is overridden for dynamic/autoconf/default routes */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* multicast destinations are described by ip6mr_get_route() */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			/* with CONFIG_IPV6_MROUTE this is the else branch of the multicast test */
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/*
	 * NOTE(review): when dst is set, iif is 0 and the route has a
	 * preferred source, the branch above already emitted that address,
	 * so RTA_PREFSRC appears twice in the message — confirm this is
	 * intended.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* the gateway is reported from the bound neighbour's key, under RCU */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;
	/* remaining lifetime in jiffies: 0 without RTF_EXPIRES, capped at INT_MAX */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->dst.expires - jiffies < INT_MAX)
		expires = rt->dst.expires - jiffies;
	else
		expires = INT_MAX;

	/* TCP timestamp state is reported only when the peer has a stamp */
	peer = rt->rt6i_peer;
	ts = tsage = 0;
	if (peer && peer->tcp_ts_stamp) {
		ts = peer->tcp_ts;
		tsage = get_seconds() - peer->tcp_ts_stamp;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* drop the partially written message again */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2541
2542int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2543{
2544 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2545 int prefix;
2546
2547 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2548 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2549 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2550 } else
2551 prefix = 0;
2552
2553 return rt6_fill_node(arg->net,
2554 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2555 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2556 prefix, 0, NLM_F_MULTI);
2557}
2558
2559static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2560{
2561 struct net *net = sock_net(in_skb->sk);
2562 struct nlattr *tb[RTA_MAX+1];
2563 struct rt6_info *rt;
2564 struct sk_buff *skb;
2565 struct rtmsg *rtm;
2566 struct flowi6 fl6;
2567 int err, iif = 0, oif = 0;
2568
2569 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2570 if (err < 0)
2571 goto errout;
2572
2573 err = -EINVAL;
2574 memset(&fl6, 0, sizeof(fl6));
2575
2576 if (tb[RTA_SRC]) {
2577 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2578 goto errout;
2579
2580 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2581 }
2582
2583 if (tb[RTA_DST]) {
2584 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2585 goto errout;
2586
2587 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2588 }
2589
2590 if (tb[RTA_IIF])
2591 iif = nla_get_u32(tb[RTA_IIF]);
2592
2593 if (tb[RTA_OIF])
2594 oif = nla_get_u32(tb[RTA_OIF]);
2595
2596 if (iif) {
2597 struct net_device *dev;
2598 int flags = 0;
2599
2600 dev = __dev_get_by_index(net, iif);
2601 if (!dev) {
2602 err = -ENODEV;
2603 goto errout;
2604 }
2605
2606 fl6.flowi6_iif = iif;
2607
2608 if (!ipv6_addr_any(&fl6.saddr))
2609 flags |= RT6_LOOKUP_F_HAS_SADDR;
2610
2611 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2612 flags);
2613 } else {
2614 fl6.flowi6_oif = oif;
2615
2616 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2617 }
2618
2619 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2620 if (!skb) {
2621 dst_release(&rt->dst);
2622 err = -ENOBUFS;
2623 goto errout;
2624 }
2625
2626
2627
2628
2629 skb_reset_mac_header(skb);
2630 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2631
2632 skb_dst_set(skb, &rt->dst);
2633
2634 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2635 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2636 nlh->nlmsg_seq, 0, 0, 0);
2637 if (err < 0) {
2638 kfree_skb(skb);
2639 goto errout;
2640 }
2641
2642 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2643errout:
2644 return err;
2645}
2646
2647void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2648{
2649 struct sk_buff *skb;
2650 struct net *net = info->nl_net;
2651 u32 seq;
2652 int err;
2653
2654 err = -ENOBUFS;
2655 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2656
2657 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2658 if (!skb)
2659 goto errout;
2660
2661 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2662 event, info->pid, seq, 0, 0, 0);
2663 if (err < 0) {
2664
2665 WARN_ON(err == -EMSGSIZE);
2666 kfree_skb(skb);
2667 goto errout;
2668 }
2669 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2670 info->nlh, gfp_any());
2671 return;
2672errout:
2673 if (err < 0)
2674 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2675}
2676
2677static int ip6_route_dev_notify(struct notifier_block *this,
2678 unsigned long event, void *data)
2679{
2680 struct net_device *dev = (struct net_device *)data;
2681 struct net *net = dev_net(dev);
2682
2683 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2684 net->ipv6.ip6_null_entry->dst.dev = dev;
2685 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2688 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2689 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2690 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2691#endif
2692 }
2693
2694 return NOTIFY_OK;
2695}
2696
2697
2698
2699
2700
2701#ifdef CONFIG_PROC_FS
2702
/*
 * Buffer-cursor bookkeeping for a /proc style reader.
 * NOTE(review): nothing in the visible part of this file references this
 * structure; the per-field meanings are not established here -- confirm
 * against any remaining users before relying on them.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2711
2712static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2713{
2714 struct seq_file *m = p_arg;
2715 struct neighbour *n;
2716
2717 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2718
2719#ifdef CONFIG_IPV6_SUBTREES
2720 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2721#else
2722 seq_puts(m, "00000000000000000000000000000000 00 ");
2723#endif
2724 rcu_read_lock();
2725 n = dst_get_neighbour_noref(&rt->dst);
2726 if (n) {
2727 seq_printf(m, "%pi6", n->primary_key);
2728 } else {
2729 seq_puts(m, "00000000000000000000000000000000");
2730 }
2731 rcu_read_unlock();
2732 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2733 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2734 rt->dst.__use, rt->rt6i_flags,
2735 rt->dst.dev ? rt->dst.dev->name : "");
2736 return 0;
2737}
2738
2739static int ipv6_route_show(struct seq_file *m, void *v)
2740{
2741 struct net *net = (struct net *)m->private;
2742 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2743 return 0;
2744}
2745
2746static int ipv6_route_open(struct inode *inode, struct file *file)
2747{
2748 return single_open_net(inode, file, ipv6_route_show);
2749}
2750
2751static const struct file_operations ipv6_route_proc_fops = {
2752 .owner = THIS_MODULE,
2753 .open = ipv6_route_open,
2754 .read = seq_read,
2755 .llseek = seq_lseek,
2756 .release = single_release_net,
2757};
2758
2759static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2760{
2761 struct net *net = (struct net *)seq->private;
2762 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2763 net->ipv6.rt6_stats->fib_nodes,
2764 net->ipv6.rt6_stats->fib_route_nodes,
2765 net->ipv6.rt6_stats->fib_rt_alloc,
2766 net->ipv6.rt6_stats->fib_rt_entries,
2767 net->ipv6.rt6_stats->fib_rt_cache,
2768 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2769 net->ipv6.rt6_stats->fib_discarded_routes);
2770
2771 return 0;
2772}
2773
2774static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2775{
2776 return single_open_net(inode, file, rt6_stats_seq_show);
2777}
2778
2779static const struct file_operations rt6_stats_seq_fops = {
2780 .owner = THIS_MODULE,
2781 .open = rt6_stats_seq_open,
2782 .read = seq_read,
2783 .llseek = seq_lseek,
2784 .release = single_release_net,
2785};
2786#endif
2787
2788#ifdef CONFIG_SYSCTL
2789
2790static
2791int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2792 void __user *buffer, size_t *lenp, loff_t *ppos)
2793{
2794 struct net *net;
2795 int delay;
2796 if (!write)
2797 return -EINVAL;
2798
2799 net = (struct net *)ctl->extra1;
2800 delay = net->ipv6.sysctl.flush_delay;
2801 proc_dointvec(ctl, write, buffer, lenp, ppos);
2802 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2803 return 0;
2804}
2805
2806ctl_table ipv6_route_table_template[] = {
2807 {
2808 .procname = "flush",
2809 .data = &init_net.ipv6.sysctl.flush_delay,
2810 .maxlen = sizeof(int),
2811 .mode = 0200,
2812 .proc_handler = ipv6_sysctl_rtcache_flush
2813 },
2814 {
2815 .procname = "gc_thresh",
2816 .data = &ip6_dst_ops_template.gc_thresh,
2817 .maxlen = sizeof(int),
2818 .mode = 0644,
2819 .proc_handler = proc_dointvec,
2820 },
2821 {
2822 .procname = "max_size",
2823 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2824 .maxlen = sizeof(int),
2825 .mode = 0644,
2826 .proc_handler = proc_dointvec,
2827 },
2828 {
2829 .procname = "gc_min_interval",
2830 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2831 .maxlen = sizeof(int),
2832 .mode = 0644,
2833 .proc_handler = proc_dointvec_jiffies,
2834 },
2835 {
2836 .procname = "gc_timeout",
2837 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2838 .maxlen = sizeof(int),
2839 .mode = 0644,
2840 .proc_handler = proc_dointvec_jiffies,
2841 },
2842 {
2843 .procname = "gc_interval",
2844 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2845 .maxlen = sizeof(int),
2846 .mode = 0644,
2847 .proc_handler = proc_dointvec_jiffies,
2848 },
2849 {
2850 .procname = "gc_elasticity",
2851 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2852 .maxlen = sizeof(int),
2853 .mode = 0644,
2854 .proc_handler = proc_dointvec,
2855 },
2856 {
2857 .procname = "mtu_expires",
2858 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2859 .maxlen = sizeof(int),
2860 .mode = 0644,
2861 .proc_handler = proc_dointvec_jiffies,
2862 },
2863 {
2864 .procname = "min_adv_mss",
2865 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2866 .maxlen = sizeof(int),
2867 .mode = 0644,
2868 .proc_handler = proc_dointvec,
2869 },
2870 {
2871 .procname = "gc_min_interval_ms",
2872 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2873 .maxlen = sizeof(int),
2874 .mode = 0644,
2875 .proc_handler = proc_dointvec_ms_jiffies,
2876 },
2877 { }
2878};
2879
2880struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2881{
2882 struct ctl_table *table;
2883
2884 table = kmemdup(ipv6_route_table_template,
2885 sizeof(ipv6_route_table_template),
2886 GFP_KERNEL);
2887
2888 if (table) {
2889 table[0].data = &net->ipv6.sysctl.flush_delay;
2890 table[0].extra1 = net;
2891 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2892 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2893 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2894 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2895 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2896 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2897 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2898 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2899 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2900 }
2901
2902 return table;
2903}
2904#endif
2905
2906static int __net_init ip6_route_net_init(struct net *net)
2907{
2908 int ret = -ENOMEM;
2909
2910 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2911 sizeof(net->ipv6.ip6_dst_ops));
2912
2913 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2914 goto out_ip6_dst_ops;
2915
2916 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2917 sizeof(*net->ipv6.ip6_null_entry),
2918 GFP_KERNEL);
2919 if (!net->ipv6.ip6_null_entry)
2920 goto out_ip6_dst_entries;
2921 net->ipv6.ip6_null_entry->dst.path =
2922 (struct dst_entry *)net->ipv6.ip6_null_entry;
2923 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2924 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2925 ip6_template_metrics, true);
2926
2927#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2928 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2929 sizeof(*net->ipv6.ip6_prohibit_entry),
2930 GFP_KERNEL);
2931 if (!net->ipv6.ip6_prohibit_entry)
2932 goto out_ip6_null_entry;
2933 net->ipv6.ip6_prohibit_entry->dst.path =
2934 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2935 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2936 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2937 ip6_template_metrics, true);
2938
2939 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2940 sizeof(*net->ipv6.ip6_blk_hole_entry),
2941 GFP_KERNEL);
2942 if (!net->ipv6.ip6_blk_hole_entry)
2943 goto out_ip6_prohibit_entry;
2944 net->ipv6.ip6_blk_hole_entry->dst.path =
2945 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2946 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2947 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2948 ip6_template_metrics, true);
2949#endif
2950
2951 net->ipv6.sysctl.flush_delay = 0;
2952 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2953 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2954 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2955 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2956 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2957 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2958 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2959
2960 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2961
2962 ret = 0;
2963out:
2964 return ret;
2965
2966#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967out_ip6_prohibit_entry:
2968 kfree(net->ipv6.ip6_prohibit_entry);
2969out_ip6_null_entry:
2970 kfree(net->ipv6.ip6_null_entry);
2971#endif
2972out_ip6_dst_entries:
2973 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2974out_ip6_dst_ops:
2975 goto out;
2976}
2977
2978static void __net_exit ip6_route_net_exit(struct net *net)
2979{
2980 kfree(net->ipv6.ip6_null_entry);
2981#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2982 kfree(net->ipv6.ip6_prohibit_entry);
2983 kfree(net->ipv6.ip6_blk_hole_entry);
2984#endif
2985 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2986}
2987
2988static int __net_init ip6_route_net_init_late(struct net *net)
2989{
2990#ifdef CONFIG_PROC_FS
2991 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2992 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2993#endif
2994 return 0;
2995}
2996
2997static void __net_exit ip6_route_net_exit_late(struct net *net)
2998{
2999#ifdef CONFIG_PROC_FS
3000 proc_net_remove(net, "ipv6_route");
3001 proc_net_remove(net, "rt6_stats");
3002#endif
3003}
3004
3005static struct pernet_operations ip6_route_net_ops = {
3006 .init = ip6_route_net_init,
3007 .exit = ip6_route_net_exit,
3008};
3009
3010static struct pernet_operations ip6_route_net_late_ops = {
3011 .init = ip6_route_net_init_late,
3012 .exit = ip6_route_net_exit_late,
3013};
3014
3015static struct notifier_block ip6_route_dev_notifier = {
3016 .notifier_call = ip6_route_dev_notify,
3017 .priority = 0,
3018};
3019
3020int __init ip6_route_init(void)
3021{
3022 int ret;
3023
3024 ret = -ENOMEM;
3025 ip6_dst_ops_template.kmem_cachep =
3026 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3027 SLAB_HWCACHE_ALIGN, NULL);
3028 if (!ip6_dst_ops_template.kmem_cachep)
3029 goto out;
3030
3031 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3032 if (ret)
3033 goto out_kmem_cache;
3034
3035 ret = register_pernet_subsys(&ip6_route_net_ops);
3036 if (ret)
3037 goto out_dst_entries;
3038
3039 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3040
3041
3042
3043
3044 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3045 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3046 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3047 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3048 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3049 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3050 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051 #endif
3052 ret = fib6_init();
3053 if (ret)
3054 goto out_register_subsys;
3055
3056 ret = xfrm6_init();
3057 if (ret)
3058 goto out_fib6_init;
3059
3060 ret = fib6_rules_init();
3061 if (ret)
3062 goto xfrm6_init;
3063
3064 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3065 if (ret)
3066 goto fib6_rules_init;
3067
3068 ret = -ENOBUFS;
3069 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3070 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3071 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3072 goto out_register_late_subsys;
3073
3074 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3075 if (ret)
3076 goto out_register_late_subsys;
3077
3078out:
3079 return ret;
3080
3081out_register_late_subsys:
3082 unregister_pernet_subsys(&ip6_route_net_late_ops);
3083fib6_rules_init:
3084 fib6_rules_cleanup();
3085xfrm6_init:
3086 xfrm6_fini();
3087out_fib6_init:
3088 fib6_gc_cleanup();
3089out_register_subsys:
3090 unregister_pernet_subsys(&ip6_route_net_ops);
3091out_dst_entries:
3092 dst_entries_destroy(&ip6_dst_blackhole_ops);
3093out_kmem_cache:
3094 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3095 goto out;
3096}
3097
3098void ip6_route_cleanup(void)
3099{
3100 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3101 unregister_pernet_subsys(&ip6_route_net_late_ops);
3102 fib6_rules_cleanup();
3103 xfrm6_fini();
3104 fib6_gc_cleanup();
3105 unregister_pernet_subsys(&ip6_route_net_ops);
3106 dst_entries_destroy(&ip6_dst_blackhole_ops);
3107 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3108}
3109