1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <linux/capability.h>
30#include <linux/errno.h>
31#include <linux/export.h>
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
40#include <linux/mroute6.h>
41#include <linux/init.h>
42#include <linux/if_arp.h>
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#include <linux/nsproxy.h>
46#include <linux/slab.h>
47#include <net/net_namespace.h>
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
58#include <net/netevent.h>
59#include <net/netlink.h>
60#include <net/nexthop.h>
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
/*
 * Verdicts of the neighbour reachability check (rt6_check_neigh()).
 * Negative values are failures; rt6_score_route() hands them back
 * unchanged to its caller.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -2,	/* default verdict of rt6_check_neigh() */
	RT6_NUD_FAIL_SOFT = -1,	/* no neighbour entry, router-pref disabled */
	RT6_NUD_SUCCEED = 1,
};
73
74static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
75 const struct in6_addr *dest);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78static unsigned int ip6_mtu(const struct dst_entry *dst);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83static int ip6_dst_gc(struct dst_ops *ops);
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
89 struct sk_buff *skb, u32 mtu);
90static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
91 struct sk_buff *skb);
92static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
93
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Lookup / creation helpers used by rt6_route_rcv(). */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
103
104static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
105{
106 struct rt6_info *rt = (struct rt6_info *) dst;
107 struct inet_peer *peer;
108 u32 *p = NULL;
109
110 if (!(rt->dst.flags & DST_HOST))
111 return NULL;
112
113 peer = rt6_get_peer_create(rt);
114 if (peer) {
115 u32 *old_p = __DST_METRICS_PTR(old);
116 unsigned long prev, new;
117
118 p = peer->metrics;
119 if (inet_metrics_new(peer))
120 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121
122 new = (unsigned long) p;
123 prev = cmpxchg(&dst->_metrics, old, new);
124
125 if (prev != old) {
126 p = __DST_METRICS_PTR(prev);
127 if (prev & DST_METRICS_READ_ONLY)
128 p = NULL;
129 }
130 }
131 return p;
132}
133
134static inline const void *choose_neigh_daddr(struct rt6_info *rt,
135 struct sk_buff *skb,
136 const void *daddr)
137{
138 struct in6_addr *p = &rt->rt6i_gateway;
139
140 if (!ipv6_addr_any(p))
141 return (const void *) p;
142 else if (skb)
143 return &ipv6_hdr(skb)->daddr;
144 return daddr;
145}
146
147static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
148 struct sk_buff *skb,
149 const void *daddr)
150{
151 struct rt6_info *rt = (struct rt6_info *) dst;
152 struct neighbour *n;
153
154 daddr = choose_neigh_daddr(rt, skb, daddr);
155 n = __ipv6_neigh_lookup(dst->dev, daddr);
156 if (n)
157 return n;
158 return neigh_create(&nd_tbl, daddr, dst->dev);
159}
160
161static struct dst_ops ip6_dst_ops_template = {
162 .family = AF_INET6,
163 .protocol = cpu_to_be16(ETH_P_IPV6),
164 .gc = ip6_dst_gc,
165 .gc_thresh = 1024,
166 .check = ip6_dst_check,
167 .default_advmss = ip6_default_advmss,
168 .mtu = ip6_mtu,
169 .cow_metrics = ipv6_cow_metrics,
170 .destroy = ip6_dst_destroy,
171 .ifdown = ip6_dst_ifdown,
172 .negative_advice = ip6_negative_advice,
173 .link_failure = ip6_link_failure,
174 .update_pmtu = ip6_rt_update_pmtu,
175 .redirect = rt6_do_redirect,
176 .local_out = __ip6_local_out,
177 .neigh_lookup = ip6_neigh_lookup,
178};
179
180static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
181{
182 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
183
184 return mtu ? : dst->dev->mtu;
185}
186
187static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
188 struct sk_buff *skb, u32 mtu)
189{
190}
191
/* dst_ops->redirect for blackhole routes: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	return;
}
196
197static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
198 unsigned long old)
199{
200 return NULL;
201}
202
203static struct dst_ops ip6_dst_blackhole_ops = {
204 .family = AF_INET6,
205 .protocol = cpu_to_be16(ETH_P_IPV6),
206 .destroy = ip6_dst_destroy,
207 .check = ip6_dst_check,
208 .mtu = ip6_blackhole_mtu,
209 .default_advmss = ip6_default_advmss,
210 .update_pmtu = ip6_rt_blackhole_update_pmtu,
211 .redirect = ip6_rt_blackhole_redirect,
212 .cow_metrics = ip6_rt_blackhole_cow_metrics,
213 .neigh_lookup = ip6_neigh_lookup,
214};
215
216static const u32 ip6_template_metrics[RTAX_MAX] = {
217 [RTAX_HOPLIMIT - 1] = 0,
218};
219
220static const struct rt6_info ip6_null_entry_template = {
221 .dst = {
222 .__refcnt = ATOMIC_INIT(1),
223 .__use = 1,
224 .obsolete = DST_OBSOLETE_FORCE_CHK,
225 .error = -ENETUNREACH,
226 .input = ip6_pkt_discard,
227 .output = ip6_pkt_discard_out,
228 },
229 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
230 .rt6i_protocol = RTPROT_KERNEL,
231 .rt6i_metric = ~(u32) 0,
232 .rt6i_ref = ATOMIC_INIT(1),
233};
234
235#ifdef CONFIG_IPV6_MULTIPLE_TABLES
236
/* Handlers installed in ip6_prohibit_entry_template below. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
239
240static const struct rt6_info ip6_prohibit_entry_template = {
241 .dst = {
242 .__refcnt = ATOMIC_INIT(1),
243 .__use = 1,
244 .obsolete = DST_OBSOLETE_FORCE_CHK,
245 .error = -EACCES,
246 .input = ip6_pkt_prohibit,
247 .output = ip6_pkt_prohibit_out,
248 },
249 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
250 .rt6i_protocol = RTPROT_KERNEL,
251 .rt6i_metric = ~(u32) 0,
252 .rt6i_ref = ATOMIC_INIT(1),
253};
254
255static const struct rt6_info ip6_blk_hole_entry_template = {
256 .dst = {
257 .__refcnt = ATOMIC_INIT(1),
258 .__use = 1,
259 .obsolete = DST_OBSOLETE_FORCE_CHK,
260 .error = -EINVAL,
261 .input = dst_discard,
262 .output = dst_discard,
263 },
264 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
265 .rt6i_protocol = RTPROT_KERNEL,
266 .rt6i_metric = ~(u32) 0,
267 .rt6i_ref = ATOMIC_INIT(1),
268};
269
270#endif
271
272
273static inline struct rt6_info *ip6_dst_alloc(struct net *net,
274 struct net_device *dev,
275 int flags,
276 struct fib6_table *table)
277{
278 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
279 0, DST_OBSOLETE_FORCE_CHK, flags);
280
281 if (rt) {
282 struct dst_entry *dst = &rt->dst;
283
284 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
285 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
286 rt->rt6i_genid = rt_genid(net);
287 INIT_LIST_HEAD(&rt->rt6i_siblings);
288 rt->rt6i_nsiblings = 0;
289 }
290 return rt;
291}
292
293static void ip6_dst_destroy(struct dst_entry *dst)
294{
295 struct rt6_info *rt = (struct rt6_info *)dst;
296 struct inet6_dev *idev = rt->rt6i_idev;
297 struct dst_entry *from = dst->from;
298
299 if (!(rt->dst.flags & DST_HOST))
300 dst_destroy_metrics_generic(dst);
301
302 if (idev) {
303 rt->rt6i_idev = NULL;
304 in6_dev_put(idev);
305 }
306
307 dst->from = NULL;
308 dst_release(from);
309
310 if (rt6_has_peer(rt)) {
311 struct inet_peer *peer = rt6_peer_ptr(rt);
312 inet_putpeer(peer);
313 }
314}
315
316void rt6_bind_peer(struct rt6_info *rt, int create)
317{
318 struct inet_peer_base *base;
319 struct inet_peer *peer;
320
321 base = inetpeer_base_ptr(rt->_rt6i_peer);
322 if (!base)
323 return;
324
325 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
326 if (peer) {
327 if (!rt6_set_peer(rt, peer))
328 inet_putpeer(peer);
329 }
330}
331
332static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
333 int how)
334{
335 struct rt6_info *rt = (struct rt6_info *)dst;
336 struct inet6_dev *idev = rt->rt6i_idev;
337 struct net_device *loopback_dev =
338 dev_net(dev)->loopback_dev;
339
340 if (dev != loopback_dev) {
341 if (idev && idev->dev == dev) {
342 struct inet6_dev *loopback_idev =
343 in6_dev_get(loopback_dev);
344 if (loopback_idev) {
345 rt->rt6i_idev = loopback_idev;
346 in6_dev_put(idev);
347 }
348 }
349 }
350}
351
352static bool rt6_check_expired(const struct rt6_info *rt)
353{
354 if (rt->rt6i_flags & RTF_EXPIRES) {
355 if (time_after(jiffies, rt->dst.expires))
356 return true;
357 } else if (rt->dst.from) {
358 return rt6_check_expired((struct rt6_info *) rt->dst.from);
359 }
360 return false;
361}
362
363static bool rt6_need_strict(const struct in6_addr *daddr)
364{
365 return ipv6_addr_type(daddr) &
366 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
367}
368
369
370
371
372
373static int rt6_info_hash_nhsfn(unsigned int candidate_count,
374 const struct flowi6 *fl6)
375{
376 unsigned int val = fl6->flowi6_proto;
377
378 val ^= ipv6_addr_hash(&fl6->daddr);
379 val ^= ipv6_addr_hash(&fl6->saddr);
380
381
382 switch (fl6->flowi6_proto) {
383 case IPPROTO_UDP:
384 case IPPROTO_TCP:
385 case IPPROTO_SCTP:
386 val ^= (__force u16)fl6->fl6_sport;
387 val ^= (__force u16)fl6->fl6_dport;
388 break;
389
390 case IPPROTO_ICMPV6:
391 val ^= (__force u16)fl6->fl6_icmp_type;
392 val ^= (__force u16)fl6->fl6_icmp_code;
393 break;
394 }
395
396 val ^= (__force u32)fl6->flowlabel;
397
398
399 val = val ^ (val >> 7) ^ (val >> 12);
400 return val % candidate_count;
401}
402
403static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
404 struct flowi6 *fl6, int oif,
405 int strict)
406{
407 struct rt6_info *sibling, *next_sibling;
408 int route_choosen;
409
410 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
411
412
413
414 if (route_choosen)
415 list_for_each_entry_safe(sibling, next_sibling,
416 &match->rt6i_siblings, rt6i_siblings) {
417 route_choosen--;
418 if (route_choosen == 0) {
419 if (rt6_score_route(sibling, oif, strict) < 0)
420 break;
421 match = sibling;
422 break;
423 }
424 }
425 return match;
426}
427
428
429
430
431
432static inline struct rt6_info *rt6_device_match(struct net *net,
433 struct rt6_info *rt,
434 const struct in6_addr *saddr,
435 int oif,
436 int flags)
437{
438 struct rt6_info *local = NULL;
439 struct rt6_info *sprt;
440
441 if (!oif && ipv6_addr_any(saddr))
442 goto out;
443
444 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
445 struct net_device *dev = sprt->dst.dev;
446
447 if (oif) {
448 if (dev->ifindex == oif)
449 return sprt;
450 if (dev->flags & IFF_LOOPBACK) {
451 if (!sprt->rt6i_idev ||
452 sprt->rt6i_idev->dev->ifindex != oif) {
453 if (flags & RT6_LOOKUP_F_IFACE && oif)
454 continue;
455 if (local && (!oif ||
456 local->rt6i_idev->dev->ifindex == oif))
457 continue;
458 }
459 local = sprt;
460 }
461 } else {
462 if (ipv6_chk_addr(net, saddr, dev,
463 flags & RT6_LOOKUP_F_IFACE))
464 return sprt;
465 }
466 }
467
468 if (oif) {
469 if (local)
470 return local;
471
472 if (flags & RT6_LOOKUP_F_IFACE)
473 return net->ipv6.ip6_null_entry;
474 }
475out:
476 return rt;
477}
478
479#ifdef CONFIG_IPV6_ROUTER_PREF
480static void rt6_probe(struct rt6_info *rt)
481{
482 struct neighbour *neigh;
483
484
485
486
487
488
489
490
491 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
492 return;
493 rcu_read_lock_bh();
494 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
495 if (neigh) {
496 write_lock(&neigh->lock);
497 if (neigh->nud_state & NUD_VALID)
498 goto out;
499 }
500
501 if (!neigh ||
502 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
503 struct in6_addr mcaddr;
504 struct in6_addr *target;
505
506 if (neigh) {
507 neigh->updated = jiffies;
508 write_unlock(&neigh->lock);
509 }
510
511 target = (struct in6_addr *)&rt->rt6i_gateway;
512 addrconf_addr_solict_mult(target, &mcaddr);
513 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
514 } else {
515out:
516 write_unlock(&neigh->lock);
517 }
518 rcu_read_unlock_bh();
519}
520#else
/* Without CONFIG_IPV6_ROUTER_PREF gateways are never probed. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
524#endif
525
526
527
528
529static inline int rt6_check_dev(struct rt6_info *rt, int oif)
530{
531 struct net_device *dev = rt->dst.dev;
532 if (!oif || dev->ifindex == oif)
533 return 2;
534 if ((dev->flags & IFF_LOOPBACK) &&
535 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
536 return 1;
537 return 0;
538}
539
540static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
541{
542 struct neighbour *neigh;
543 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
544
545 if (rt->rt6i_flags & RTF_NONEXTHOP ||
546 !(rt->rt6i_flags & RTF_GATEWAY))
547 return RT6_NUD_SUCCEED;
548
549 rcu_read_lock_bh();
550 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
551 if (neigh) {
552 read_lock(&neigh->lock);
553 if (neigh->nud_state & NUD_VALID)
554 ret = RT6_NUD_SUCCEED;
555#ifdef CONFIG_IPV6_ROUTER_PREF
556 else if (!(neigh->nud_state & NUD_FAILED))
557 ret = RT6_NUD_SUCCEED;
558#endif
559 read_unlock(&neigh->lock);
560 } else {
561 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
562 RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
563 }
564 rcu_read_unlock_bh();
565
566 return ret;
567}
568
569static int rt6_score_route(struct rt6_info *rt, int oif,
570 int strict)
571{
572 int m;
573
574 m = rt6_check_dev(rt, oif);
575 if (!m && (strict & RT6_LOOKUP_F_IFACE))
576 return RT6_NUD_FAIL_HARD;
577#ifdef CONFIG_IPV6_ROUTER_PREF
578 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
579#endif
580 if (strict & RT6_LOOKUP_F_REACHABLE) {
581 int n = rt6_check_neigh(rt);
582 if (n < 0)
583 return n;
584 }
585 return m;
586}
587
588static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
589 int *mpri, struct rt6_info *match,
590 bool *do_rr)
591{
592 int m;
593 bool match_do_rr = false;
594
595 if (rt6_check_expired(rt))
596 goto out;
597
598 m = rt6_score_route(rt, oif, strict);
599 if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
600 match_do_rr = true;
601 m = 0;
602 } else if (m < 0) {
603 goto out;
604 }
605
606 if (strict & RT6_LOOKUP_F_REACHABLE)
607 rt6_probe(rt);
608
609 if (m > *mpri) {
610 *do_rr = match_do_rr;
611 *mpri = m;
612 match = rt;
613 }
614out:
615 return match;
616}
617
618static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
619 struct rt6_info *rr_head,
620 u32 metric, int oif, int strict,
621 bool *do_rr)
622{
623 struct rt6_info *rt, *match;
624 int mpri = -1;
625
626 match = NULL;
627 for (rt = rr_head; rt && rt->rt6i_metric == metric;
628 rt = rt->dst.rt6_next)
629 match = find_match(rt, oif, strict, &mpri, match, do_rr);
630 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
631 rt = rt->dst.rt6_next)
632 match = find_match(rt, oif, strict, &mpri, match, do_rr);
633
634 return match;
635}
636
637static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
638{
639 struct rt6_info *match, *rt0;
640 struct net *net;
641 bool do_rr = false;
642
643 rt0 = fn->rr_ptr;
644 if (!rt0)
645 fn->rr_ptr = rt0 = fn->leaf;
646
647 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
648 &do_rr);
649
650 if (do_rr) {
651 struct rt6_info *next = rt0->dst.rt6_next;
652
653
654 if (!next || next->rt6i_metric != rt0->rt6i_metric)
655 next = fn->leaf;
656
657 if (next != rt0)
658 fn->rr_ptr = next;
659 }
660
661 net = dev_net(rt0->dst.dev);
662 return match ? match : net->ipv6.ip6_null_entry;
663}
664
665#ifdef CONFIG_IPV6_ROUTE_INFO
666int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
667 const struct in6_addr *gwaddr)
668{
669 struct net *net = dev_net(dev);
670 struct route_info *rinfo = (struct route_info *) opt;
671 struct in6_addr prefix_buf, *prefix;
672 unsigned int pref;
673 unsigned long lifetime;
674 struct rt6_info *rt;
675
676 if (len < sizeof(struct route_info)) {
677 return -EINVAL;
678 }
679
680
681 if (rinfo->length > 3) {
682 return -EINVAL;
683 } else if (rinfo->prefix_len > 128) {
684 return -EINVAL;
685 } else if (rinfo->prefix_len > 64) {
686 if (rinfo->length < 2) {
687 return -EINVAL;
688 }
689 } else if (rinfo->prefix_len > 0) {
690 if (rinfo->length < 1) {
691 return -EINVAL;
692 }
693 }
694
695 pref = rinfo->route_pref;
696 if (pref == ICMPV6_ROUTER_PREF_INVALID)
697 return -EINVAL;
698
699 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
700
701 if (rinfo->length == 3)
702 prefix = (struct in6_addr *)rinfo->prefix;
703 else {
704
705 ipv6_addr_prefix(&prefix_buf,
706 (struct in6_addr *)rinfo->prefix,
707 rinfo->prefix_len);
708 prefix = &prefix_buf;
709 }
710
711 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
712 dev->ifindex);
713
714 if (rt && !lifetime) {
715 ip6_del_rt(rt);
716 rt = NULL;
717 }
718
719 if (!rt && lifetime)
720 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
721 pref);
722 else if (rt)
723 rt->rt6i_flags = RTF_ROUTEINFO |
724 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
725
726 if (rt) {
727 if (!addrconf_finite_timeout(lifetime))
728 rt6_clean_expires(rt);
729 else
730 rt6_set_expires(rt, jiffies + HZ * lifetime);
731
732 ip6_rt_put(rt);
733 }
734 return 0;
735}
736#endif
737
/*
 * BACKTRACK - climb the fib6 tree after a lookup ended in the null entry.
 *
 * Not a self-contained macro: it reads the caller's local "rt", reads
 * and writes the caller's local "fn", and jumps to the caller's labels
 * "out" (top-level root reached) and "restart" (a node carrying route
 * information was found). When the parent has a subtree other than the
 * current node, the subtree is searched with @saddr; otherwise the
 * parent itself becomes the current node.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
755
756static struct rt6_info *ip6_pol_route_lookup(struct net *net,
757 struct fib6_table *table,
758 struct flowi6 *fl6, int flags)
759{
760 struct fib6_node *fn;
761 struct rt6_info *rt;
762
763 read_lock_bh(&table->tb6_lock);
764 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
765restart:
766 rt = fn->leaf;
767 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
768 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
769 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
770 BACKTRACK(net, &fl6->saddr);
771out:
772 dst_use(&rt->dst, jiffies);
773 read_unlock_bh(&table->tb6_lock);
774 return rt;
775
776}
777
778struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
779 int flags)
780{
781 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
782}
783EXPORT_SYMBOL_GPL(ip6_route_lookup);
784
785struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
786 const struct in6_addr *saddr, int oif, int strict)
787{
788 struct flowi6 fl6 = {
789 .flowi6_oif = oif,
790 .daddr = *daddr,
791 };
792 struct dst_entry *dst;
793 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
794
795 if (saddr) {
796 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
797 flags |= RT6_LOOKUP_F_HAS_SADDR;
798 }
799
800 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
801 if (dst->error == 0)
802 return (struct rt6_info *) dst;
803
804 dst_release(dst);
805
806 return NULL;
807}
808
809EXPORT_SYMBOL(rt6_lookup);
810
811
812
813
814
815
816
817static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
818{
819 int err;
820 struct fib6_table *table;
821
822 table = rt->rt6i_table;
823 write_lock_bh(&table->tb6_lock);
824 err = fib6_add(&table->tb6_root, rt, info);
825 write_unlock_bh(&table->tb6_lock);
826
827 return err;
828}
829
830int ip6_ins_rt(struct rt6_info *rt)
831{
832 struct nl_info info = {
833 .nl_net = dev_net(rt->dst.dev),
834 };
835 return __ip6_ins_rt(rt, &info);
836}
837
838static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
839 const struct in6_addr *daddr,
840 const struct in6_addr *saddr)
841{
842 struct rt6_info *rt;
843
844
845
846
847
848 rt = ip6_rt_copy(ort, daddr);
849
850 if (rt) {
851 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
852 if (ort->rt6i_dst.plen != 128 &&
853 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
854 rt->rt6i_flags |= RTF_ANYCAST;
855 rt->rt6i_gateway = *daddr;
856 }
857
858 rt->rt6i_flags |= RTF_CACHE;
859
860#ifdef CONFIG_IPV6_SUBTREES
861 if (rt->rt6i_src.plen && saddr) {
862 rt->rt6i_src.addr = *saddr;
863 rt->rt6i_src.plen = 128;
864 }
865#endif
866 }
867
868 return rt;
869}
870
871static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
872 const struct in6_addr *daddr)
873{
874 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
875
876 if (rt)
877 rt->rt6i_flags |= RTF_CACHE;
878 return rt;
879}
880
881static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
882 struct flowi6 *fl6, int flags)
883{
884 struct fib6_node *fn;
885 struct rt6_info *rt, *nrt;
886 int strict = 0;
887 int attempts = 3;
888 int err;
889 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
890
891 strict |= flags & RT6_LOOKUP_F_IFACE;
892
893relookup:
894 read_lock_bh(&table->tb6_lock);
895
896restart_2:
897 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
898
899restart:
900 rt = rt6_select(fn, oif, strict | reachable);
901 if (rt->rt6i_nsiblings)
902 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
903 BACKTRACK(net, &fl6->saddr);
904 if (rt == net->ipv6.ip6_null_entry ||
905 rt->rt6i_flags & RTF_CACHE)
906 goto out;
907
908 dst_hold(&rt->dst);
909 read_unlock_bh(&table->tb6_lock);
910
911 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
912 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
913 else if (!(rt->dst.flags & DST_HOST))
914 nrt = rt6_alloc_clone(rt, &fl6->daddr);
915 else
916 goto out2;
917
918 ip6_rt_put(rt);
919 rt = nrt ? : net->ipv6.ip6_null_entry;
920
921 dst_hold(&rt->dst);
922 if (nrt) {
923 err = ip6_ins_rt(nrt);
924 if (!err)
925 goto out2;
926 }
927
928 if (--attempts <= 0)
929 goto out2;
930
931
932
933
934
935 ip6_rt_put(rt);
936 goto relookup;
937
938out:
939 if (reachable) {
940 reachable = 0;
941 goto restart_2;
942 }
943 dst_hold(&rt->dst);
944 read_unlock_bh(&table->tb6_lock);
945out2:
946 rt->dst.lastuse = jiffies;
947 rt->dst.__use++;
948
949 return rt;
950}
951
952static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
953 struct flowi6 *fl6, int flags)
954{
955 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
956}
957
958static struct dst_entry *ip6_route_input_lookup(struct net *net,
959 struct net_device *dev,
960 struct flowi6 *fl6, int flags)
961{
962 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
963 flags |= RT6_LOOKUP_F_IFACE;
964
965 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
966}
967
968void ip6_route_input(struct sk_buff *skb)
969{
970 const struct ipv6hdr *iph = ipv6_hdr(skb);
971 struct net *net = dev_net(skb->dev);
972 int flags = RT6_LOOKUP_F_HAS_SADDR;
973 struct flowi6 fl6 = {
974 .flowi6_iif = skb->dev->ifindex,
975 .daddr = iph->daddr,
976 .saddr = iph->saddr,
977 .flowlabel = ip6_flowinfo(iph),
978 .flowi6_mark = skb->mark,
979 .flowi6_proto = iph->nexthdr,
980 };
981
982 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
983}
984
985static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
986 struct flowi6 *fl6, int flags)
987{
988 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
989}
990
991struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
992 struct flowi6 *fl6)
993{
994 int flags = 0;
995
996 fl6->flowi6_iif = LOOPBACK_IFINDEX;
997
998 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
999 flags |= RT6_LOOKUP_F_IFACE;
1000
1001 if (!ipv6_addr_any(&fl6->saddr))
1002 flags |= RT6_LOOKUP_F_HAS_SADDR;
1003 else if (sk)
1004 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1005
1006 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1007}
1008
1009EXPORT_SYMBOL(ip6_route_output);
1010
1011struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1012{
1013 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1014 struct dst_entry *new = NULL;
1015
1016 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1017 if (rt) {
1018 new = &rt->dst;
1019
1020 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1021 rt6_init_peer(rt, net->ipv6.peers);
1022
1023 new->__use = 1;
1024 new->input = dst_discard;
1025 new->output = dst_discard;
1026
1027 if (dst_metrics_read_only(&ort->dst))
1028 new->_metrics = ort->dst._metrics;
1029 else
1030 dst_copy_metrics(new, &ort->dst);
1031 rt->rt6i_idev = ort->rt6i_idev;
1032 if (rt->rt6i_idev)
1033 in6_dev_hold(rt->rt6i_idev);
1034
1035 rt->rt6i_gateway = ort->rt6i_gateway;
1036 rt->rt6i_flags = ort->rt6i_flags;
1037 rt->rt6i_metric = 0;
1038
1039 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1040#ifdef CONFIG_IPV6_SUBTREES
1041 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1042#endif
1043
1044 dst_free(new);
1045 }
1046
1047 dst_release(dst_orig);
1048 return new ? new : ERR_PTR(-ENOMEM);
1049}
1050
1051
1052
1053
1054
1055static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1056{
1057 struct rt6_info *rt;
1058
1059 rt = (struct rt6_info *) dst;
1060
1061
1062
1063
1064
1065 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1066 return NULL;
1067
1068 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1069 return dst;
1070
1071 return NULL;
1072}
1073
1074static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1075{
1076 struct rt6_info *rt = (struct rt6_info *) dst;
1077
1078 if (rt) {
1079 if (rt->rt6i_flags & RTF_CACHE) {
1080 if (rt6_check_expired(rt)) {
1081 ip6_del_rt(rt);
1082 dst = NULL;
1083 }
1084 } else {
1085 dst_release(dst);
1086 dst = NULL;
1087 }
1088 }
1089 return dst;
1090}
1091
1092static void ip6_link_failure(struct sk_buff *skb)
1093{
1094 struct rt6_info *rt;
1095
1096 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1097
1098 rt = (struct rt6_info *) skb_dst(skb);
1099 if (rt) {
1100 if (rt->rt6i_flags & RTF_CACHE) {
1101 dst_hold(&rt->dst);
1102 if (ip6_del_rt(rt))
1103 dst_free(&rt->dst);
1104 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1105 rt->rt6i_node->fn_sernum = -1;
1106 }
1107 }
1108}
1109
1110static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1111 struct sk_buff *skb, u32 mtu)
1112{
1113 struct rt6_info *rt6 = (struct rt6_info*)dst;
1114
1115 dst_confirm(dst);
1116 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1117 struct net *net = dev_net(dst->dev);
1118
1119 rt6->rt6i_flags |= RTF_MODIFIED;
1120 if (mtu < IPV6_MIN_MTU) {
1121 u32 features = dst_metric(dst, RTAX_FEATURES);
1122 mtu = IPV6_MIN_MTU;
1123 features |= RTAX_FEATURE_ALLFRAG;
1124 dst_metric_set(dst, RTAX_FEATURES, features);
1125 }
1126 dst_metric_set(dst, RTAX_MTU, mtu);
1127 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1128 }
1129}
1130
1131void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1132 int oif, u32 mark)
1133{
1134 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1135 struct dst_entry *dst;
1136 struct flowi6 fl6;
1137
1138 memset(&fl6, 0, sizeof(fl6));
1139 fl6.flowi6_oif = oif;
1140 fl6.flowi6_mark = mark;
1141 fl6.flowi6_flags = 0;
1142 fl6.daddr = iph->daddr;
1143 fl6.saddr = iph->saddr;
1144 fl6.flowlabel = ip6_flowinfo(iph);
1145
1146 dst = ip6_route_output(net, NULL, &fl6);
1147 if (!dst->error)
1148 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1149 dst_release(dst);
1150}
1151EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1152
1153void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1154{
1155 ip6_update_pmtu(skb, sock_net(sk), mtu,
1156 sk->sk_bound_dev_if, sk->sk_mark);
1157}
1158EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1159
1160void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1161{
1162 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1163 struct dst_entry *dst;
1164 struct flowi6 fl6;
1165
1166 memset(&fl6, 0, sizeof(fl6));
1167 fl6.flowi6_oif = oif;
1168 fl6.flowi6_mark = mark;
1169 fl6.flowi6_flags = 0;
1170 fl6.daddr = iph->daddr;
1171 fl6.saddr = iph->saddr;
1172 fl6.flowlabel = ip6_flowinfo(iph);
1173
1174 dst = ip6_route_output(net, NULL, &fl6);
1175 if (!dst->error)
1176 rt6_do_redirect(dst, NULL, skb);
1177 dst_release(dst);
1178}
1179EXPORT_SYMBOL_GPL(ip6_redirect);
1180
1181void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1182 u32 mark)
1183{
1184 const struct ipv6hdr *iph = ipv6_hdr(skb);
1185 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1186 struct dst_entry *dst;
1187 struct flowi6 fl6;
1188
1189 memset(&fl6, 0, sizeof(fl6));
1190 fl6.flowi6_oif = oif;
1191 fl6.flowi6_mark = mark;
1192 fl6.flowi6_flags = 0;
1193 fl6.daddr = msg->dest;
1194 fl6.saddr = iph->daddr;
1195
1196 dst = ip6_route_output(net, NULL, &fl6);
1197 if (!dst->error)
1198 rt6_do_redirect(dst, NULL, skb);
1199 dst_release(dst);
1200}
1201
1202void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1203{
1204 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1205}
1206EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1207
1208static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1209{
1210 struct net_device *dev = dst->dev;
1211 unsigned int mtu = dst_mtu(dst);
1212 struct net *net = dev_net(dev);
1213
1214 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1215
1216 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1217 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1218
1219
1220
1221
1222
1223
1224
1225 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1226 mtu = IPV6_MAXPLEN;
1227 return mtu;
1228}
1229
1230static unsigned int ip6_mtu(const struct dst_entry *dst)
1231{
1232 struct inet6_dev *idev;
1233 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1234
1235 if (mtu)
1236 return mtu;
1237
1238 mtu = IPV6_MIN_MTU;
1239
1240 rcu_read_lock();
1241 idev = __in6_dev_get(dst->dev);
1242 if (idev)
1243 mtu = idev->cnf.mtu6;
1244 rcu_read_unlock();
1245
1246 return mtu;
1247}
1248
/*
 * Singly linked list (via dst->next) of the dst entries handed out by
 * icmp6_dst_alloc(), and the spinlock that protects it.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
1251
1252struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1253 struct flowi6 *fl6)
1254{
1255 struct dst_entry *dst;
1256 struct rt6_info *rt;
1257 struct inet6_dev *idev = in6_dev_get(dev);
1258 struct net *net = dev_net(dev);
1259
1260 if (unlikely(!idev))
1261 return ERR_PTR(-ENODEV);
1262
1263 rt = ip6_dst_alloc(net, dev, 0, NULL);
1264 if (unlikely(!rt)) {
1265 in6_dev_put(idev);
1266 dst = ERR_PTR(-ENOMEM);
1267 goto out;
1268 }
1269
1270 rt->dst.flags |= DST_HOST;
1271 rt->dst.output = ip6_output;
1272 atomic_set(&rt->dst.__refcnt, 1);
1273 rt->rt6i_dst.addr = fl6->daddr;
1274 rt->rt6i_dst.plen = 128;
1275 rt->rt6i_idev = idev;
1276 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1277
1278 spin_lock_bh(&icmp6_dst_lock);
1279 rt->dst.next = icmp6_dst_gc_list;
1280 icmp6_dst_gc_list = &rt->dst;
1281 spin_unlock_bh(&icmp6_dst_lock);
1282
1283 fib6_force_start_gc(net);
1284
1285 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1286
1287out:
1288 return dst;
1289}
1290
1291int icmp6_dst_gc(void)
1292{
1293 struct dst_entry *dst, **pprev;
1294 int more = 0;
1295
1296 spin_lock_bh(&icmp6_dst_lock);
1297 pprev = &icmp6_dst_gc_list;
1298
1299 while ((dst = *pprev) != NULL) {
1300 if (!atomic_read(&dst->__refcnt)) {
1301 *pprev = dst->next;
1302 dst_free(dst);
1303 } else {
1304 pprev = &dst->next;
1305 ++more;
1306 }
1307 }
1308
1309 spin_unlock_bh(&icmp6_dst_lock);
1310
1311 return more;
1312}
1313
1314static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1315 void *arg)
1316{
1317 struct dst_entry *dst, **pprev;
1318
1319 spin_lock_bh(&icmp6_dst_lock);
1320 pprev = &icmp6_dst_gc_list;
1321 while ((dst = *pprev) != NULL) {
1322 struct rt6_info *rt = (struct rt6_info *) dst;
1323 if (func(rt, arg)) {
1324 *pprev = dst->next;
1325 dst_free(dst);
1326 } else {
1327 pprev = &dst->next;
1328 }
1329 }
1330 spin_unlock_bh(&icmp6_dst_lock);
1331}
1332
1333static int ip6_dst_gc(struct dst_ops *ops)
1334{
1335 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1336 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1337 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1338 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1339 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1340 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1341 int entries;
1342
1343 entries = dst_entries_get_fast(ops);
1344 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1345 entries <= rt_max_size)
1346 goto out;
1347
1348 net->ipv6.ip6_rt_gc_expire++;
1349 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1350 entries = dst_entries_get_slow(ops);
1351 if (entries < ops->gc_thresh)
1352 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1353out:
1354 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1355 return entries > rt_max_size;
1356}
1357
1358int ip6_dst_hoplimit(struct dst_entry *dst)
1359{
1360 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1361 if (hoplimit == 0) {
1362 struct net_device *dev = dst->dev;
1363 struct inet6_dev *idev;
1364
1365 rcu_read_lock();
1366 idev = __in6_dev_get(dev);
1367 if (idev)
1368 hoplimit = idev->cnf.hop_limit;
1369 else
1370 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1371 rcu_read_unlock();
1372 }
1373 return hoplimit;
1374}
1375EXPORT_SYMBOL(ip6_dst_hoplimit);
1376
1377
1378
1379
1380
/*
 * Build a route from the configuration in @cfg and insert it.
 *
 * On the success path the function returns whatever __ip6_ins_rt() returns;
 * the device and inet6_dev references taken here are stored in the new
 * route and are not dropped by this function.  On every failure path the
 * references on dev/idev and the partially built route are released and a
 * negative errno is returned.
 *
 * Note that @cfg is modified: fc_metric and fc_protocol receive defaults
 * when unset and fc_nlinfo.nl_net is overwritten before insertion.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	/* Prefix lengths cannot exceed the 128 bits of an IPv6 address. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routes are only accepted with CONFIG_IPV6_SUBTREES. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	/* An explicit interface index pins the device; take both references. */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/*
	 * Netlink requests without NLM_F_CREATE first look for an existing
	 * table; if there is none a warning is logged and the table is
	 * created anyway.  All other callers create the table on demand.
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	/* fc_expires is given in clock_t units and converted to jiffies. */
	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type and RTF_LOCAL. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	/* Store the destination masked to its prefix; /128 is a host route. */
	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

	/* Non-host routes carrying metrics get their own zeroed metrics array. */
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/*
	 * Explicit reject routes, and routes through a loopback device to a
	 * non-loopback destination that are not RTF_LOCAL, become reject
	 * routes bound to the namespace's loopback device.
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* Swap any device chosen so far for net->loopback_dev. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* The route type selects the error recorded in the dst. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			break;
		case RTN_THROW:
			rt->dst.error = -EAGAIN;
			break;
		default:
			rt->dst.error = -ENETUNREACH;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/*
			 * The gateway is not a link-local unicast address.
			 * It must at least be unicast, and it must be
			 * reachable through an existing route that is not
			 * itself a gateway route.
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				/* A requested device must match the gateway's. */
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device requested: adopt the gateway route's. */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* err stays -EHOSTUNREACH if grt is a gateway route. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		/* Gateway routes need a device, and it may not be loopback. */
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* A preferred source must pass ipv6_chk_addr() for this device. */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Copy each non-zero-typed metric attribute into the route. */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	/* From here on the dev/idev references are held by the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* Failure: drop every reference taken above and free the route. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1616
1617static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1618{
1619 int err;
1620 struct fib6_table *table;
1621 struct net *net = dev_net(rt->dst.dev);
1622
1623 if (rt == net->ipv6.ip6_null_entry) {
1624 err = -ENOENT;
1625 goto out;
1626 }
1627
1628 table = rt->rt6i_table;
1629 write_lock_bh(&table->tb6_lock);
1630 err = fib6_del(rt, info);
1631 write_unlock_bh(&table->tb6_lock);
1632
1633out:
1634 ip6_rt_put(rt);
1635 return err;
1636}
1637
1638int ip6_del_rt(struct rt6_info *rt)
1639{
1640 struct nl_info info = {
1641 .nl_net = dev_net(rt->dst.dev),
1642 };
1643 return __ip6_del_rt(rt, &info);
1644}
1645
1646static int ip6_route_del(struct fib6_config *cfg)
1647{
1648 struct fib6_table *table;
1649 struct fib6_node *fn;
1650 struct rt6_info *rt;
1651 int err = -ESRCH;
1652
1653 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1654 if (!table)
1655 return err;
1656
1657 read_lock_bh(&table->tb6_lock);
1658
1659 fn = fib6_locate(&table->tb6_root,
1660 &cfg->fc_dst, cfg->fc_dst_len,
1661 &cfg->fc_src, cfg->fc_src_len);
1662
1663 if (fn) {
1664 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1665 if (cfg->fc_ifindex &&
1666 (!rt->dst.dev ||
1667 rt->dst.dev->ifindex != cfg->fc_ifindex))
1668 continue;
1669 if (cfg->fc_flags & RTF_GATEWAY &&
1670 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1671 continue;
1672 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1673 continue;
1674 dst_hold(&rt->dst);
1675 read_unlock_bh(&table->tb6_lock);
1676
1677 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1678 }
1679 }
1680 read_unlock_bh(&table->tb6_lock);
1681
1682 return err;
1683}
1684
/*
 * Process the redirect message carried in @skb for the route @dst.
 *
 * After validating the message, the neighbour entry for the redirect target
 * is updated and a host route to the redirected destination, copied from
 * @dst and pointing at the target, is inserted.  A NETEVENT_REDIRECT
 * notification is raised and, if the old route was itself a cached entry,
 * it is deleted.  @sk is not used in this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* Bytes left for options once the fixed rd_msg part is accounted for. */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/*
	 * target == destination means the destination is on-link; any other
	 * target has to be a link-local unicast address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	/* Ignore redirects when forwarding or when accept_redirects is off. */
	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* Parse the ND options that follow the fixed part of the message. */
	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* The target link-layer address option is optional. */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* The message has passed all checks; confirm the current route. */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 * Record the (possibly absent) link-layer address as STALE.  When the
	 * destination is not on-link the target is also flagged as a router.
	 */
	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Clone the current route into a host route for msg->dest. */
	nrt = ip6_rt_copy(rt, &msg->dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/*
	 * A cached old route is now superseded.  ip6_del_rt() drops one
	 * reference, so take an extra one for it first.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
1801
1802
1803
1804
1805
1806static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1807 const struct in6_addr *dest)
1808{
1809 struct net *net = dev_net(ort->dst.dev);
1810 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1811 ort->rt6i_table);
1812
1813 if (rt) {
1814 rt->dst.input = ort->dst.input;
1815 rt->dst.output = ort->dst.output;
1816 rt->dst.flags |= DST_HOST;
1817
1818 rt->rt6i_dst.addr = *dest;
1819 rt->rt6i_dst.plen = 128;
1820 dst_copy_metrics(&rt->dst, &ort->dst);
1821 rt->dst.error = ort->dst.error;
1822 rt->rt6i_idev = ort->rt6i_idev;
1823 if (rt->rt6i_idev)
1824 in6_dev_hold(rt->rt6i_idev);
1825 rt->dst.lastuse = jiffies;
1826
1827 rt->rt6i_gateway = ort->rt6i_gateway;
1828 rt->rt6i_flags = ort->rt6i_flags;
1829 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1830 (RTF_DEFAULT | RTF_ADDRCONF))
1831 rt6_set_from(rt, ort);
1832 rt->rt6i_metric = 0;
1833
1834#ifdef CONFIG_IPV6_SUBTREES
1835 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1836#endif
1837 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1838 rt->rt6i_table = ort->rt6i_table;
1839 }
1840 return rt;
1841}
1842
1843#ifdef CONFIG_IPV6_ROUTE_INFO
1844static struct rt6_info *rt6_get_route_info(struct net *net,
1845 const struct in6_addr *prefix, int prefixlen,
1846 const struct in6_addr *gwaddr, int ifindex)
1847{
1848 struct fib6_node *fn;
1849 struct rt6_info *rt = NULL;
1850 struct fib6_table *table;
1851
1852 table = fib6_get_table(net, RT6_TABLE_INFO);
1853 if (!table)
1854 return NULL;
1855
1856 read_lock_bh(&table->tb6_lock);
1857 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1858 if (!fn)
1859 goto out;
1860
1861 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1862 if (rt->dst.dev->ifindex != ifindex)
1863 continue;
1864 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1865 continue;
1866 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1867 continue;
1868 dst_hold(&rt->dst);
1869 break;
1870 }
1871out:
1872 read_unlock_bh(&table->tb6_lock);
1873 return rt;
1874}
1875
1876static struct rt6_info *rt6_add_route_info(struct net *net,
1877 const struct in6_addr *prefix, int prefixlen,
1878 const struct in6_addr *gwaddr, int ifindex,
1879 unsigned int pref)
1880{
1881 struct fib6_config cfg = {
1882 .fc_table = RT6_TABLE_INFO,
1883 .fc_metric = IP6_RT_PRIO_USER,
1884 .fc_ifindex = ifindex,
1885 .fc_dst_len = prefixlen,
1886 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1887 RTF_UP | RTF_PREF(pref),
1888 .fc_nlinfo.portid = 0,
1889 .fc_nlinfo.nlh = NULL,
1890 .fc_nlinfo.nl_net = net,
1891 };
1892
1893 cfg.fc_dst = *prefix;
1894 cfg.fc_gateway = *gwaddr;
1895
1896
1897 if (!prefixlen)
1898 cfg.fc_flags |= RTF_DEFAULT;
1899
1900 ip6_route_add(&cfg);
1901
1902 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1903}
1904#endif
1905
1906struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1907{
1908 struct rt6_info *rt;
1909 struct fib6_table *table;
1910
1911 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1912 if (!table)
1913 return NULL;
1914
1915 read_lock_bh(&table->tb6_lock);
1916 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1917 if (dev == rt->dst.dev &&
1918 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1919 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1920 break;
1921 }
1922 if (rt)
1923 dst_hold(&rt->dst);
1924 read_unlock_bh(&table->tb6_lock);
1925 return rt;
1926}
1927
1928struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1929 struct net_device *dev,
1930 unsigned int pref)
1931{
1932 struct fib6_config cfg = {
1933 .fc_table = RT6_TABLE_DFLT,
1934 .fc_metric = IP6_RT_PRIO_USER,
1935 .fc_ifindex = dev->ifindex,
1936 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1937 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1938 .fc_nlinfo.portid = 0,
1939 .fc_nlinfo.nlh = NULL,
1940 .fc_nlinfo.nl_net = dev_net(dev),
1941 };
1942
1943 cfg.fc_gateway = *gwaddr;
1944
1945 ip6_route_add(&cfg);
1946
1947 return rt6_get_dflt_router(gwaddr, dev);
1948}
1949
1950void rt6_purge_dflt_routers(struct net *net)
1951{
1952 struct rt6_info *rt;
1953 struct fib6_table *table;
1954
1955
1956 table = fib6_get_table(net, RT6_TABLE_DFLT);
1957 if (!table)
1958 return;
1959
1960restart:
1961 read_lock_bh(&table->tb6_lock);
1962 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1963 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1964 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1965 dst_hold(&rt->dst);
1966 read_unlock_bh(&table->tb6_lock);
1967 ip6_del_rt(rt);
1968 goto restart;
1969 }
1970 }
1971 read_unlock_bh(&table->tb6_lock);
1972}
1973
1974static void rtmsg_to_fib6_config(struct net *net,
1975 struct in6_rtmsg *rtmsg,
1976 struct fib6_config *cfg)
1977{
1978 memset(cfg, 0, sizeof(*cfg));
1979
1980 cfg->fc_table = RT6_TABLE_MAIN;
1981 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1982 cfg->fc_metric = rtmsg->rtmsg_metric;
1983 cfg->fc_expires = rtmsg->rtmsg_info;
1984 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1985 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1986 cfg->fc_flags = rtmsg->rtmsg_flags;
1987
1988 cfg->fc_nlinfo.nl_net = net;
1989
1990 cfg->fc_dst = rtmsg->rtmsg_dst;
1991 cfg->fc_src = rtmsg->rtmsg_src;
1992 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1993}
1994
1995int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1996{
1997 struct fib6_config cfg;
1998 struct in6_rtmsg rtmsg;
1999 int err;
2000
2001 switch(cmd) {
2002 case SIOCADDRT:
2003 case SIOCDELRT:
2004 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2005 return -EPERM;
2006 err = copy_from_user(&rtmsg, arg,
2007 sizeof(struct in6_rtmsg));
2008 if (err)
2009 return -EFAULT;
2010
2011 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2012
2013 rtnl_lock();
2014 switch (cmd) {
2015 case SIOCADDRT:
2016 err = ip6_route_add(&cfg);
2017 break;
2018 case SIOCDELRT:
2019 err = ip6_route_del(&cfg);
2020 break;
2021 default:
2022 err = -EINVAL;
2023 }
2024 rtnl_unlock();
2025
2026 return err;
2027 }
2028
2029 return -EINVAL;
2030}
2031
2032
2033
2034
2035
2036static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2037{
2038 int type;
2039 struct dst_entry *dst = skb_dst(skb);
2040 switch (ipstats_mib_noroutes) {
2041 case IPSTATS_MIB_INNOROUTES:
2042 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2043 if (type == IPV6_ADDR_ANY) {
2044 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2045 IPSTATS_MIB_INADDRERRORS);
2046 break;
2047 }
2048
2049 case IPSTATS_MIB_OUTNOROUTES:
2050 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2051 ipstats_mib_noroutes);
2052 break;
2053 }
2054 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2055 kfree_skb(skb);
2056 return 0;
2057}
2058
2059static int ip6_pkt_discard(struct sk_buff *skb)
2060{
2061 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2062}
2063
2064static int ip6_pkt_discard_out(struct sk_buff *skb)
2065{
2066 skb->dev = skb_dst(skb)->dev;
2067 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2068}
2069
2070#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2071
2072static int ip6_pkt_prohibit(struct sk_buff *skb)
2073{
2074 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2075}
2076
2077static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2078{
2079 skb->dev = skb_dst(skb)->dev;
2080 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2081}
2082
2083#endif
2084
2085
2086
2087
2088
2089struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2090 const struct in6_addr *addr,
2091 bool anycast)
2092{
2093 struct net *net = dev_net(idev->dev);
2094 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2095
2096 if (!rt) {
2097 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2098 return ERR_PTR(-ENOMEM);
2099 }
2100
2101 in6_dev_hold(idev);
2102
2103 rt->dst.flags |= DST_HOST;
2104 rt->dst.input = ip6_input;
2105 rt->dst.output = ip6_output;
2106 rt->rt6i_idev = idev;
2107
2108 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2109 if (anycast)
2110 rt->rt6i_flags |= RTF_ANYCAST;
2111 else
2112 rt->rt6i_flags |= RTF_LOCAL;
2113
2114 rt->rt6i_dst.addr = *addr;
2115 rt->rt6i_dst.plen = 128;
2116 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2117
2118 atomic_set(&rt->dst.__refcnt, 1);
2119
2120 return rt;
2121}
2122
2123int ip6_route_get_saddr(struct net *net,
2124 struct rt6_info *rt,
2125 const struct in6_addr *daddr,
2126 unsigned int prefs,
2127 struct in6_addr *saddr)
2128{
2129 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2130 int err = 0;
2131 if (rt->rt6i_prefsrc.plen)
2132 *saddr = rt->rt6i_prefsrc.addr;
2133 else
2134 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2135 daddr, prefs, saddr);
2136 return err;
2137}
2138
2139
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* preferred source address to clear */
};
2145
2146static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2147{
2148 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2149 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2150 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2151
2152 if (((void *)rt->dst.dev == dev || !dev) &&
2153 rt != net->ipv6.ip6_null_entry &&
2154 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2155
2156 rt->rt6i_prefsrc.plen = 0;
2157 }
2158 return 0;
2159}
2160
2161void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2162{
2163 struct net *net = dev_net(ifp->idev->dev);
2164 struct arg_dev_net_ip adni = {
2165 .dev = ifp->idev->dev,
2166 .net = net,
2167 .addr = &ifp->addr,
2168 };
2169 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2170}
2171
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2176
2177static int fib6_ifdown(struct rt6_info *rt, void *arg)
2178{
2179 const struct arg_dev_net *adn = arg;
2180 const struct net_device *dev = adn->dev;
2181
2182 if ((rt->dst.dev == dev || !dev) &&
2183 rt != adn->net->ipv6.ip6_null_entry)
2184 return -1;
2185
2186 return 0;
2187}
2188
2189void rt6_ifdown(struct net *net, struct net_device *dev)
2190{
2191 struct arg_dev_net adn = {
2192 .dev = dev,
2193 .net = net,
2194 };
2195
2196 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2197 icmp6_clean_all(fib6_ifdown, &adn);
2198}
2199
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2204
2205static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2206{
2207 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2208 struct inet6_dev *idev;
2209
2210
2211
2212
2213
2214
2215
2216 idev = __in6_dev_get(arg->dev);
2217 if (!idev)
2218 return 0;
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234 if (rt->dst.dev == arg->dev &&
2235 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2236 (dst_mtu(&rt->dst) >= arg->mtu ||
2237 (dst_mtu(&rt->dst) < arg->mtu &&
2238 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2239 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2240 }
2241 return 0;
2242}
2243
2244void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2245{
2246 struct rt6_mtu_change_arg arg = {
2247 .dev = dev,
2248 .mtu = mtu,
2249 };
2250
2251 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2252}
2253
/*
 * Netlink attribute policy used with nlmsg_parse() for IPv6 route
 * messages.  Attributes without an entry here (RTA_DST, RTA_SRC,
 * RTA_PREFSRC, RTA_TABLE, ...) get no policy-level validation in this table.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
};
2262
2263static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2264 struct fib6_config *cfg)
2265{
2266 struct rtmsg *rtm;
2267 struct nlattr *tb[RTA_MAX+1];
2268 int err;
2269
2270 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2271 if (err < 0)
2272 goto errout;
2273
2274 err = -EINVAL;
2275 rtm = nlmsg_data(nlh);
2276 memset(cfg, 0, sizeof(*cfg));
2277
2278 cfg->fc_table = rtm->rtm_table;
2279 cfg->fc_dst_len = rtm->rtm_dst_len;
2280 cfg->fc_src_len = rtm->rtm_src_len;
2281 cfg->fc_flags = RTF_UP;
2282 cfg->fc_protocol = rtm->rtm_protocol;
2283 cfg->fc_type = rtm->rtm_type;
2284
2285 if (rtm->rtm_type == RTN_UNREACHABLE ||
2286 rtm->rtm_type == RTN_BLACKHOLE ||
2287 rtm->rtm_type == RTN_PROHIBIT ||
2288 rtm->rtm_type == RTN_THROW)
2289 cfg->fc_flags |= RTF_REJECT;
2290
2291 if (rtm->rtm_type == RTN_LOCAL)
2292 cfg->fc_flags |= RTF_LOCAL;
2293
2294 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2295 cfg->fc_nlinfo.nlh = nlh;
2296 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2297
2298 if (tb[RTA_GATEWAY]) {
2299 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2300 cfg->fc_flags |= RTF_GATEWAY;
2301 }
2302
2303 if (tb[RTA_DST]) {
2304 int plen = (rtm->rtm_dst_len + 7) >> 3;
2305
2306 if (nla_len(tb[RTA_DST]) < plen)
2307 goto errout;
2308
2309 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2310 }
2311
2312 if (tb[RTA_SRC]) {
2313 int plen = (rtm->rtm_src_len + 7) >> 3;
2314
2315 if (nla_len(tb[RTA_SRC]) < plen)
2316 goto errout;
2317
2318 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2319 }
2320
2321 if (tb[RTA_PREFSRC])
2322 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2323
2324 if (tb[RTA_OIF])
2325 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2326
2327 if (tb[RTA_PRIORITY])
2328 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2329
2330 if (tb[RTA_METRICS]) {
2331 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2332 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2333 }
2334
2335 if (tb[RTA_TABLE])
2336 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2337
2338 if (tb[RTA_MULTIPATH]) {
2339 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2340 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2341 }
2342
2343 err = 0;
2344errout:
2345 return err;
2346}
2347
2348static int ip6_route_multipath(struct fib6_config *cfg, int add)
2349{
2350 struct fib6_config r_cfg;
2351 struct rtnexthop *rtnh;
2352 int remaining;
2353 int attrlen;
2354 int err = 0, last_err = 0;
2355
2356beginning:
2357 rtnh = (struct rtnexthop *)cfg->fc_mp;
2358 remaining = cfg->fc_mp_len;
2359
2360
2361 while (rtnh_ok(rtnh, remaining)) {
2362 memcpy(&r_cfg, cfg, sizeof(*cfg));
2363 if (rtnh->rtnh_ifindex)
2364 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2365
2366 attrlen = rtnh_attrlen(rtnh);
2367 if (attrlen > 0) {
2368 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2369
2370 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2371 if (nla) {
2372 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2373 r_cfg.fc_flags |= RTF_GATEWAY;
2374 }
2375 }
2376 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2377 if (err) {
2378 last_err = err;
2379
2380
2381
2382
2383 if (add) {
2384
2385
2386
2387 add = 0;
2388 goto beginning;
2389 }
2390 }
2391
2392
2393
2394
2395
2396 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2397 rtnh = rtnh_next(rtnh, &remaining);
2398 }
2399
2400 return last_err;
2401}
2402
2403static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2404{
2405 struct fib6_config cfg;
2406 int err;
2407
2408 err = rtm_to_fib6_config(skb, nlh, &cfg);
2409 if (err < 0)
2410 return err;
2411
2412 if (cfg.fc_mp)
2413 return ip6_route_multipath(&cfg, 0);
2414 else
2415 return ip6_route_del(&cfg);
2416}
2417
2418static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2419{
2420 struct fib6_config cfg;
2421 int err;
2422
2423 err = rtm_to_fib6_config(skb, nlh, &cfg);
2424 if (err < 0)
2425 return err;
2426
2427 if (cfg.fc_mp)
2428 return ip6_route_multipath(&cfg, 1);
2429 else
2430 return ip6_route_add(&cfg);
2431}
2432
2433static inline size_t rt6_nlmsg_size(void)
2434{
2435 return NLMSG_ALIGN(sizeof(struct rtmsg))
2436 + nla_total_size(16)
2437 + nla_total_size(16)
2438 + nla_total_size(16)
2439 + nla_total_size(16)
2440 + nla_total_size(4)
2441 + nla_total_size(4)
2442 + nla_total_size(4)
2443 + nla_total_size(4)
2444 + RTAX_MAX * nla_total_size(4)
2445 + nla_total_size(sizeof(struct rta_cacheinfo));
2446}
2447
/*
 * Append a route message of type @type describing @rt to @skb.
 *
 * @dst/@src: when non-NULL they are reported as /128 RTA_DST/RTA_SRC
 *            instead of the route's own prefixes (RTA_SRC only with
 *            CONFIG_IPV6_SUBTREES).
 * @iif:      input interface to report; for multicast destinations with
 *            CONFIG_IPV6_MROUTE the message is completed by
 *            ip6mr_get_route() instead.
 * @prefix:   when non-zero only routes flagged RTF_PREFIX_RT are emitted.
 * @portid, @seq, @flags: passed on to nlmsg_put().
 *
 * Returns 1 when the route is skipped by the @prefix filter, -EMSGSIZE when
 * the message does not fit (any partially written message is cancelled),
 * 0 when ip6mr_get_route() returned 0 with @nowait clear, and otherwise the
 * result of nlmsg_end().
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* Not a prefix route: skipped, nothing is written. */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	/* The table id goes into the header and into RTA_TABLE. */
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Reject routes report their type according to the stored dst.error. */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* RTF_DYNAMIC and RTF_ADDRCONF override the stored protocol. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dst is reported as a /128, else the route's prefix. */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			/*
			 * err <= 0: with !nowait, 0 returns 0 without
			 * finishing the message here and a negative value
			 * cancels it; with nowait only -EMSGSIZE cancels it.
			 */
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* No input interface: report the source chosen for @dst. */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/*
	 * A preferred source configured on the route is always reported.
	 * NOTE(review): together with the branch above this can add
	 * RTA_PREFSRC twice to the same message.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* Remaining lifetime in jiffies, or 0 for routes that do not expire. */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Roll back everything written for this message. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2592
2593int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2594{
2595 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2596 int prefix;
2597
2598 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2599 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2600 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2601 } else
2602 prefix = 0;
2603
2604 return rt6_fill_node(arg->net,
2605 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2606 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2607 prefix, 0, NLM_F_MULTI);
2608}
2609
2610static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2611{
2612 struct net *net = sock_net(in_skb->sk);
2613 struct nlattr *tb[RTA_MAX+1];
2614 struct rt6_info *rt;
2615 struct sk_buff *skb;
2616 struct rtmsg *rtm;
2617 struct flowi6 fl6;
2618 int err, iif = 0, oif = 0;
2619
2620 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2621 if (err < 0)
2622 goto errout;
2623
2624 err = -EINVAL;
2625 memset(&fl6, 0, sizeof(fl6));
2626
2627 if (tb[RTA_SRC]) {
2628 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2629 goto errout;
2630
2631 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2632 }
2633
2634 if (tb[RTA_DST]) {
2635 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2636 goto errout;
2637
2638 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2639 }
2640
2641 if (tb[RTA_IIF])
2642 iif = nla_get_u32(tb[RTA_IIF]);
2643
2644 if (tb[RTA_OIF])
2645 oif = nla_get_u32(tb[RTA_OIF]);
2646
2647 if (iif) {
2648 struct net_device *dev;
2649 int flags = 0;
2650
2651 dev = __dev_get_by_index(net, iif);
2652 if (!dev) {
2653 err = -ENODEV;
2654 goto errout;
2655 }
2656
2657 fl6.flowi6_iif = iif;
2658
2659 if (!ipv6_addr_any(&fl6.saddr))
2660 flags |= RT6_LOOKUP_F_HAS_SADDR;
2661
2662 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2663 flags);
2664 } else {
2665 fl6.flowi6_oif = oif;
2666
2667 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2668 }
2669
2670 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2671 if (!skb) {
2672 ip6_rt_put(rt);
2673 err = -ENOBUFS;
2674 goto errout;
2675 }
2676
2677
2678
2679
2680 skb_reset_mac_header(skb);
2681 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2682
2683 skb_dst_set(skb, &rt->dst);
2684
2685 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2686 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2687 nlh->nlmsg_seq, 0, 0, 0);
2688 if (err < 0) {
2689 kfree_skb(skb);
2690 goto errout;
2691 }
2692
2693 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2694errout:
2695 return err;
2696}
2697
2698void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2699{
2700 struct sk_buff *skb;
2701 struct net *net = info->nl_net;
2702 u32 seq;
2703 int err;
2704
2705 err = -ENOBUFS;
2706 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2707
2708 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2709 if (!skb)
2710 goto errout;
2711
2712 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2713 event, info->portid, seq, 0, 0, 0);
2714 if (err < 0) {
2715
2716 WARN_ON(err == -EMSGSIZE);
2717 kfree_skb(skb);
2718 goto errout;
2719 }
2720 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2721 info->nlh, gfp_any());
2722 return;
2723errout:
2724 if (err < 0)
2725 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2726}
2727
2728static int ip6_route_dev_notify(struct notifier_block *this,
2729 unsigned long event, void *ptr)
2730{
2731 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2732 struct net *net = dev_net(dev);
2733
2734 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2735 net->ipv6.ip6_null_entry->dst.dev = dev;
2736 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2737#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2738 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2739 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2740 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2741 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2742#endif
2743 }
2744
2745 return NOTIFY_OK;
2746}
2747
2748
2749
2750
2751
2752#ifdef CONFIG_PROC_FS
2753
/*
 * Bookkeeping for a buffer-based /proc reader.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * structure (the /proc handlers below use seq_file instead); it looks like
 * a leftover. Field meanings are inferred from their names only and
 * should be confirmed before relying on them.
 */
struct rt6_proc_arg
{
	char *buffer;	/* presumably the output buffer */
	int offset;	/* presumably the requested read offset */
	int length;	/* presumably the requested read length */
	int skip;	/* presumably bytes still to be skipped */
	int len;	/* presumably bytes produced so far */
};
2762
2763static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2764{
2765 struct seq_file *m = p_arg;
2766
2767 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2768
2769#ifdef CONFIG_IPV6_SUBTREES
2770 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2771#else
2772 seq_puts(m, "00000000000000000000000000000000 00 ");
2773#endif
2774 if (rt->rt6i_flags & RTF_GATEWAY) {
2775 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2776 } else {
2777 seq_puts(m, "00000000000000000000000000000000");
2778 }
2779 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2780 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2781 rt->dst.__use, rt->rt6i_flags,
2782 rt->dst.dev ? rt->dst.dev->name : "");
2783 return 0;
2784}
2785
2786static int ipv6_route_show(struct seq_file *m, void *v)
2787{
2788 struct net *net = (struct net *)m->private;
2789 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2790 return 0;
2791}
2792
2793static int ipv6_route_open(struct inode *inode, struct file *file)
2794{
2795 return single_open_net(inode, file, ipv6_route_show);
2796}
2797
/*
 * File operations for the "ipv6_route" proc entry: opened through
 * ipv6_route_open(), read and seeked with the generic seq_file helpers,
 * and released with single_release_net() to match single_open_net().
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2805
2806static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2807{
2808 struct net *net = (struct net *)seq->private;
2809 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2810 net->ipv6.rt6_stats->fib_nodes,
2811 net->ipv6.rt6_stats->fib_route_nodes,
2812 net->ipv6.rt6_stats->fib_rt_alloc,
2813 net->ipv6.rt6_stats->fib_rt_entries,
2814 net->ipv6.rt6_stats->fib_rt_cache,
2815 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2816 net->ipv6.rt6_stats->fib_discarded_routes);
2817
2818 return 0;
2819}
2820
2821static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2822{
2823 return single_open_net(inode, file, rt6_stats_seq_show);
2824}
2825
/*
 * File operations for the "rt6_stats" proc entry: opened through
 * rt6_stats_seq_open(), read and seeked with the generic seq_file helpers,
 * and released with single_release_net() to match single_open_net().
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2833#endif
2834
2835#ifdef CONFIG_SYSCTL
2836
2837static
2838int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2839 void __user *buffer, size_t *lenp, loff_t *ppos)
2840{
2841 struct net *net;
2842 int delay;
2843 if (!write)
2844 return -EINVAL;
2845
2846 net = (struct net *)ctl->extra1;
2847 delay = net->ipv6.sysctl.flush_delay;
2848 proc_dointvec(ctl, write, buffer, lenp, ppos);
2849 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2850 return 0;
2851}
2852
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and rewrites the .data
 * pointer of entries 0..9 by index, so the order of the entries here must
 * stay in sync with that function. The .data pointers below refer to
 * init_net (or to ip6_dst_ops_template for "gc_thresh") and are replaced
 * in each copy.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* [0] write-only (0200); handled by ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [2] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [3] same variable as [9], exposed through proc_dointvec_jiffies */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [9] same variable as [3], exposed through proc_dointvec_ms_jiffies */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* empty terminating entry */
};
2926
2927struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2928{
2929 struct ctl_table *table;
2930
2931 table = kmemdup(ipv6_route_table_template,
2932 sizeof(ipv6_route_table_template),
2933 GFP_KERNEL);
2934
2935 if (table) {
2936 table[0].data = &net->ipv6.sysctl.flush_delay;
2937 table[0].extra1 = net;
2938 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2939 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2940 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2941 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2942 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2943 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2944 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2945 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2946 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2947
2948
2949 if (net->user_ns != &init_user_ns)
2950 table[0].procname = NULL;
2951 }
2952
2953 return table;
2954}
2955#endif
2956
2957static int __net_init ip6_route_net_init(struct net *net)
2958{
2959 int ret = -ENOMEM;
2960
2961 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2962 sizeof(net->ipv6.ip6_dst_ops));
2963
2964 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2965 goto out_ip6_dst_ops;
2966
2967 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2968 sizeof(*net->ipv6.ip6_null_entry),
2969 GFP_KERNEL);
2970 if (!net->ipv6.ip6_null_entry)
2971 goto out_ip6_dst_entries;
2972 net->ipv6.ip6_null_entry->dst.path =
2973 (struct dst_entry *)net->ipv6.ip6_null_entry;
2974 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2975 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2976 ip6_template_metrics, true);
2977
2978#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2979 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2980 sizeof(*net->ipv6.ip6_prohibit_entry),
2981 GFP_KERNEL);
2982 if (!net->ipv6.ip6_prohibit_entry)
2983 goto out_ip6_null_entry;
2984 net->ipv6.ip6_prohibit_entry->dst.path =
2985 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2986 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2987 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2988 ip6_template_metrics, true);
2989
2990 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2991 sizeof(*net->ipv6.ip6_blk_hole_entry),
2992 GFP_KERNEL);
2993 if (!net->ipv6.ip6_blk_hole_entry)
2994 goto out_ip6_prohibit_entry;
2995 net->ipv6.ip6_blk_hole_entry->dst.path =
2996 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2997 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2998 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2999 ip6_template_metrics, true);
3000#endif
3001
3002 net->ipv6.sysctl.flush_delay = 0;
3003 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3004 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3005 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3006 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3007 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3008 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3009 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3010
3011 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3012
3013 ret = 0;
3014out:
3015 return ret;
3016
3017#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3018out_ip6_prohibit_entry:
3019 kfree(net->ipv6.ip6_prohibit_entry);
3020out_ip6_null_entry:
3021 kfree(net->ipv6.ip6_null_entry);
3022#endif
3023out_ip6_dst_entries:
3024 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3025out_ip6_dst_ops:
3026 goto out;
3027}
3028
3029static void __net_exit ip6_route_net_exit(struct net *net)
3030{
3031 kfree(net->ipv6.ip6_null_entry);
3032#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3033 kfree(net->ipv6.ip6_prohibit_entry);
3034 kfree(net->ipv6.ip6_blk_hole_entry);
3035#endif
3036 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3037}
3038
/*
 * Late per-namespace init: with CONFIG_PROC_FS, create the "ipv6_route"
 * and "rt6_stats" entries under the namespace's proc_net directory.
 * The results of proc_create() are not checked; always returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
3047
/*
 * Late per-namespace exit: with CONFIG_PROC_FS, remove the proc entries
 * created by ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3055
/*
 * Per-namespace hooks for the core routing state (dst_ops, special route
 * entries, sysctl defaults); registered from ip6_route_init().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3060
3061static int __net_init ipv6_inetpeer_init(struct net *net)
3062{
3063 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3064
3065 if (!bp)
3066 return -ENOMEM;
3067 inet_peer_base_init(bp);
3068 net->ipv6.peers = bp;
3069 return 0;
3070}
3071
3072static void __net_exit ipv6_inetpeer_exit(struct net *net)
3073{
3074 struct inet_peer_base *bp = net->ipv6.peers;
3075
3076 net->ipv6.peers = NULL;
3077 inetpeer_invalidate_tree(bp);
3078 kfree(bp);
3079}
3080
/*
 * Per-namespace hooks for the IPv6 inet_peer base; registered from
 * ip6_route_init() before ip6_route_net_ops.
 */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3085
/*
 * Per-namespace hooks that create and remove the proc entries; registered
 * from ip6_route_init() after fib6, xfrm6 and fib6 rules are set up.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3090
/*
 * Netdevice notifier that routes device events to ip6_route_dev_notify();
 * registered as the last step of ip6_route_init().
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3095
/*
 * Subsystem initialisation for IPv6 routing.
 *
 * Steps, in order: create the dst slab cache, initialise the blackhole
 * dst_ops counter, register the inetpeer and core per-namespace hooks,
 * wire init_net's special route entries to the loopback device, bring up
 * fib6, xfrm6 and fib6 rules, register the late per-namespace hooks, the
 * PF_INET6 route netlink handlers and finally the netdevice notifier.
 *
 * Returns 0 on success or a negative errno; on failure the labels at the
 * bottom undo the completed steps in reverse order.
 *
 * NOTE(review): two things are not undone on the error path as written -
 * confirm whether they matter:
 *  - the in6_dev_get() references taken for init_net's special entries
 *    have no matching put in this function;
 *  - when register_netdevice_notifier() fails, the three handlers
 *    installed with __rtnl_register() are not unregistered here.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* The blackhole dst_ops share the slab cache created above. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/*
	 * Do by hand for init_net what ip6_route_dev_notify() does when a
	 * loopback device registers. The notifier itself is only registered
	 * at the end of this function - presumably init_net's loopback has
	 * already been registered by then, so the callback would never see
	 * it (TODO confirm).
	 */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* Any failing __rtnl_register() is reported as -ENOBUFS. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

	/* Error unwinding: each label undoes the step named by the label. */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3179
/*
 * Subsystem teardown for IPv6 routing: undoes ip6_route_init(), starting
 * with the netdevice notifier and ending with the dst slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops here, whereas the error path of ip6_route_init()
 * unregisters them in the opposite order - confirm the order does not
 * matter.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3192