1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <linux/capability.h>
30#include <linux/errno.h>
31#include <linux/export.h>
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
40#include <linux/mroute6.h>
41#include <linux/init.h>
42#include <linux/if_arp.h>
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#include <linux/nsproxy.h>
46#include <linux/slab.h>
47#include <net/net_namespace.h>
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
58#include <net/netevent.h>
59#include <net/netlink.h>
60#include <net/nexthop.h>
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
/*
 * Result codes produced while scoring a route's next hop.
 *
 * All failure codes are negative so callers can test "< 0"; the single
 * success code is positive.  find_match() skips a route on FAIL_HARD and
 * requests round-robin on FAIL_DO_RR.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		=  1,
};
74
75static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 const struct in6_addr *dest);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79static unsigned int ip6_mtu(const struct dst_entry *dst);
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
84static int ip6_dst_gc(struct dst_ops *ops);
85
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88static int ip6_pkt_prohibit(struct sk_buff *skb);
89static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90static void ip6_link_failure(struct sk_buff *skb);
91static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
95static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96
97#ifdef CONFIG_IPV6_ROUTE_INFO
98static struct rt6_info *rt6_add_route_info(struct net *net,
99 const struct in6_addr *prefix, int prefixlen,
100 const struct in6_addr *gwaddr, int ifindex,
101 unsigned int pref);
102static struct rt6_info *rt6_get_route_info(struct net *net,
103 const struct in6_addr *prefix, int prefixlen,
104 const struct in6_addr *gwaddr, int ifindex);
105#endif
106
107static void rt6_bind_peer(struct rt6_info *rt, int create)
108{
109 struct inet_peer_base *base;
110 struct inet_peer *peer;
111
112 base = inetpeer_base_ptr(rt->_rt6i_peer);
113 if (!base)
114 return;
115
116 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117 if (peer) {
118 if (!rt6_set_peer(rt, peer))
119 inet_putpeer(peer);
120 }
121}
122
123static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124{
125 if (rt6_has_peer(rt))
126 return rt6_peer_ptr(rt);
127
128 rt6_bind_peer(rt, create);
129 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130}
131
/* Convenience wrapper: __rt6_get_peer() with the create flag set. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136
137static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138{
139 struct rt6_info *rt = (struct rt6_info *) dst;
140 struct inet_peer *peer;
141 u32 *p = NULL;
142
143 if (!(rt->dst.flags & DST_HOST))
144 return dst_cow_metrics_generic(dst, old);
145
146 peer = rt6_get_peer_create(rt);
147 if (peer) {
148 u32 *old_p = __DST_METRICS_PTR(old);
149 unsigned long prev, new;
150
151 p = peer->metrics;
152 if (inet_metrics_new(peer) ||
153 (old & DST_METRICS_FORCE_OVERWRITE))
154 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155
156 new = (unsigned long) p;
157 prev = cmpxchg(&dst->_metrics, old, new);
158
159 if (prev != old) {
160 p = __DST_METRICS_PTR(prev);
161 if (prev & DST_METRICS_READ_ONLY)
162 p = NULL;
163 }
164 }
165 return p;
166}
167
168static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169 struct sk_buff *skb,
170 const void *daddr)
171{
172 struct in6_addr *p = &rt->rt6i_gateway;
173
174 if (!ipv6_addr_any(p))
175 return (const void *) p;
176 else if (skb)
177 return &ipv6_hdr(skb)->daddr;
178 return daddr;
179}
180
181static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182 struct sk_buff *skb,
183 const void *daddr)
184{
185 struct rt6_info *rt = (struct rt6_info *) dst;
186 struct neighbour *n;
187
188 daddr = choose_neigh_daddr(rt, skb, daddr);
189 n = __ipv6_neigh_lookup(dst->dev, daddr);
190 if (n)
191 return n;
192 return neigh_create(&nd_tbl, daddr, dst->dev);
193}
194
195static struct dst_ops ip6_dst_ops_template = {
196 .family = AF_INET6,
197 .gc = ip6_dst_gc,
198 .gc_thresh = 1024,
199 .check = ip6_dst_check,
200 .default_advmss = ip6_default_advmss,
201 .mtu = ip6_mtu,
202 .cow_metrics = ipv6_cow_metrics,
203 .destroy = ip6_dst_destroy,
204 .ifdown = ip6_dst_ifdown,
205 .negative_advice = ip6_negative_advice,
206 .link_failure = ip6_link_failure,
207 .update_pmtu = ip6_rt_update_pmtu,
208 .redirect = rt6_do_redirect,
209 .local_out = __ip6_local_out,
210 .neigh_lookup = ip6_neigh_lookup,
211};
212
213static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
214{
215 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
216
217 return mtu ? : dst->dev->mtu;
218}
219
220static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
221 struct sk_buff *skb, u32 mtu)
222{
223}
224
/* dst_ops->redirect for blackhole entries: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
229
230static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
231 unsigned long old)
232{
233 return NULL;
234}
235
236static struct dst_ops ip6_dst_blackhole_ops = {
237 .family = AF_INET6,
238 .destroy = ip6_dst_destroy,
239 .check = ip6_dst_check,
240 .mtu = ip6_blackhole_mtu,
241 .default_advmss = ip6_default_advmss,
242 .update_pmtu = ip6_rt_blackhole_update_pmtu,
243 .redirect = ip6_rt_blackhole_redirect,
244 .cow_metrics = ip6_rt_blackhole_cow_metrics,
245 .neigh_lookup = ip6_neigh_lookup,
246};
247
248static const u32 ip6_template_metrics[RTAX_MAX] = {
249 [RTAX_HOPLIMIT - 1] = 0,
250};
251
252static const struct rt6_info ip6_null_entry_template = {
253 .dst = {
254 .__refcnt = ATOMIC_INIT(1),
255 .__use = 1,
256 .obsolete = DST_OBSOLETE_FORCE_CHK,
257 .error = -ENETUNREACH,
258 .input = ip6_pkt_discard,
259 .output = ip6_pkt_discard_out,
260 },
261 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
262 .rt6i_protocol = RTPROT_KERNEL,
263 .rt6i_metric = ~(u32) 0,
264 .rt6i_ref = ATOMIC_INIT(1),
265};
266
267#ifdef CONFIG_IPV6_MULTIPLE_TABLES
268
269static const struct rt6_info ip6_prohibit_entry_template = {
270 .dst = {
271 .__refcnt = ATOMIC_INIT(1),
272 .__use = 1,
273 .obsolete = DST_OBSOLETE_FORCE_CHK,
274 .error = -EACCES,
275 .input = ip6_pkt_prohibit,
276 .output = ip6_pkt_prohibit_out,
277 },
278 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
279 .rt6i_protocol = RTPROT_KERNEL,
280 .rt6i_metric = ~(u32) 0,
281 .rt6i_ref = ATOMIC_INIT(1),
282};
283
284static const struct rt6_info ip6_blk_hole_entry_template = {
285 .dst = {
286 .__refcnt = ATOMIC_INIT(1),
287 .__use = 1,
288 .obsolete = DST_OBSOLETE_FORCE_CHK,
289 .error = -EINVAL,
290 .input = dst_discard,
291 .output = dst_discard_sk,
292 },
293 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .rt6i_protocol = RTPROT_KERNEL,
295 .rt6i_metric = ~(u32) 0,
296 .rt6i_ref = ATOMIC_INIT(1),
297};
298
299#endif
300
301
302static inline struct rt6_info *ip6_dst_alloc(struct net *net,
303 struct net_device *dev,
304 int flags,
305 struct fib6_table *table)
306{
307 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
308 0, DST_OBSOLETE_FORCE_CHK, flags);
309
310 if (rt) {
311 struct dst_entry *dst = &rt->dst;
312
313 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
314 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
315 INIT_LIST_HEAD(&rt->rt6i_siblings);
316 }
317 return rt;
318}
319
320static void ip6_dst_destroy(struct dst_entry *dst)
321{
322 struct rt6_info *rt = (struct rt6_info *)dst;
323 struct inet6_dev *idev = rt->rt6i_idev;
324 struct dst_entry *from = dst->from;
325
326 if (!(rt->dst.flags & DST_HOST))
327 dst_destroy_metrics_generic(dst);
328
329 if (idev) {
330 rt->rt6i_idev = NULL;
331 in6_dev_put(idev);
332 }
333
334 dst->from = NULL;
335 dst_release(from);
336
337 if (rt6_has_peer(rt)) {
338 struct inet_peer *peer = rt6_peer_ptr(rt);
339 inet_putpeer(peer);
340 }
341}
342
343static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
344 int how)
345{
346 struct rt6_info *rt = (struct rt6_info *)dst;
347 struct inet6_dev *idev = rt->rt6i_idev;
348 struct net_device *loopback_dev =
349 dev_net(dev)->loopback_dev;
350
351 if (dev != loopback_dev) {
352 if (idev && idev->dev == dev) {
353 struct inet6_dev *loopback_idev =
354 in6_dev_get(loopback_dev);
355 if (loopback_idev) {
356 rt->rt6i_idev = loopback_idev;
357 in6_dev_put(idev);
358 }
359 }
360 }
361}
362
363static bool rt6_check_expired(const struct rt6_info *rt)
364{
365 if (rt->rt6i_flags & RTF_EXPIRES) {
366 if (time_after(jiffies, rt->dst.expires))
367 return true;
368 } else if (rt->dst.from) {
369 return rt6_check_expired((struct rt6_info *) rt->dst.from);
370 }
371 return false;
372}
373
374
375
376
377
378static int rt6_info_hash_nhsfn(unsigned int candidate_count,
379 const struct flowi6 *fl6)
380{
381 unsigned int val = fl6->flowi6_proto;
382
383 val ^= ipv6_addr_hash(&fl6->daddr);
384 val ^= ipv6_addr_hash(&fl6->saddr);
385
386
387 switch (fl6->flowi6_proto) {
388 case IPPROTO_UDP:
389 case IPPROTO_TCP:
390 case IPPROTO_SCTP:
391 val ^= (__force u16)fl6->fl6_sport;
392 val ^= (__force u16)fl6->fl6_dport;
393 break;
394
395 case IPPROTO_ICMPV6:
396 val ^= (__force u16)fl6->fl6_icmp_type;
397 val ^= (__force u16)fl6->fl6_icmp_code;
398 break;
399 }
400
401 val ^= (__force u32)fl6->flowlabel;
402
403
404 val = val ^ (val >> 7) ^ (val >> 12);
405 return val % candidate_count;
406}
407
408static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
409 struct flowi6 *fl6, int oif,
410 int strict)
411{
412 struct rt6_info *sibling, *next_sibling;
413 int route_choosen;
414
415 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
416
417
418
419 if (route_choosen)
420 list_for_each_entry_safe(sibling, next_sibling,
421 &match->rt6i_siblings, rt6i_siblings) {
422 route_choosen--;
423 if (route_choosen == 0) {
424 if (rt6_score_route(sibling, oif, strict) < 0)
425 break;
426 match = sibling;
427 break;
428 }
429 }
430 return match;
431}
432
433
434
435
436
437static inline struct rt6_info *rt6_device_match(struct net *net,
438 struct rt6_info *rt,
439 const struct in6_addr *saddr,
440 int oif,
441 int flags)
442{
443 struct rt6_info *local = NULL;
444 struct rt6_info *sprt;
445
446 if (!oif && ipv6_addr_any(saddr))
447 goto out;
448
449 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
450 struct net_device *dev = sprt->dst.dev;
451
452 if (oif) {
453 if (dev->ifindex == oif)
454 return sprt;
455 if (dev->flags & IFF_LOOPBACK) {
456 if (!sprt->rt6i_idev ||
457 sprt->rt6i_idev->dev->ifindex != oif) {
458 if (flags & RT6_LOOKUP_F_IFACE && oif)
459 continue;
460 if (local && (!oif ||
461 local->rt6i_idev->dev->ifindex == oif))
462 continue;
463 }
464 local = sprt;
465 }
466 } else {
467 if (ipv6_chk_addr(net, saddr, dev,
468 flags & RT6_LOOKUP_F_IFACE))
469 return sprt;
470 }
471 }
472
473 if (oif) {
474 if (local)
475 return local;
476
477 if (flags & RT6_LOOKUP_F_IFACE)
478 return net->ipv6.ip6_null_entry;
479 }
480out:
481 return rt;
482}
483
484#ifdef CONFIG_IPV6_ROUTER_PREF
485struct __rt6_probe_work {
486 struct work_struct work;
487 struct in6_addr target;
488 struct net_device *dev;
489};
490
491static void rt6_probe_deferred(struct work_struct *w)
492{
493 struct in6_addr mcaddr;
494 struct __rt6_probe_work *work =
495 container_of(w, struct __rt6_probe_work, work);
496
497 addrconf_addr_solict_mult(&work->target, &mcaddr);
498 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
499 dev_put(work->dev);
500 kfree(work);
501}
502
503static void rt6_probe(struct rt6_info *rt)
504{
505 struct neighbour *neigh;
506
507
508
509
510
511
512
513
514 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
515 return;
516 rcu_read_lock_bh();
517 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
518 if (neigh) {
519 write_lock(&neigh->lock);
520 if (neigh->nud_state & NUD_VALID)
521 goto out;
522 }
523
524 if (!neigh ||
525 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
526 struct __rt6_probe_work *work;
527
528 work = kmalloc(sizeof(*work), GFP_ATOMIC);
529
530 if (neigh && work)
531 __neigh_set_probe_once(neigh);
532
533 if (neigh)
534 write_unlock(&neigh->lock);
535
536 if (work) {
537 INIT_WORK(&work->work, rt6_probe_deferred);
538 work->target = rt->rt6i_gateway;
539 dev_hold(rt->dst.dev);
540 work->dev = rt->dst.dev;
541 schedule_work(&work->work);
542 }
543 } else {
544out:
545 write_unlock(&neigh->lock);
546 }
547 rcu_read_unlock_bh();
548}
549#else
/* Stub used when CONFIG_IPV6_ROUTER_PREF is disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
553#endif
554
555
556
557
558static inline int rt6_check_dev(struct rt6_info *rt, int oif)
559{
560 struct net_device *dev = rt->dst.dev;
561 if (!oif || dev->ifindex == oif)
562 return 2;
563 if ((dev->flags & IFF_LOOPBACK) &&
564 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
565 return 1;
566 return 0;
567}
568
569static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
570{
571 struct neighbour *neigh;
572 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
573
574 if (rt->rt6i_flags & RTF_NONEXTHOP ||
575 !(rt->rt6i_flags & RTF_GATEWAY))
576 return RT6_NUD_SUCCEED;
577
578 rcu_read_lock_bh();
579 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
580 if (neigh) {
581 read_lock(&neigh->lock);
582 if (neigh->nud_state & NUD_VALID)
583 ret = RT6_NUD_SUCCEED;
584#ifdef CONFIG_IPV6_ROUTER_PREF
585 else if (!(neigh->nud_state & NUD_FAILED))
586 ret = RT6_NUD_SUCCEED;
587 else
588 ret = RT6_NUD_FAIL_PROBE;
589#endif
590 read_unlock(&neigh->lock);
591 } else {
592 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
593 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
594 }
595 rcu_read_unlock_bh();
596
597 return ret;
598}
599
600static int rt6_score_route(struct rt6_info *rt, int oif,
601 int strict)
602{
603 int m;
604
605 m = rt6_check_dev(rt, oif);
606 if (!m && (strict & RT6_LOOKUP_F_IFACE))
607 return RT6_NUD_FAIL_HARD;
608#ifdef CONFIG_IPV6_ROUTER_PREF
609 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
610#endif
611 if (strict & RT6_LOOKUP_F_REACHABLE) {
612 int n = rt6_check_neigh(rt);
613 if (n < 0)
614 return n;
615 }
616 return m;
617}
618
619static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
620 int *mpri, struct rt6_info *match,
621 bool *do_rr)
622{
623 int m;
624 bool match_do_rr = false;
625
626 if (rt6_check_expired(rt))
627 goto out;
628
629 m = rt6_score_route(rt, oif, strict);
630 if (m == RT6_NUD_FAIL_DO_RR) {
631 match_do_rr = true;
632 m = 0;
633 } else if (m == RT6_NUD_FAIL_HARD) {
634 goto out;
635 }
636
637 if (strict & RT6_LOOKUP_F_REACHABLE)
638 rt6_probe(rt);
639
640
641 if (m > *mpri) {
642 *do_rr = match_do_rr;
643 *mpri = m;
644 match = rt;
645 }
646out:
647 return match;
648}
649
650static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
651 struct rt6_info *rr_head,
652 u32 metric, int oif, int strict,
653 bool *do_rr)
654{
655 struct rt6_info *rt, *match;
656 int mpri = -1;
657
658 match = NULL;
659 for (rt = rr_head; rt && rt->rt6i_metric == metric;
660 rt = rt->dst.rt6_next)
661 match = find_match(rt, oif, strict, &mpri, match, do_rr);
662 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
663 rt = rt->dst.rt6_next)
664 match = find_match(rt, oif, strict, &mpri, match, do_rr);
665
666 return match;
667}
668
669static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
670{
671 struct rt6_info *match, *rt0;
672 struct net *net;
673 bool do_rr = false;
674
675 rt0 = fn->rr_ptr;
676 if (!rt0)
677 fn->rr_ptr = rt0 = fn->leaf;
678
679 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
680 &do_rr);
681
682 if (do_rr) {
683 struct rt6_info *next = rt0->dst.rt6_next;
684
685
686 if (!next || next->rt6i_metric != rt0->rt6i_metric)
687 next = fn->leaf;
688
689 if (next != rt0)
690 fn->rr_ptr = next;
691 }
692
693 net = dev_net(rt0->dst.dev);
694 return match ? match : net->ipv6.ip6_null_entry;
695}
696
697#ifdef CONFIG_IPV6_ROUTE_INFO
698int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
699 const struct in6_addr *gwaddr)
700{
701 struct net *net = dev_net(dev);
702 struct route_info *rinfo = (struct route_info *) opt;
703 struct in6_addr prefix_buf, *prefix;
704 unsigned int pref;
705 unsigned long lifetime;
706 struct rt6_info *rt;
707
708 if (len < sizeof(struct route_info)) {
709 return -EINVAL;
710 }
711
712
713 if (rinfo->length > 3) {
714 return -EINVAL;
715 } else if (rinfo->prefix_len > 128) {
716 return -EINVAL;
717 } else if (rinfo->prefix_len > 64) {
718 if (rinfo->length < 2) {
719 return -EINVAL;
720 }
721 } else if (rinfo->prefix_len > 0) {
722 if (rinfo->length < 1) {
723 return -EINVAL;
724 }
725 }
726
727 pref = rinfo->route_pref;
728 if (pref == ICMPV6_ROUTER_PREF_INVALID)
729 return -EINVAL;
730
731 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
732
733 if (rinfo->length == 3)
734 prefix = (struct in6_addr *)rinfo->prefix;
735 else {
736
737 ipv6_addr_prefix(&prefix_buf,
738 (struct in6_addr *)rinfo->prefix,
739 rinfo->prefix_len);
740 prefix = &prefix_buf;
741 }
742
743 if (rinfo->prefix_len == 0)
744 rt = rt6_get_dflt_router(gwaddr, dev);
745 else
746 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
747 gwaddr, dev->ifindex);
748
749 if (rt && !lifetime) {
750 ip6_del_rt(rt);
751 rt = NULL;
752 }
753
754 if (!rt && lifetime)
755 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
756 pref);
757 else if (rt)
758 rt->rt6i_flags = RTF_ROUTEINFO |
759 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
760
761 if (rt) {
762 if (!addrconf_finite_timeout(lifetime))
763 rt6_clean_expires(rt);
764 else
765 rt6_set_expires(rt, jiffies + HZ * lifetime);
766
767 ip6_rt_put(rt);
768 }
769 return 0;
770}
771#endif
772
773static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
774 struct in6_addr *saddr)
775{
776 struct fib6_node *pn;
777 while (1) {
778 if (fn->fn_flags & RTN_TL_ROOT)
779 return NULL;
780 pn = fn->parent;
781 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
782 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
783 else
784 fn = pn;
785 if (fn->fn_flags & RTN_RTINFO)
786 return fn;
787 }
788}
789
790static struct rt6_info *ip6_pol_route_lookup(struct net *net,
791 struct fib6_table *table,
792 struct flowi6 *fl6, int flags)
793{
794 struct fib6_node *fn;
795 struct rt6_info *rt;
796
797 read_lock_bh(&table->tb6_lock);
798 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
799restart:
800 rt = fn->leaf;
801 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
802 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
803 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
804 if (rt == net->ipv6.ip6_null_entry) {
805 fn = fib6_backtrack(fn, &fl6->saddr);
806 if (fn)
807 goto restart;
808 }
809 dst_use(&rt->dst, jiffies);
810 read_unlock_bh(&table->tb6_lock);
811 return rt;
812
813}
814
815struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
816 int flags)
817{
818 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
819}
820EXPORT_SYMBOL_GPL(ip6_route_lookup);
821
822struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
823 const struct in6_addr *saddr, int oif, int strict)
824{
825 struct flowi6 fl6 = {
826 .flowi6_oif = oif,
827 .daddr = *daddr,
828 };
829 struct dst_entry *dst;
830 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
831
832 if (saddr) {
833 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
834 flags |= RT6_LOOKUP_F_HAS_SADDR;
835 }
836
837 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
838 if (dst->error == 0)
839 return (struct rt6_info *) dst;
840
841 dst_release(dst);
842
843 return NULL;
844}
845EXPORT_SYMBOL(rt6_lookup);
846
847
848
849
850
851
852
853static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
854 struct mx6_config *mxc)
855{
856 int err;
857 struct fib6_table *table;
858
859 table = rt->rt6i_table;
860 write_lock_bh(&table->tb6_lock);
861 err = fib6_add(&table->tb6_root, rt, info, mxc);
862 write_unlock_bh(&table->tb6_lock);
863
864 return err;
865}
866
867int ip6_ins_rt(struct rt6_info *rt)
868{
869 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
870 struct mx6_config mxc = { .mx = NULL, };
871
872 return __ip6_ins_rt(rt, &info, &mxc);
873}
874
875static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
876 const struct in6_addr *daddr,
877 const struct in6_addr *saddr)
878{
879 struct rt6_info *rt;
880
881
882
883
884
885 rt = ip6_rt_copy(ort, daddr);
886
887 if (rt) {
888 if (ort->rt6i_dst.plen != 128 &&
889 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
890 rt->rt6i_flags |= RTF_ANYCAST;
891
892 rt->rt6i_flags |= RTF_CACHE;
893
894#ifdef CONFIG_IPV6_SUBTREES
895 if (rt->rt6i_src.plen && saddr) {
896 rt->rt6i_src.addr = *saddr;
897 rt->rt6i_src.plen = 128;
898 }
899#endif
900 }
901
902 return rt;
903}
904
905static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
906 const struct in6_addr *daddr)
907{
908 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
909
910 if (rt)
911 rt->rt6i_flags |= RTF_CACHE;
912 return rt;
913}
914
915static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
916 struct flowi6 *fl6, int flags)
917{
918 struct fib6_node *fn, *saved_fn;
919 struct rt6_info *rt, *nrt;
920 int strict = 0;
921 int attempts = 3;
922 int err;
923
924 strict |= flags & RT6_LOOKUP_F_IFACE;
925 if (net->ipv6.devconf_all->forwarding == 0)
926 strict |= RT6_LOOKUP_F_REACHABLE;
927
928redo_fib6_lookup_lock:
929 read_lock_bh(&table->tb6_lock);
930
931 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
932 saved_fn = fn;
933
934redo_rt6_select:
935 rt = rt6_select(fn, oif, strict);
936 if (rt->rt6i_nsiblings)
937 rt = rt6_multipath_select(rt, fl6, oif, strict);
938 if (rt == net->ipv6.ip6_null_entry) {
939 fn = fib6_backtrack(fn, &fl6->saddr);
940 if (fn)
941 goto redo_rt6_select;
942 else if (strict & RT6_LOOKUP_F_REACHABLE) {
943
944 strict &= ~RT6_LOOKUP_F_REACHABLE;
945 fn = saved_fn;
946 goto redo_rt6_select;
947 } else {
948 dst_hold(&rt->dst);
949 read_unlock_bh(&table->tb6_lock);
950 goto out2;
951 }
952 }
953
954 dst_hold(&rt->dst);
955 read_unlock_bh(&table->tb6_lock);
956
957 if (rt->rt6i_flags & RTF_CACHE)
958 goto out2;
959
960 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
961 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
962 else if (!(rt->dst.flags & DST_HOST))
963 nrt = rt6_alloc_clone(rt, &fl6->daddr);
964 else
965 goto out2;
966
967 ip6_rt_put(rt);
968 rt = nrt ? : net->ipv6.ip6_null_entry;
969
970 dst_hold(&rt->dst);
971 if (nrt) {
972 err = ip6_ins_rt(nrt);
973 if (!err)
974 goto out2;
975 }
976
977 if (--attempts <= 0)
978 goto out2;
979
980
981
982
983
984 ip6_rt_put(rt);
985 goto redo_fib6_lookup_lock;
986
987out2:
988 rt->dst.lastuse = jiffies;
989 rt->dst.__use++;
990
991 return rt;
992}
993
994static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
995 struct flowi6 *fl6, int flags)
996{
997 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
998}
999
1000static struct dst_entry *ip6_route_input_lookup(struct net *net,
1001 struct net_device *dev,
1002 struct flowi6 *fl6, int flags)
1003{
1004 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1005 flags |= RT6_LOOKUP_F_IFACE;
1006
1007 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1008}
1009
1010void ip6_route_input(struct sk_buff *skb)
1011{
1012 const struct ipv6hdr *iph = ipv6_hdr(skb);
1013 struct net *net = dev_net(skb->dev);
1014 int flags = RT6_LOOKUP_F_HAS_SADDR;
1015 struct flowi6 fl6 = {
1016 .flowi6_iif = skb->dev->ifindex,
1017 .daddr = iph->daddr,
1018 .saddr = iph->saddr,
1019 .flowlabel = ip6_flowinfo(iph),
1020 .flowi6_mark = skb->mark,
1021 .flowi6_proto = iph->nexthdr,
1022 };
1023
1024 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1025}
1026
1027static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1028 struct flowi6 *fl6, int flags)
1029{
1030 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1031}
1032
1033struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1034 struct flowi6 *fl6)
1035{
1036 int flags = 0;
1037
1038 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1039
1040 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1041 flags |= RT6_LOOKUP_F_IFACE;
1042
1043 if (!ipv6_addr_any(&fl6->saddr))
1044 flags |= RT6_LOOKUP_F_HAS_SADDR;
1045 else if (sk)
1046 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1047
1048 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1049}
1050EXPORT_SYMBOL(ip6_route_output);
1051
1052struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1053{
1054 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1055 struct dst_entry *new = NULL;
1056
1057 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1058 if (rt) {
1059 new = &rt->dst;
1060
1061 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1062 rt6_init_peer(rt, net->ipv6.peers);
1063
1064 new->__use = 1;
1065 new->input = dst_discard;
1066 new->output = dst_discard_sk;
1067
1068 if (dst_metrics_read_only(&ort->dst))
1069 new->_metrics = ort->dst._metrics;
1070 else
1071 dst_copy_metrics(new, &ort->dst);
1072 rt->rt6i_idev = ort->rt6i_idev;
1073 if (rt->rt6i_idev)
1074 in6_dev_hold(rt->rt6i_idev);
1075
1076 rt->rt6i_gateway = ort->rt6i_gateway;
1077 rt->rt6i_flags = ort->rt6i_flags;
1078 rt->rt6i_metric = 0;
1079
1080 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1081#ifdef CONFIG_IPV6_SUBTREES
1082 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1083#endif
1084
1085 dst_free(new);
1086 }
1087
1088 dst_release(dst_orig);
1089 return new ? new : ERR_PTR(-ENOMEM);
1090}
1091
1092
1093
1094
1095
1096static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1097{
1098 struct rt6_info *rt;
1099
1100 rt = (struct rt6_info *) dst;
1101
1102
1103
1104
1105
1106 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1107 return NULL;
1108
1109 if (rt6_check_expired(rt))
1110 return NULL;
1111
1112 return dst;
1113}
1114
1115static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1116{
1117 struct rt6_info *rt = (struct rt6_info *) dst;
1118
1119 if (rt) {
1120 if (rt->rt6i_flags & RTF_CACHE) {
1121 if (rt6_check_expired(rt)) {
1122 ip6_del_rt(rt);
1123 dst = NULL;
1124 }
1125 } else {
1126 dst_release(dst);
1127 dst = NULL;
1128 }
1129 }
1130 return dst;
1131}
1132
1133static void ip6_link_failure(struct sk_buff *skb)
1134{
1135 struct rt6_info *rt;
1136
1137 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1138
1139 rt = (struct rt6_info *) skb_dst(skb);
1140 if (rt) {
1141 if (rt->rt6i_flags & RTF_CACHE) {
1142 dst_hold(&rt->dst);
1143 if (ip6_del_rt(rt))
1144 dst_free(&rt->dst);
1145 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1146 rt->rt6i_node->fn_sernum = -1;
1147 }
1148 }
1149}
1150
1151static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1152 struct sk_buff *skb, u32 mtu)
1153{
1154 struct rt6_info *rt6 = (struct rt6_info *)dst;
1155
1156 dst_confirm(dst);
1157 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1158 struct net *net = dev_net(dst->dev);
1159
1160 rt6->rt6i_flags |= RTF_MODIFIED;
1161 if (mtu < IPV6_MIN_MTU)
1162 mtu = IPV6_MIN_MTU;
1163
1164 dst_metric_set(dst, RTAX_MTU, mtu);
1165 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1166 }
1167}
1168
1169void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1170 int oif, u32 mark)
1171{
1172 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1173 struct dst_entry *dst;
1174 struct flowi6 fl6;
1175
1176 memset(&fl6, 0, sizeof(fl6));
1177 fl6.flowi6_oif = oif;
1178 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1179 fl6.daddr = iph->daddr;
1180 fl6.saddr = iph->saddr;
1181 fl6.flowlabel = ip6_flowinfo(iph);
1182
1183 dst = ip6_route_output(net, NULL, &fl6);
1184 if (!dst->error)
1185 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1186 dst_release(dst);
1187}
1188EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1189
1190void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1191{
1192 ip6_update_pmtu(skb, sock_net(sk), mtu,
1193 sk->sk_bound_dev_if, sk->sk_mark);
1194}
1195EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1196
1197
1198struct ip6rd_flowi {
1199 struct flowi6 fl6;
1200 struct in6_addr gateway;
1201};
1202
1203static struct rt6_info *__ip6_route_redirect(struct net *net,
1204 struct fib6_table *table,
1205 struct flowi6 *fl6,
1206 int flags)
1207{
1208 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1209 struct rt6_info *rt;
1210 struct fib6_node *fn;
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222 read_lock_bh(&table->tb6_lock);
1223 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1224restart:
1225 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1226 if (rt6_check_expired(rt))
1227 continue;
1228 if (rt->dst.error)
1229 break;
1230 if (!(rt->rt6i_flags & RTF_GATEWAY))
1231 continue;
1232 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1233 continue;
1234 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1235 continue;
1236 break;
1237 }
1238
1239 if (!rt)
1240 rt = net->ipv6.ip6_null_entry;
1241 else if (rt->dst.error) {
1242 rt = net->ipv6.ip6_null_entry;
1243 goto out;
1244 }
1245
1246 if (rt == net->ipv6.ip6_null_entry) {
1247 fn = fib6_backtrack(fn, &fl6->saddr);
1248 if (fn)
1249 goto restart;
1250 }
1251
1252out:
1253 dst_hold(&rt->dst);
1254
1255 read_unlock_bh(&table->tb6_lock);
1256
1257 return rt;
1258};
1259
1260static struct dst_entry *ip6_route_redirect(struct net *net,
1261 const struct flowi6 *fl6,
1262 const struct in6_addr *gateway)
1263{
1264 int flags = RT6_LOOKUP_F_HAS_SADDR;
1265 struct ip6rd_flowi rdfl;
1266
1267 rdfl.fl6 = *fl6;
1268 rdfl.gateway = *gateway;
1269
1270 return fib6_rule_lookup(net, &rdfl.fl6,
1271 flags, __ip6_route_redirect);
1272}
1273
1274void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1275{
1276 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1277 struct dst_entry *dst;
1278 struct flowi6 fl6;
1279
1280 memset(&fl6, 0, sizeof(fl6));
1281 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1282 fl6.flowi6_oif = oif;
1283 fl6.flowi6_mark = mark;
1284 fl6.daddr = iph->daddr;
1285 fl6.saddr = iph->saddr;
1286 fl6.flowlabel = ip6_flowinfo(iph);
1287
1288 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1289 rt6_do_redirect(dst, NULL, skb);
1290 dst_release(dst);
1291}
1292EXPORT_SYMBOL_GPL(ip6_redirect);
1293
1294void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1295 u32 mark)
1296{
1297 const struct ipv6hdr *iph = ipv6_hdr(skb);
1298 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1299 struct dst_entry *dst;
1300 struct flowi6 fl6;
1301
1302 memset(&fl6, 0, sizeof(fl6));
1303 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1304 fl6.flowi6_oif = oif;
1305 fl6.flowi6_mark = mark;
1306 fl6.daddr = msg->dest;
1307 fl6.saddr = iph->daddr;
1308
1309 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1310 rt6_do_redirect(dst, NULL, skb);
1311 dst_release(dst);
1312}
1313
1314void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1315{
1316 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1317}
1318EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1319
1320static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1321{
1322 struct net_device *dev = dst->dev;
1323 unsigned int mtu = dst_mtu(dst);
1324 struct net *net = dev_net(dev);
1325
1326 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1327
1328 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1329 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1330
1331
1332
1333
1334
1335
1336
1337 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1338 mtu = IPV6_MAXPLEN;
1339 return mtu;
1340}
1341
1342static unsigned int ip6_mtu(const struct dst_entry *dst)
1343{
1344 struct inet6_dev *idev;
1345 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1346
1347 if (mtu)
1348 goto out;
1349
1350 mtu = IPV6_MIN_MTU;
1351
1352 rcu_read_lock();
1353 idev = __in6_dev_get(dst->dev);
1354 if (idev)
1355 mtu = idev->cnf.mtu6;
1356 rcu_read_unlock();
1357
1358out:
1359 return min_t(unsigned int, mtu, IP6_MAX_MTU);
1360}
1361
/*
 * Singly linked list (chained through dst->next) of the dst entries created
 * by icmp6_dst_alloc().  Entries are unlinked and freed by icmp6_dst_gc()
 * and icmp6_clean_all().  Every access in this file is made with
 * icmp6_dst_lock held.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1364
1365struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1366 struct flowi6 *fl6)
1367{
1368 struct dst_entry *dst;
1369 struct rt6_info *rt;
1370 struct inet6_dev *idev = in6_dev_get(dev);
1371 struct net *net = dev_net(dev);
1372
1373 if (unlikely(!idev))
1374 return ERR_PTR(-ENODEV);
1375
1376 rt = ip6_dst_alloc(net, dev, 0, NULL);
1377 if (unlikely(!rt)) {
1378 in6_dev_put(idev);
1379 dst = ERR_PTR(-ENOMEM);
1380 goto out;
1381 }
1382
1383 rt->dst.flags |= DST_HOST;
1384 rt->dst.output = ip6_output;
1385 atomic_set(&rt->dst.__refcnt, 1);
1386 rt->rt6i_gateway = fl6->daddr;
1387 rt->rt6i_dst.addr = fl6->daddr;
1388 rt->rt6i_dst.plen = 128;
1389 rt->rt6i_idev = idev;
1390 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1391
1392 spin_lock_bh(&icmp6_dst_lock);
1393 rt->dst.next = icmp6_dst_gc_list;
1394 icmp6_dst_gc_list = &rt->dst;
1395 spin_unlock_bh(&icmp6_dst_lock);
1396
1397 fib6_force_start_gc(net);
1398
1399 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1400
1401out:
1402 return dst;
1403}
1404
1405int icmp6_dst_gc(void)
1406{
1407 struct dst_entry *dst, **pprev;
1408 int more = 0;
1409
1410 spin_lock_bh(&icmp6_dst_lock);
1411 pprev = &icmp6_dst_gc_list;
1412
1413 while ((dst = *pprev) != NULL) {
1414 if (!atomic_read(&dst->__refcnt)) {
1415 *pprev = dst->next;
1416 dst_free(dst);
1417 } else {
1418 pprev = &dst->next;
1419 ++more;
1420 }
1421 }
1422
1423 spin_unlock_bh(&icmp6_dst_lock);
1424
1425 return more;
1426}
1427
1428static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1429 void *arg)
1430{
1431 struct dst_entry *dst, **pprev;
1432
1433 spin_lock_bh(&icmp6_dst_lock);
1434 pprev = &icmp6_dst_gc_list;
1435 while ((dst = *pprev) != NULL) {
1436 struct rt6_info *rt = (struct rt6_info *) dst;
1437 if (func(rt, arg)) {
1438 *pprev = dst->next;
1439 dst_free(dst);
1440 } else {
1441 pprev = &dst->next;
1442 }
1443 }
1444 spin_unlock_bh(&icmp6_dst_lock);
1445}
1446
1447static int ip6_dst_gc(struct dst_ops *ops)
1448{
1449 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1450 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1451 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1452 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1453 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1454 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1455 int entries;
1456
1457 entries = dst_entries_get_fast(ops);
1458 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1459 entries <= rt_max_size)
1460 goto out;
1461
1462 net->ipv6.ip6_rt_gc_expire++;
1463 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1464 entries = dst_entries_get_slow(ops);
1465 if (entries < ops->gc_thresh)
1466 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1467out:
1468 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1469 return entries > rt_max_size;
1470}
1471
1472static int ip6_convert_metrics(struct mx6_config *mxc,
1473 const struct fib6_config *cfg)
1474{
1475 struct nlattr *nla;
1476 int remaining;
1477 u32 *mp;
1478
1479 if (!cfg->fc_mx)
1480 return 0;
1481
1482 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1483 if (unlikely(!mp))
1484 return -ENOMEM;
1485
1486 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1487 int type = nla_type(nla);
1488
1489 if (type) {
1490 u32 val;
1491
1492 if (unlikely(type > RTAX_MAX))
1493 goto err;
1494 if (type == RTAX_CC_ALGO) {
1495 char tmp[TCP_CA_NAME_MAX];
1496
1497 nla_strlcpy(tmp, nla, sizeof(tmp));
1498 val = tcp_ca_get_key_by_name(tmp);
1499 if (val == TCP_CA_UNSPEC)
1500 goto err;
1501 } else {
1502 val = nla_get_u32(nla);
1503 }
1504
1505 mp[type - 1] = val;
1506 __set_bit(type - 1, mxc->mx_valid);
1507 }
1508 }
1509
1510 mxc->mx = mp;
1511
1512 return 0;
1513 err:
1514 kfree(mp);
1515 return -EINVAL;
1516}
1517
1518int ip6_route_add(struct fib6_config *cfg)
1519{
1520 int err;
1521 struct net *net = cfg->fc_nlinfo.nl_net;
1522 struct rt6_info *rt = NULL;
1523 struct net_device *dev = NULL;
1524 struct inet6_dev *idev = NULL;
1525 struct fib6_table *table;
1526 struct mx6_config mxc = { .mx = NULL, };
1527 int addr_type;
1528
1529 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1530 return -EINVAL;
1531#ifndef CONFIG_IPV6_SUBTREES
1532 if (cfg->fc_src_len)
1533 return -EINVAL;
1534#endif
1535 if (cfg->fc_ifindex) {
1536 err = -ENODEV;
1537 dev = dev_get_by_index(net, cfg->fc_ifindex);
1538 if (!dev)
1539 goto out;
1540 idev = in6_dev_get(dev);
1541 if (!idev)
1542 goto out;
1543 }
1544
1545 if (cfg->fc_metric == 0)
1546 cfg->fc_metric = IP6_RT_PRIO_USER;
1547
1548 err = -ENOBUFS;
1549 if (cfg->fc_nlinfo.nlh &&
1550 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1551 table = fib6_get_table(net, cfg->fc_table);
1552 if (!table) {
1553 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1554 table = fib6_new_table(net, cfg->fc_table);
1555 }
1556 } else {
1557 table = fib6_new_table(net, cfg->fc_table);
1558 }
1559
1560 if (!table)
1561 goto out;
1562
1563 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1564
1565 if (!rt) {
1566 err = -ENOMEM;
1567 goto out;
1568 }
1569
1570 if (cfg->fc_flags & RTF_EXPIRES)
1571 rt6_set_expires(rt, jiffies +
1572 clock_t_to_jiffies(cfg->fc_expires));
1573 else
1574 rt6_clean_expires(rt);
1575
1576 if (cfg->fc_protocol == RTPROT_UNSPEC)
1577 cfg->fc_protocol = RTPROT_BOOT;
1578 rt->rt6i_protocol = cfg->fc_protocol;
1579
1580 addr_type = ipv6_addr_type(&cfg->fc_dst);
1581
1582 if (addr_type & IPV6_ADDR_MULTICAST)
1583 rt->dst.input = ip6_mc_input;
1584 else if (cfg->fc_flags & RTF_LOCAL)
1585 rt->dst.input = ip6_input;
1586 else
1587 rt->dst.input = ip6_forward;
1588
1589 rt->dst.output = ip6_output;
1590
1591 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1592 rt->rt6i_dst.plen = cfg->fc_dst_len;
1593 if (rt->rt6i_dst.plen == 128) {
1594 rt->dst.flags |= DST_HOST;
1595 dst_metrics_set_force_overwrite(&rt->dst);
1596 }
1597
1598#ifdef CONFIG_IPV6_SUBTREES
1599 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1600 rt->rt6i_src.plen = cfg->fc_src_len;
1601#endif
1602
1603 rt->rt6i_metric = cfg->fc_metric;
1604
1605
1606
1607
1608 if ((cfg->fc_flags & RTF_REJECT) ||
1609 (dev && (dev->flags & IFF_LOOPBACK) &&
1610 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1611 !(cfg->fc_flags & RTF_LOCAL))) {
1612
1613 if (dev != net->loopback_dev) {
1614 if (dev) {
1615 dev_put(dev);
1616 in6_dev_put(idev);
1617 }
1618 dev = net->loopback_dev;
1619 dev_hold(dev);
1620 idev = in6_dev_get(dev);
1621 if (!idev) {
1622 err = -ENODEV;
1623 goto out;
1624 }
1625 }
1626 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1627 switch (cfg->fc_type) {
1628 case RTN_BLACKHOLE:
1629 rt->dst.error = -EINVAL;
1630 rt->dst.output = dst_discard_sk;
1631 rt->dst.input = dst_discard;
1632 break;
1633 case RTN_PROHIBIT:
1634 rt->dst.error = -EACCES;
1635 rt->dst.output = ip6_pkt_prohibit_out;
1636 rt->dst.input = ip6_pkt_prohibit;
1637 break;
1638 case RTN_THROW:
1639 default:
1640 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1641 : -ENETUNREACH;
1642 rt->dst.output = ip6_pkt_discard_out;
1643 rt->dst.input = ip6_pkt_discard;
1644 break;
1645 }
1646 goto install_route;
1647 }
1648
1649 if (cfg->fc_flags & RTF_GATEWAY) {
1650 const struct in6_addr *gw_addr;
1651 int gwa_type;
1652
1653 gw_addr = &cfg->fc_gateway;
1654 rt->rt6i_gateway = *gw_addr;
1655 gwa_type = ipv6_addr_type(gw_addr);
1656
1657 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1658 struct rt6_info *grt;
1659
1660
1661
1662
1663
1664
1665
1666
1667 err = -EINVAL;
1668 if (!(gwa_type & IPV6_ADDR_UNICAST))
1669 goto out;
1670
1671 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1672
1673 err = -EHOSTUNREACH;
1674 if (!grt)
1675 goto out;
1676 if (dev) {
1677 if (dev != grt->dst.dev) {
1678 ip6_rt_put(grt);
1679 goto out;
1680 }
1681 } else {
1682 dev = grt->dst.dev;
1683 idev = grt->rt6i_idev;
1684 dev_hold(dev);
1685 in6_dev_hold(grt->rt6i_idev);
1686 }
1687 if (!(grt->rt6i_flags & RTF_GATEWAY))
1688 err = 0;
1689 ip6_rt_put(grt);
1690
1691 if (err)
1692 goto out;
1693 }
1694 err = -EINVAL;
1695 if (!dev || (dev->flags & IFF_LOOPBACK))
1696 goto out;
1697 }
1698
1699 err = -ENODEV;
1700 if (!dev)
1701 goto out;
1702
1703 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1704 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1705 err = -EINVAL;
1706 goto out;
1707 }
1708 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1709 rt->rt6i_prefsrc.plen = 128;
1710 } else
1711 rt->rt6i_prefsrc.plen = 0;
1712
1713 rt->rt6i_flags = cfg->fc_flags;
1714
1715install_route:
1716 rt->dst.dev = dev;
1717 rt->rt6i_idev = idev;
1718 rt->rt6i_table = table;
1719
1720 cfg->fc_nlinfo.nl_net = dev_net(dev);
1721
1722 err = ip6_convert_metrics(&mxc, cfg);
1723 if (err)
1724 goto out;
1725
1726 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1727
1728 kfree(mxc.mx);
1729 return err;
1730out:
1731 if (dev)
1732 dev_put(dev);
1733 if (idev)
1734 in6_dev_put(idev);
1735 if (rt)
1736 dst_free(&rt->dst);
1737 return err;
1738}
1739
1740static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1741{
1742 int err;
1743 struct fib6_table *table;
1744 struct net *net = dev_net(rt->dst.dev);
1745
1746 if (rt == net->ipv6.ip6_null_entry) {
1747 err = -ENOENT;
1748 goto out;
1749 }
1750
1751 table = rt->rt6i_table;
1752 write_lock_bh(&table->tb6_lock);
1753 err = fib6_del(rt, info);
1754 write_unlock_bh(&table->tb6_lock);
1755
1756out:
1757 ip6_rt_put(rt);
1758 return err;
1759}
1760
1761int ip6_del_rt(struct rt6_info *rt)
1762{
1763 struct nl_info info = {
1764 .nl_net = dev_net(rt->dst.dev),
1765 };
1766 return __ip6_del_rt(rt, &info);
1767}
1768
1769static int ip6_route_del(struct fib6_config *cfg)
1770{
1771 struct fib6_table *table;
1772 struct fib6_node *fn;
1773 struct rt6_info *rt;
1774 int err = -ESRCH;
1775
1776 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1777 if (!table)
1778 return err;
1779
1780 read_lock_bh(&table->tb6_lock);
1781
1782 fn = fib6_locate(&table->tb6_root,
1783 &cfg->fc_dst, cfg->fc_dst_len,
1784 &cfg->fc_src, cfg->fc_src_len);
1785
1786 if (fn) {
1787 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1788 if (cfg->fc_ifindex &&
1789 (!rt->dst.dev ||
1790 rt->dst.dev->ifindex != cfg->fc_ifindex))
1791 continue;
1792 if (cfg->fc_flags & RTF_GATEWAY &&
1793 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1794 continue;
1795 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1796 continue;
1797 dst_hold(&rt->dst);
1798 read_unlock_bh(&table->tb6_lock);
1799
1800 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1801 }
1802 }
1803 read_unlock_bh(&table->tb6_lock);
1804
1805 return err;
1806}
1807
/*
 * Process the redirect message carried in @skb against the route @dst.
 *
 * The message is validated (length, non-multicast destination, target
 * either equal to the destination or link-local unicast, receiving
 * interface not forwarding and accepting redirects, well-formed ND
 * options).  If @dst is a usable route, the neighbour entry for the
 * target is updated, a host cache route to the redirected destination is
 * cloned from @dst and inserted, NETEVENT_REDIRECT is raised, and the old
 * route is deleted if it was itself a cache entry.
 *
 * @sk is not used by this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* bytes of ND options following the fixed redirect header */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == destination means the destination itself is on-link */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* ignore redirects when forwarding or when they are disabled */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* optional target link-layer address */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	/* the null entry means the lookup found no matching route */
	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* the redirect is accepted from here on */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 * Record the target as a neighbour; it is flagged as a router
	 * unless the destination is on-link.
	 */
	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_copy(rt, &msg->dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	/* next hop is the redirect target (the neighbour's key) */
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/*
	 * An old cache entry is superseded by the new one; take an extra
	 * reference because ip6_del_rt() drops one.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
1924
1925
1926
1927
1928
1929static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1930 const struct in6_addr *dest)
1931{
1932 struct net *net = dev_net(ort->dst.dev);
1933 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1934 ort->rt6i_table);
1935
1936 if (rt) {
1937 rt->dst.input = ort->dst.input;
1938 rt->dst.output = ort->dst.output;
1939 rt->dst.flags |= DST_HOST;
1940
1941 rt->rt6i_dst.addr = *dest;
1942 rt->rt6i_dst.plen = 128;
1943 dst_copy_metrics(&rt->dst, &ort->dst);
1944 rt->dst.error = ort->dst.error;
1945 rt->rt6i_idev = ort->rt6i_idev;
1946 if (rt->rt6i_idev)
1947 in6_dev_hold(rt->rt6i_idev);
1948 rt->dst.lastuse = jiffies;
1949
1950 if (ort->rt6i_flags & RTF_GATEWAY)
1951 rt->rt6i_gateway = ort->rt6i_gateway;
1952 else
1953 rt->rt6i_gateway = *dest;
1954 rt->rt6i_flags = ort->rt6i_flags;
1955 rt6_set_from(rt, ort);
1956 rt->rt6i_metric = 0;
1957
1958#ifdef CONFIG_IPV6_SUBTREES
1959 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1960#endif
1961 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1962 rt->rt6i_table = ort->rt6i_table;
1963 }
1964 return rt;
1965}
1966
1967#ifdef CONFIG_IPV6_ROUTE_INFO
1968static struct rt6_info *rt6_get_route_info(struct net *net,
1969 const struct in6_addr *prefix, int prefixlen,
1970 const struct in6_addr *gwaddr, int ifindex)
1971{
1972 struct fib6_node *fn;
1973 struct rt6_info *rt = NULL;
1974 struct fib6_table *table;
1975
1976 table = fib6_get_table(net, RT6_TABLE_INFO);
1977 if (!table)
1978 return NULL;
1979
1980 read_lock_bh(&table->tb6_lock);
1981 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1982 if (!fn)
1983 goto out;
1984
1985 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1986 if (rt->dst.dev->ifindex != ifindex)
1987 continue;
1988 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1989 continue;
1990 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1991 continue;
1992 dst_hold(&rt->dst);
1993 break;
1994 }
1995out:
1996 read_unlock_bh(&table->tb6_lock);
1997 return rt;
1998}
1999
2000static struct rt6_info *rt6_add_route_info(struct net *net,
2001 const struct in6_addr *prefix, int prefixlen,
2002 const struct in6_addr *gwaddr, int ifindex,
2003 unsigned int pref)
2004{
2005 struct fib6_config cfg = {
2006 .fc_table = RT6_TABLE_INFO,
2007 .fc_metric = IP6_RT_PRIO_USER,
2008 .fc_ifindex = ifindex,
2009 .fc_dst_len = prefixlen,
2010 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2011 RTF_UP | RTF_PREF(pref),
2012 .fc_nlinfo.portid = 0,
2013 .fc_nlinfo.nlh = NULL,
2014 .fc_nlinfo.nl_net = net,
2015 };
2016
2017 cfg.fc_dst = *prefix;
2018 cfg.fc_gateway = *gwaddr;
2019
2020
2021 if (!prefixlen)
2022 cfg.fc_flags |= RTF_DEFAULT;
2023
2024 ip6_route_add(&cfg);
2025
2026 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2027}
2028#endif
2029
2030struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2031{
2032 struct rt6_info *rt;
2033 struct fib6_table *table;
2034
2035 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2036 if (!table)
2037 return NULL;
2038
2039 read_lock_bh(&table->tb6_lock);
2040 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2041 if (dev == rt->dst.dev &&
2042 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2043 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2044 break;
2045 }
2046 if (rt)
2047 dst_hold(&rt->dst);
2048 read_unlock_bh(&table->tb6_lock);
2049 return rt;
2050}
2051
2052struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2053 struct net_device *dev,
2054 unsigned int pref)
2055{
2056 struct fib6_config cfg = {
2057 .fc_table = RT6_TABLE_DFLT,
2058 .fc_metric = IP6_RT_PRIO_USER,
2059 .fc_ifindex = dev->ifindex,
2060 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2061 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2062 .fc_nlinfo.portid = 0,
2063 .fc_nlinfo.nlh = NULL,
2064 .fc_nlinfo.nl_net = dev_net(dev),
2065 };
2066
2067 cfg.fc_gateway = *gwaddr;
2068
2069 ip6_route_add(&cfg);
2070
2071 return rt6_get_dflt_router(gwaddr, dev);
2072}
2073
2074void rt6_purge_dflt_routers(struct net *net)
2075{
2076 struct rt6_info *rt;
2077 struct fib6_table *table;
2078
2079
2080 table = fib6_get_table(net, RT6_TABLE_DFLT);
2081 if (!table)
2082 return;
2083
2084restart:
2085 read_lock_bh(&table->tb6_lock);
2086 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2087 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2088 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2089 dst_hold(&rt->dst);
2090 read_unlock_bh(&table->tb6_lock);
2091 ip6_del_rt(rt);
2092 goto restart;
2093 }
2094 }
2095 read_unlock_bh(&table->tb6_lock);
2096}
2097
2098static void rtmsg_to_fib6_config(struct net *net,
2099 struct in6_rtmsg *rtmsg,
2100 struct fib6_config *cfg)
2101{
2102 memset(cfg, 0, sizeof(*cfg));
2103
2104 cfg->fc_table = RT6_TABLE_MAIN;
2105 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2106 cfg->fc_metric = rtmsg->rtmsg_metric;
2107 cfg->fc_expires = rtmsg->rtmsg_info;
2108 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2109 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2110 cfg->fc_flags = rtmsg->rtmsg_flags;
2111
2112 cfg->fc_nlinfo.nl_net = net;
2113
2114 cfg->fc_dst = rtmsg->rtmsg_dst;
2115 cfg->fc_src = rtmsg->rtmsg_src;
2116 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2117}
2118
2119int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2120{
2121 struct fib6_config cfg;
2122 struct in6_rtmsg rtmsg;
2123 int err;
2124
2125 switch (cmd) {
2126 case SIOCADDRT:
2127 case SIOCDELRT:
2128 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2129 return -EPERM;
2130 err = copy_from_user(&rtmsg, arg,
2131 sizeof(struct in6_rtmsg));
2132 if (err)
2133 return -EFAULT;
2134
2135 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2136
2137 rtnl_lock();
2138 switch (cmd) {
2139 case SIOCADDRT:
2140 err = ip6_route_add(&cfg);
2141 break;
2142 case SIOCDELRT:
2143 err = ip6_route_del(&cfg);
2144 break;
2145 default:
2146 err = -EINVAL;
2147 }
2148 rtnl_unlock();
2149
2150 return err;
2151 }
2152
2153 return -EINVAL;
2154}
2155
2156
2157
2158
2159
2160static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2161{
2162 int type;
2163 struct dst_entry *dst = skb_dst(skb);
2164 switch (ipstats_mib_noroutes) {
2165 case IPSTATS_MIB_INNOROUTES:
2166 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2167 if (type == IPV6_ADDR_ANY) {
2168 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2169 IPSTATS_MIB_INADDRERRORS);
2170 break;
2171 }
2172
2173 case IPSTATS_MIB_OUTNOROUTES:
2174 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2175 ipstats_mib_noroutes);
2176 break;
2177 }
2178 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2179 kfree_skb(skb);
2180 return 0;
2181}
2182
2183static int ip6_pkt_discard(struct sk_buff *skb)
2184{
2185 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2186}
2187
2188static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2189{
2190 skb->dev = skb_dst(skb)->dev;
2191 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2192}
2193
2194static int ip6_pkt_prohibit(struct sk_buff *skb)
2195{
2196 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2197}
2198
2199static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2200{
2201 skb->dev = skb_dst(skb)->dev;
2202 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2203}
2204
2205
2206
2207
2208
2209struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2210 const struct in6_addr *addr,
2211 bool anycast)
2212{
2213 struct net *net = dev_net(idev->dev);
2214 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2215 DST_NOCOUNT, NULL);
2216 if (!rt)
2217 return ERR_PTR(-ENOMEM);
2218
2219 in6_dev_hold(idev);
2220
2221 rt->dst.flags |= DST_HOST;
2222 rt->dst.input = ip6_input;
2223 rt->dst.output = ip6_output;
2224 rt->rt6i_idev = idev;
2225
2226 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2227 if (anycast)
2228 rt->rt6i_flags |= RTF_ANYCAST;
2229 else
2230 rt->rt6i_flags |= RTF_LOCAL;
2231
2232 rt->rt6i_gateway = *addr;
2233 rt->rt6i_dst.addr = *addr;
2234 rt->rt6i_dst.plen = 128;
2235 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2236
2237 atomic_set(&rt->dst.__refcnt, 1);
2238
2239 return rt;
2240}
2241
2242int ip6_route_get_saddr(struct net *net,
2243 struct rt6_info *rt,
2244 const struct in6_addr *daddr,
2245 unsigned int prefs,
2246 struct in6_addr *saddr)
2247{
2248 struct inet6_dev *idev =
2249 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2250 int err = 0;
2251 if (rt && rt->rt6i_prefsrc.plen)
2252 *saddr = rt->rt6i_prefsrc.addr;
2253 else
2254 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2255 daddr, prefs, saddr);
2256 return err;
2257}
2258
2259
/*
 * Argument bundle passed through fib6_clean_all() to fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* preferred source address to drop */
};
2265
2266static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2267{
2268 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2269 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2270 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2271
2272 if (((void *)rt->dst.dev == dev || !dev) &&
2273 rt != net->ipv6.ip6_null_entry &&
2274 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2275
2276 rt->rt6i_prefsrc.plen = 0;
2277 }
2278 return 0;
2279}
2280
2281void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2282{
2283 struct net *net = dev_net(ifp->idev->dev);
2284 struct arg_dev_net_ip adni = {
2285 .dev = ifp->idev->dev,
2286 .net = net,
2287 .addr = &ifp->addr,
2288 };
2289 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2290}
2291
/*
 * Flag combinations tested by fib6_clean_tohost(): an autoconfigured
 * default gateway route, and a cached gateway route.
 */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2294
2295
2296static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2297{
2298 struct in6_addr *gateway = (struct in6_addr *)arg;
2299
2300 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2301 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2302 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2303 return -1;
2304 }
2305 return 0;
2306}
2307
2308void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2309{
2310 fib6_clean_all(net, fib6_clean_tohost, gateway);
2311}
2312
/* Argument bundle for fib6_ifdown(), built by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2317
2318static int fib6_ifdown(struct rt6_info *rt, void *arg)
2319{
2320 const struct arg_dev_net *adn = arg;
2321 const struct net_device *dev = adn->dev;
2322
2323 if ((rt->dst.dev == dev || !dev) &&
2324 rt != adn->net->ipv6.ip6_null_entry)
2325 return -1;
2326
2327 return 0;
2328}
2329
2330void rt6_ifdown(struct net *net, struct net_device *dev)
2331{
2332 struct arg_dev_net adn = {
2333 .dev = dev,
2334 .net = net,
2335 };
2336
2337 fib6_clean_all(net, fib6_ifdown, &adn);
2338 icmp6_clean_all(fib6_ifdown, &adn);
2339}
2340
/* Argument bundle for rt6_mtu_change_route(), built by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2345
2346static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2347{
2348 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2349 struct inet6_dev *idev;
2350
2351
2352
2353
2354
2355
2356
2357 idev = __in6_dev_get(arg->dev);
2358 if (!idev)
2359 return 0;
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375 if (rt->dst.dev == arg->dev &&
2376 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2377 (dst_mtu(&rt->dst) >= arg->mtu ||
2378 (dst_mtu(&rt->dst) < arg->mtu &&
2379 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2380 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2381 }
2382 return 0;
2383}
2384
2385void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2386{
2387 struct rt6_mtu_change_arg arg = {
2388 .dev = dev,
2389 .mtu = mtu,
2390 };
2391
2392 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2393}
2394
/*
 * Netlink attribute policy handed to nlmsg_parse() in rtm_to_fib6_config().
 * Attributes without an entry here (e.g. RTA_DST, RTA_SRC, RTA_PREFSRC,
 * RTA_TABLE) get no type or length constraint from this table.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
};
2404
2405static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2406 struct fib6_config *cfg)
2407{
2408 struct rtmsg *rtm;
2409 struct nlattr *tb[RTA_MAX+1];
2410 unsigned int pref;
2411 int err;
2412
2413 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2414 if (err < 0)
2415 goto errout;
2416
2417 err = -EINVAL;
2418 rtm = nlmsg_data(nlh);
2419 memset(cfg, 0, sizeof(*cfg));
2420
2421 cfg->fc_table = rtm->rtm_table;
2422 cfg->fc_dst_len = rtm->rtm_dst_len;
2423 cfg->fc_src_len = rtm->rtm_src_len;
2424 cfg->fc_flags = RTF_UP;
2425 cfg->fc_protocol = rtm->rtm_protocol;
2426 cfg->fc_type = rtm->rtm_type;
2427
2428 if (rtm->rtm_type == RTN_UNREACHABLE ||
2429 rtm->rtm_type == RTN_BLACKHOLE ||
2430 rtm->rtm_type == RTN_PROHIBIT ||
2431 rtm->rtm_type == RTN_THROW)
2432 cfg->fc_flags |= RTF_REJECT;
2433
2434 if (rtm->rtm_type == RTN_LOCAL)
2435 cfg->fc_flags |= RTF_LOCAL;
2436
2437 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2438 cfg->fc_nlinfo.nlh = nlh;
2439 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2440
2441 if (tb[RTA_GATEWAY]) {
2442 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2443 cfg->fc_flags |= RTF_GATEWAY;
2444 }
2445
2446 if (tb[RTA_DST]) {
2447 int plen = (rtm->rtm_dst_len + 7) >> 3;
2448
2449 if (nla_len(tb[RTA_DST]) < plen)
2450 goto errout;
2451
2452 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2453 }
2454
2455 if (tb[RTA_SRC]) {
2456 int plen = (rtm->rtm_src_len + 7) >> 3;
2457
2458 if (nla_len(tb[RTA_SRC]) < plen)
2459 goto errout;
2460
2461 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2462 }
2463
2464 if (tb[RTA_PREFSRC])
2465 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2466
2467 if (tb[RTA_OIF])
2468 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2469
2470 if (tb[RTA_PRIORITY])
2471 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2472
2473 if (tb[RTA_METRICS]) {
2474 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2475 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2476 }
2477
2478 if (tb[RTA_TABLE])
2479 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2480
2481 if (tb[RTA_MULTIPATH]) {
2482 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2483 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2484 }
2485
2486 if (tb[RTA_PREF]) {
2487 pref = nla_get_u8(tb[RTA_PREF]);
2488 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2489 pref != ICMPV6_ROUTER_PREF_HIGH)
2490 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2491 cfg->fc_flags |= RTF_PREF(pref);
2492 }
2493
2494 err = 0;
2495errout:
2496 return err;
2497}
2498
/*
 * Add (@add != 0) or delete (@add == 0) every next hop listed in the
 * multipath attribute of @cfg, one ip6_route_add()/ip6_route_del() call
 * per next hop.
 *
 * Deletion keeps going after a failure.  A failed addition switches the
 * function into delete mode and restarts from the first next hop.
 *
 * Returns 0 on success or the last error seen.
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

	remaining = cfg->fc_mp_len;
beginning:
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* walk the next-hop entries */
	while (rtnh_ok(rtnh, remaining)) {
		/* start from the shared config, then apply per-hop overrides */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/*
			 * A failed delete does not stop the loop; the
			 * remaining next hops are still tried.
			 */
			if (add) {
				/*
				 * A failed add is rolled back: restart in
				 * delete mode, limited to the bytes walked
				 * so far.
				 * NOTE(review): this relies on rtnh_next()
				 * decrementing 'remaining' - confirm.
				 */
				add = 0;
				remaining = cfg->fc_mp_len - remaining;
				goto beginning;
			}
		}
		/*
		 * Clear NLM_F_EXCL / NLM_F_REPLACE on the request header so
		 * that later next hops are processed without them.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2557
2558static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2559{
2560 struct fib6_config cfg;
2561 int err;
2562
2563 err = rtm_to_fib6_config(skb, nlh, &cfg);
2564 if (err < 0)
2565 return err;
2566
2567 if (cfg.fc_mp)
2568 return ip6_route_multipath(&cfg, 0);
2569 else
2570 return ip6_route_del(&cfg);
2571}
2572
2573static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2574{
2575 struct fib6_config cfg;
2576 int err;
2577
2578 err = rtm_to_fib6_config(skb, nlh, &cfg);
2579 if (err < 0)
2580 return err;
2581
2582 if (cfg.fc_mp)
2583 return ip6_route_multipath(&cfg, 1);
2584 else
2585 return ip6_route_add(&cfg);
2586}
2587
2588static inline size_t rt6_nlmsg_size(void)
2589{
2590 return NLMSG_ALIGN(sizeof(struct rtmsg))
2591 + nla_total_size(16)
2592 + nla_total_size(16)
2593 + nla_total_size(16)
2594 + nla_total_size(16)
2595 + nla_total_size(4)
2596 + nla_total_size(4)
2597 + nla_total_size(4)
2598 + nla_total_size(4)
2599 + RTAX_MAX * nla_total_size(4)
2600 + nla_total_size(sizeof(struct rta_cacheinfo))
2601 + nla_total_size(TCP_CA_NAME_MAX)
2602 + nla_total_size(1);
2603}
2604
2605static int rt6_fill_node(struct net *net,
2606 struct sk_buff *skb, struct rt6_info *rt,
2607 struct in6_addr *dst, struct in6_addr *src,
2608 int iif, int type, u32 portid, u32 seq,
2609 int prefix, int nowait, unsigned int flags)
2610{
2611 struct rtmsg *rtm;
2612 struct nlmsghdr *nlh;
2613 long expires;
2614 u32 table;
2615
2616 if (prefix) {
2617 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2618
2619 return 1;
2620 }
2621 }
2622
2623 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2624 if (!nlh)
2625 return -EMSGSIZE;
2626
2627 rtm = nlmsg_data(nlh);
2628 rtm->rtm_family = AF_INET6;
2629 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2630 rtm->rtm_src_len = rt->rt6i_src.plen;
2631 rtm->rtm_tos = 0;
2632 if (rt->rt6i_table)
2633 table = rt->rt6i_table->tb6_id;
2634 else
2635 table = RT6_TABLE_UNSPEC;
2636 rtm->rtm_table = table;
2637 if (nla_put_u32(skb, RTA_TABLE, table))
2638 goto nla_put_failure;
2639 if (rt->rt6i_flags & RTF_REJECT) {
2640 switch (rt->dst.error) {
2641 case -EINVAL:
2642 rtm->rtm_type = RTN_BLACKHOLE;
2643 break;
2644 case -EACCES:
2645 rtm->rtm_type = RTN_PROHIBIT;
2646 break;
2647 case -EAGAIN:
2648 rtm->rtm_type = RTN_THROW;
2649 break;
2650 default:
2651 rtm->rtm_type = RTN_UNREACHABLE;
2652 break;
2653 }
2654 }
2655 else if (rt->rt6i_flags & RTF_LOCAL)
2656 rtm->rtm_type = RTN_LOCAL;
2657 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2658 rtm->rtm_type = RTN_LOCAL;
2659 else
2660 rtm->rtm_type = RTN_UNICAST;
2661 rtm->rtm_flags = 0;
2662 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2663 rtm->rtm_protocol = rt->rt6i_protocol;
2664 if (rt->rt6i_flags & RTF_DYNAMIC)
2665 rtm->rtm_protocol = RTPROT_REDIRECT;
2666 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2667 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2668 rtm->rtm_protocol = RTPROT_RA;
2669 else
2670 rtm->rtm_protocol = RTPROT_KERNEL;
2671 }
2672
2673 if (rt->rt6i_flags & RTF_CACHE)
2674 rtm->rtm_flags |= RTM_F_CLONED;
2675
2676 if (dst) {
2677 if (nla_put_in6_addr(skb, RTA_DST, dst))
2678 goto nla_put_failure;
2679 rtm->rtm_dst_len = 128;
2680 } else if (rtm->rtm_dst_len)
2681 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2682 goto nla_put_failure;
2683#ifdef CONFIG_IPV6_SUBTREES
2684 if (src) {
2685 if (nla_put_in6_addr(skb, RTA_SRC, src))
2686 goto nla_put_failure;
2687 rtm->rtm_src_len = 128;
2688 } else if (rtm->rtm_src_len &&
2689 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2690 goto nla_put_failure;
2691#endif
2692 if (iif) {
2693#ifdef CONFIG_IPV6_MROUTE
2694 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2695 int err = ip6mr_get_route(net, skb, rtm, nowait);
2696 if (err <= 0) {
2697 if (!nowait) {
2698 if (err == 0)
2699 return 0;
2700 goto nla_put_failure;
2701 } else {
2702 if (err == -EMSGSIZE)
2703 goto nla_put_failure;
2704 }
2705 }
2706 } else
2707#endif
2708 if (nla_put_u32(skb, RTA_IIF, iif))
2709 goto nla_put_failure;
2710 } else if (dst) {
2711 struct in6_addr saddr_buf;
2712 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2713 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2714 goto nla_put_failure;
2715 }
2716
2717 if (rt->rt6i_prefsrc.plen) {
2718 struct in6_addr saddr_buf;
2719 saddr_buf = rt->rt6i_prefsrc.addr;
2720 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2721 goto nla_put_failure;
2722 }
2723
2724 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2725 goto nla_put_failure;
2726
2727 if (rt->rt6i_flags & RTF_GATEWAY) {
2728 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2729 goto nla_put_failure;
2730 }
2731
2732 if (rt->dst.dev &&
2733 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2734 goto nla_put_failure;
2735 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2736 goto nla_put_failure;
2737
2738 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2739
2740 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2741 goto nla_put_failure;
2742
2743 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2744 goto nla_put_failure;
2745
2746 nlmsg_end(skb, nlh);
2747 return 0;
2748
2749nla_put_failure:
2750 nlmsg_cancel(skb, nlh);
2751 return -EMSGSIZE;
2752}
2753
2754int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2755{
2756 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2757 int prefix;
2758
2759 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2760 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2761 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2762 } else
2763 prefix = 0;
2764
2765 return rt6_fill_node(arg->net,
2766 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2767 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2768 prefix, 0, NLM_F_MULTI);
2769}
2770
2771static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2772{
2773 struct net *net = sock_net(in_skb->sk);
2774 struct nlattr *tb[RTA_MAX+1];
2775 struct rt6_info *rt;
2776 struct sk_buff *skb;
2777 struct rtmsg *rtm;
2778 struct flowi6 fl6;
2779 int err, iif = 0, oif = 0;
2780
2781 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2782 if (err < 0)
2783 goto errout;
2784
2785 err = -EINVAL;
2786 memset(&fl6, 0, sizeof(fl6));
2787
2788 if (tb[RTA_SRC]) {
2789 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2790 goto errout;
2791
2792 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2793 }
2794
2795 if (tb[RTA_DST]) {
2796 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2797 goto errout;
2798
2799 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2800 }
2801
2802 if (tb[RTA_IIF])
2803 iif = nla_get_u32(tb[RTA_IIF]);
2804
2805 if (tb[RTA_OIF])
2806 oif = nla_get_u32(tb[RTA_OIF]);
2807
2808 if (tb[RTA_MARK])
2809 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2810
2811 if (iif) {
2812 struct net_device *dev;
2813 int flags = 0;
2814
2815 dev = __dev_get_by_index(net, iif);
2816 if (!dev) {
2817 err = -ENODEV;
2818 goto errout;
2819 }
2820
2821 fl6.flowi6_iif = iif;
2822
2823 if (!ipv6_addr_any(&fl6.saddr))
2824 flags |= RT6_LOOKUP_F_HAS_SADDR;
2825
2826 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2827 flags);
2828 } else {
2829 fl6.flowi6_oif = oif;
2830
2831 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2832 }
2833
2834 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2835 if (!skb) {
2836 ip6_rt_put(rt);
2837 err = -ENOBUFS;
2838 goto errout;
2839 }
2840
2841
2842
2843
2844 skb_reset_mac_header(skb);
2845 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2846
2847 skb_dst_set(skb, &rt->dst);
2848
2849 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2850 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2851 nlh->nlmsg_seq, 0, 0, 0);
2852 if (err < 0) {
2853 kfree_skb(skb);
2854 goto errout;
2855 }
2856
2857 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2858errout:
2859 return err;
2860}
2861
2862void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2863{
2864 struct sk_buff *skb;
2865 struct net *net = info->nl_net;
2866 u32 seq;
2867 int err;
2868
2869 err = -ENOBUFS;
2870 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2871
2872 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2873 if (!skb)
2874 goto errout;
2875
2876 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2877 event, info->portid, seq, 0, 0, 0);
2878 if (err < 0) {
2879
2880 WARN_ON(err == -EMSGSIZE);
2881 kfree_skb(skb);
2882 goto errout;
2883 }
2884 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2885 info->nlh, gfp_any());
2886 return;
2887errout:
2888 if (err < 0)
2889 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2890}
2891
2892static int ip6_route_dev_notify(struct notifier_block *this,
2893 unsigned long event, void *ptr)
2894{
2895 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2896 struct net *net = dev_net(dev);
2897
2898 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2899 net->ipv6.ip6_null_entry->dst.dev = dev;
2900 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2901#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2903 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2904 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2905 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2906#endif
2907 }
2908
2909 return NOTIFY_OK;
2910}
2911
2912
2913
2914
2915
2916#ifdef CONFIG_PROC_FS
2917
2918static const struct file_operations ipv6_route_proc_fops = {
2919 .owner = THIS_MODULE,
2920 .open = ipv6_route_open,
2921 .read = seq_read,
2922 .llseek = seq_lseek,
2923 .release = seq_release_net,
2924};
2925
2926static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2927{
2928 struct net *net = (struct net *)seq->private;
2929 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2930 net->ipv6.rt6_stats->fib_nodes,
2931 net->ipv6.rt6_stats->fib_route_nodes,
2932 net->ipv6.rt6_stats->fib_rt_alloc,
2933 net->ipv6.rt6_stats->fib_rt_entries,
2934 net->ipv6.rt6_stats->fib_rt_cache,
2935 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2936 net->ipv6.rt6_stats->fib_discarded_routes);
2937
2938 return 0;
2939}
2940
2941static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2942{
2943 return single_open_net(inode, file, rt6_stats_seq_show);
2944}
2945
2946static const struct file_operations rt6_stats_seq_fops = {
2947 .owner = THIS_MODULE,
2948 .open = rt6_stats_seq_open,
2949 .read = seq_read,
2950 .llseek = seq_lseek,
2951 .release = single_release_net,
2952};
2953#endif
2954
2955#ifdef CONFIG_SYSCTL
2956
2957static
2958int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2959 void __user *buffer, size_t *lenp, loff_t *ppos)
2960{
2961 struct net *net;
2962 int delay;
2963 if (!write)
2964 return -EINVAL;
2965
2966 net = (struct net *)ctl->extra1;
2967 delay = net->ipv6.sysctl.flush_delay;
2968 proc_dointvec(ctl, write, buffer, lenp, ppos);
2969 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2970 return 0;
2971}
2972
2973struct ctl_table ipv6_route_table_template[] = {
2974 {
2975 .procname = "flush",
2976 .data = &init_net.ipv6.sysctl.flush_delay,
2977 .maxlen = sizeof(int),
2978 .mode = 0200,
2979 .proc_handler = ipv6_sysctl_rtcache_flush
2980 },
2981 {
2982 .procname = "gc_thresh",
2983 .data = &ip6_dst_ops_template.gc_thresh,
2984 .maxlen = sizeof(int),
2985 .mode = 0644,
2986 .proc_handler = proc_dointvec,
2987 },
2988 {
2989 .procname = "max_size",
2990 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2991 .maxlen = sizeof(int),
2992 .mode = 0644,
2993 .proc_handler = proc_dointvec,
2994 },
2995 {
2996 .procname = "gc_min_interval",
2997 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2998 .maxlen = sizeof(int),
2999 .mode = 0644,
3000 .proc_handler = proc_dointvec_jiffies,
3001 },
3002 {
3003 .procname = "gc_timeout",
3004 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3005 .maxlen = sizeof(int),
3006 .mode = 0644,
3007 .proc_handler = proc_dointvec_jiffies,
3008 },
3009 {
3010 .procname = "gc_interval",
3011 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3012 .maxlen = sizeof(int),
3013 .mode = 0644,
3014 .proc_handler = proc_dointvec_jiffies,
3015 },
3016 {
3017 .procname = "gc_elasticity",
3018 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3019 .maxlen = sizeof(int),
3020 .mode = 0644,
3021 .proc_handler = proc_dointvec,
3022 },
3023 {
3024 .procname = "mtu_expires",
3025 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3026 .maxlen = sizeof(int),
3027 .mode = 0644,
3028 .proc_handler = proc_dointvec_jiffies,
3029 },
3030 {
3031 .procname = "min_adv_mss",
3032 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3033 .maxlen = sizeof(int),
3034 .mode = 0644,
3035 .proc_handler = proc_dointvec,
3036 },
3037 {
3038 .procname = "gc_min_interval_ms",
3039 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3040 .maxlen = sizeof(int),
3041 .mode = 0644,
3042 .proc_handler = proc_dointvec_ms_jiffies,
3043 },
3044 { }
3045};
3046
3047struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3048{
3049 struct ctl_table *table;
3050
3051 table = kmemdup(ipv6_route_table_template,
3052 sizeof(ipv6_route_table_template),
3053 GFP_KERNEL);
3054
3055 if (table) {
3056 table[0].data = &net->ipv6.sysctl.flush_delay;
3057 table[0].extra1 = net;
3058 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3059 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3060 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3061 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3062 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3063 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3064 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3065 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3066 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3067
3068
3069 if (net->user_ns != &init_user_ns)
3070 table[0].procname = NULL;
3071 }
3072
3073 return table;
3074}
3075#endif
3076
3077static int __net_init ip6_route_net_init(struct net *net)
3078{
3079 int ret = -ENOMEM;
3080
3081 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3082 sizeof(net->ipv6.ip6_dst_ops));
3083
3084 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3085 goto out_ip6_dst_ops;
3086
3087 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3088 sizeof(*net->ipv6.ip6_null_entry),
3089 GFP_KERNEL);
3090 if (!net->ipv6.ip6_null_entry)
3091 goto out_ip6_dst_entries;
3092 net->ipv6.ip6_null_entry->dst.path =
3093 (struct dst_entry *)net->ipv6.ip6_null_entry;
3094 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3095 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3096 ip6_template_metrics, true);
3097
3098#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3099 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3100 sizeof(*net->ipv6.ip6_prohibit_entry),
3101 GFP_KERNEL);
3102 if (!net->ipv6.ip6_prohibit_entry)
3103 goto out_ip6_null_entry;
3104 net->ipv6.ip6_prohibit_entry->dst.path =
3105 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3106 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3107 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3108 ip6_template_metrics, true);
3109
3110 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3111 sizeof(*net->ipv6.ip6_blk_hole_entry),
3112 GFP_KERNEL);
3113 if (!net->ipv6.ip6_blk_hole_entry)
3114 goto out_ip6_prohibit_entry;
3115 net->ipv6.ip6_blk_hole_entry->dst.path =
3116 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3117 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3118 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3119 ip6_template_metrics, true);
3120#endif
3121
3122 net->ipv6.sysctl.flush_delay = 0;
3123 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3124 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3125 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3126 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3127 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3128 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3129 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3130
3131 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3132
3133 ret = 0;
3134out:
3135 return ret;
3136
3137#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3138out_ip6_prohibit_entry:
3139 kfree(net->ipv6.ip6_prohibit_entry);
3140out_ip6_null_entry:
3141 kfree(net->ipv6.ip6_null_entry);
3142#endif
3143out_ip6_dst_entries:
3144 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3145out_ip6_dst_ops:
3146 goto out;
3147}
3148
3149static void __net_exit ip6_route_net_exit(struct net *net)
3150{
3151 kfree(net->ipv6.ip6_null_entry);
3152#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3153 kfree(net->ipv6.ip6_prohibit_entry);
3154 kfree(net->ipv6.ip6_blk_hole_entry);
3155#endif
3156 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3157}
3158
3159static int __net_init ip6_route_net_init_late(struct net *net)
3160{
3161#ifdef CONFIG_PROC_FS
3162 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3163 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3164#endif
3165 return 0;
3166}
3167
3168static void __net_exit ip6_route_net_exit_late(struct net *net)
3169{
3170#ifdef CONFIG_PROC_FS
3171 remove_proc_entry("ipv6_route", net->proc_net);
3172 remove_proc_entry("rt6_stats", net->proc_net);
3173#endif
3174}
3175
3176static struct pernet_operations ip6_route_net_ops = {
3177 .init = ip6_route_net_init,
3178 .exit = ip6_route_net_exit,
3179};
3180
3181static int __net_init ipv6_inetpeer_init(struct net *net)
3182{
3183 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3184
3185 if (!bp)
3186 return -ENOMEM;
3187 inet_peer_base_init(bp);
3188 net->ipv6.peers = bp;
3189 return 0;
3190}
3191
3192static void __net_exit ipv6_inetpeer_exit(struct net *net)
3193{
3194 struct inet_peer_base *bp = net->ipv6.peers;
3195
3196 net->ipv6.peers = NULL;
3197 inetpeer_invalidate_tree(bp);
3198 kfree(bp);
3199}
3200
3201static struct pernet_operations ipv6_inetpeer_ops = {
3202 .init = ipv6_inetpeer_init,
3203 .exit = ipv6_inetpeer_exit,
3204};
3205
3206static struct pernet_operations ip6_route_net_late_ops = {
3207 .init = ip6_route_net_init_late,
3208 .exit = ip6_route_net_exit_late,
3209};
3210
3211static struct notifier_block ip6_route_dev_notifier = {
3212 .notifier_call = ip6_route_dev_notify,
3213 .priority = 0,
3214};
3215
3216int __init ip6_route_init(void)
3217{
3218 int ret;
3219
3220 ret = -ENOMEM;
3221 ip6_dst_ops_template.kmem_cachep =
3222 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3223 SLAB_HWCACHE_ALIGN, NULL);
3224 if (!ip6_dst_ops_template.kmem_cachep)
3225 goto out;
3226
3227 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3228 if (ret)
3229 goto out_kmem_cache;
3230
3231 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3232 if (ret)
3233 goto out_dst_entries;
3234
3235 ret = register_pernet_subsys(&ip6_route_net_ops);
3236 if (ret)
3237 goto out_register_inetpeer;
3238
3239 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3240
3241
3242
3243
3244 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3245 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3246 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3247 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3248 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3249 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3250 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3251 #endif
3252 ret = fib6_init();
3253 if (ret)
3254 goto out_register_subsys;
3255
3256 ret = xfrm6_init();
3257 if (ret)
3258 goto out_fib6_init;
3259
3260 ret = fib6_rules_init();
3261 if (ret)
3262 goto xfrm6_init;
3263
3264 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3265 if (ret)
3266 goto fib6_rules_init;
3267
3268 ret = -ENOBUFS;
3269 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3270 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3271 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3272 goto out_register_late_subsys;
3273
3274 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3275 if (ret)
3276 goto out_register_late_subsys;
3277
3278out:
3279 return ret;
3280
3281out_register_late_subsys:
3282 unregister_pernet_subsys(&ip6_route_net_late_ops);
3283fib6_rules_init:
3284 fib6_rules_cleanup();
3285xfrm6_init:
3286 xfrm6_fini();
3287out_fib6_init:
3288 fib6_gc_cleanup();
3289out_register_subsys:
3290 unregister_pernet_subsys(&ip6_route_net_ops);
3291out_register_inetpeer:
3292 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3293out_dst_entries:
3294 dst_entries_destroy(&ip6_dst_blackhole_ops);
3295out_kmem_cache:
3296 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3297 goto out;
3298}
3299
3300void ip6_route_cleanup(void)
3301{
3302 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3303 unregister_pernet_subsys(&ip6_route_net_late_ops);
3304 fib6_rules_cleanup();
3305 xfrm6_fini();
3306 fib6_gc_cleanup();
3307 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3308 unregister_pernet_subsys(&ip6_route_net_ops);
3309 dst_entries_destroy(&ip6_dst_blackhole_ops);
3310 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3311}
3312