/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 */
#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define IP_MAX_MTU	0xFFF0

#define RT_GC_TIMEOUT	(300*HZ)
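/*
 * Tunables, exported via /proc/sys/net/ipv4/route/.  The redirect knobs
 * feed the per-host limiter in ip_rt_send_redirect(); error_cost and
 * error_burst bound how often ip_error() emits ICMP unreachables; the
 * PMTU knobs clamp learned path MTUs and bound their lifetime.
 */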
static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly	= 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_gc_elasticity __read_mostly	= 8;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;

/*
 *	Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);

static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};

#define ECN_OR_COST(class)	TC_PRIO_##class
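/*
 * Map the RFC 1349 TOS field to a traffic-control queueing priority.
 * The table is indexed with IPTOS_TOS(tos) >> 1, i.e. TOS bits 1-4;
 * ECN_OR_COST() makes the entries with the low indexing bit set share
 * the priority of their base class.
 */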
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
				   &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif
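/*
 * Route invalidation is generation based: every cached rtable records the
 * namespace generation id it was created under, and rt_cache_flush()
 * simply bumps that counter, so rt_is_expired() treats every older entry
 * as stale the next time it is checked.
 */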
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}

/* Peer allocation may fail only in serious out-of-memory conditions, but
 * we still can generate some output.  The hashed global counter below is
 * weaker than per-destination IDs, but a possibly repeated packet
 * identifier is better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}
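/*
 * IP ID selection: a per-destination counter is kept in the inetpeer
 * cache, so consecutive datagrams to one host get consecutive IDs; only
 * when no peer entry can be allocated do we fall back to the hashed
 * global counter above.
 */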
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct net *net = dev_net(dst->dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
	if (peer) {
		iph->id = htons(inet_getid(peer, more));
		inet_putpeer(peer);
		return;
	}

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}

static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}
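/*
 * Per-nexthop exception entries (fib_nh_exception) record destinations
 * for which we learned a redirect gateway or a path MTU.  They live in a
 * small hash table hanging off the fib_nh and are read under RCU; all
 * updates are serialized by fnhe_lock.
 */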
static DEFINE_SPINLOCK(fnhe_lock);

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
	u32 hval;

	hval = (__force u32) daddr;
	hval ^= (hval >> 11) ^ (hval >> 22);

	return hval & (FNHE_HASH_SIZE - 1);
}

static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = expires;
		}
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}
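/*
 * Handle an ICMP redirect: verify that it comes from the gateway we are
 * actually using and that the advised gateway is a sane unicast on-link
 * address, and if so record it as a nexthop exception so subsequent
 * lookups use the new gateway.
 */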
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;

	rt = (struct rtable *) dst;

	ip_rt_build_flow_key(&fl4, sk, skb);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->dst.expires) {
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}
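/*
 * Redirect sending policy, accounted per source host in the inetpeer
 * cache: the first ip_rt_redirect_number redirects go out with an
 * exponentially growing delay (ip_rt_redirect_load << rate_tokens),
 * after which we stop, assuming the host ignores us.  If no packet
 * needing a redirect is seen for ip_rt_redirect_silence, the counter
 * resets and we start over.
 */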
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * in current window.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &rt->rt_gateway);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
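/*
 * ip_error() generates the ICMP destination-unreachable for a failed
 * forwarding decision.  Sending is throttled with a token bucket kept in
 * the inetpeer entry for the source: tokens accrue with the jiffies
 * elapsed since rate_last, are capped at ip_rt_error_burst, and each
 * ICMP sent costs ip_rt_error_cost of them.
 */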
static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);

	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
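/*
 * Record a learned path MTU: clamp it to ip_rt_min_pmtu, then store it
 * as a nexthop exception for this destination with an expiry of
 * ip_rt_mtu_expires from now.
 */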
static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct fib_result res;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	rcu_read_lock();
	if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
	return mtu;
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu)
{
	struct rtable *rt = (struct rtable *) dst;
	struct flowi4 fl4;

	ip_rt_build_flow_key(&fl4, sk, skb);
	mtu = __ip_rt_update_pmtu(rt, &fl4, mtu);

	if (!rt->rt_pmtu) {
		dst->obsolete = DST_OBSOLETE_KILL;
	} else {
		rt->rt_pmtu = mtu;
		rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
	}
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rtable *rt = (struct rtable *) dst;

	/* All IPV4 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 *
	 * When a PMTU/redirect information update invalidates a
	 * route, this is indicated by setting obsolete to
	 * DST_OBSOLETE_KILL.
	 */
	if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
		return NULL;
	return dst;
}

static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct sk_buff *skb)
{
	pr_debug("%s: %pI4 -> %pI4, %s\n",
		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
		 skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	WARN_ON(1);
	return 0;
}

/*
 * We do not cache the source address of the outgoing interface,
 * because it is used only by IP RR, TS and SRR options,
 * so that it is out of the fast path.
 *
 * BTW remember: "addr" is allowed to be not aligned
 * in IP options!
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
	unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);

	if (advmss == 0) {
		advmss = max_t(unsigned int, dst->dev->mtu - 40,
			       ip_rt_min_advmss);
		if (advmss > 65535 - 40)
			advmss = 65535 - 40;
	}
	return advmss;
}

static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	if (mtu && time_after_eq(jiffies, rt->dst.expires))
		mtu = 0;

	if (!mtu)
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu && rt_is_output_route(rt))
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		if (rt->rt_gateway && mtu > 576)
			mtu = 576;
	}

	if (mtu > IP_MAX_MTU)
		mtu = IP_MAX_MTU;

	return mtu;
}

static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash = nh->nh_exceptions;
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			return fnhe;
	}
	return NULL;
}

static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable *orig;

		if (fnhe->fnhe_pmtu) {
			unsigned long expires = fnhe->fnhe_expires;
			unsigned long diff = expires - jiffies;

			if (time_before(jiffies, expires)) {
				rt->rt_pmtu = fnhe->fnhe_pmtu;
				dst_set_expires(&rt->dst, diff);
			}
		}
		if (fnhe->fnhe_gw) {
			rt->rt_flags |= RTCF_REDIRECTED;
			rt->rt_gateway = fnhe->fnhe_gw;
		}

		orig = rcu_dereference(fnhe->fnhe_rth);
		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	} else {
		/* The exception entry was recycled for another
		 * destination while we were waiting for the lock,
		 * so this route cannot be cached there.
		 */
		rt->dst.flags |= DST_NOCACHE;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
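/*
 * Cache a route on its nexthop: input routes hang off nh->nh_rth_input,
 * output routes off the per-cpu nh_pcpu_rth_output slot.  The slot is
 * claimed with cmpxchg() so no lock is needed; if we lose the race the
 * route is simply marked DST_NOCACHE and later freed via the uncached
 * list.
 */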
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		if (!nh->nh_pcpu_rth_output)
			goto nocache;
		p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else {
		/* Another CPU won the cmpxchg() race, or the nexthop
		 * has no per-cpu output slot to hold this route; do
		 * not cache it.
		 */
nocache:
		rt->dst.flags |= DST_NOCACHE;
		ret = false;
	}

	return ret;
}

static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);

static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	if (!list_empty(&rt->rt_uncached)) {
		spin_lock_bh(&rt_uncached_lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&rt_uncached_lock);
	}
}

void rt_flush_dev(struct net_device *dev)
{
	if (!list_empty(&rt_uncached_list)) {
		struct net *net = dev_net(dev);
		struct rtable *rt;

		spin_lock_bh(&rt_uncached_lock);
		list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&rt_uncached_lock);
	}
}

static bool rt_cache_valid(const struct rtable *rt)
{
	return	rt &&
		rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
		!rt_is_expired(rt);
}

static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
			rt->rt_gateway = nh->nh_gw;
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
	}
	if (unlikely(!cached))
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}

static struct rtable *rt_dst_alloc(struct net_device *dev,
				   bool nopolicy, bool noxfrm, bool will_cache)
{
	return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
			 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
			 (nopolicy ? DST_NOPOLICY : 0) |
			 (noxfrm ? DST_NOXFRM : 0));
}

/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->dst.output = ip_rt_bug;

	rth->rt_genid	= rt_genid(dev_net(dev));
	rth->rt_flags	= RTCF_MULTICAST;
	rth->rt_type	= RTN_MULTICAST;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		rth->dst.input = ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}


static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 *	RFC1812 recommendation, if source is martian,
		 *	the only hint is MAC header.
		 */
		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			print_hex_dump(KERN_WARNING, "ll header: ",
				       DUMP_PREFIX_OFFSET, 16, 1,
				       skb_mac_header(skb),
				       dev->hard_header_len, true);
		}
	}
#endif
}

/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	if (out_dev == in_dev && err &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	do_cache = false;
	if (res->fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}

static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1)
		fib_select_multipath(res);
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
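/*
 * Slow-path input route resolution, called with rcu_read_lock() held:
 * screen out martian sources and destinations, run the FIB lookup, and
 * build a local, broadcast or forwarding route for the packet.  Packets
 * with local source addresses are dropped, since a properly looped-back
 * packet must already carry its destination from the output path; this
 * also filters IP spoofing attempts.
 */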
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4 fl4;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	int err = -EINVAL;
	struct net *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	 * by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Zero source addresses are accepted only for the limited
	 * broadcast case handled above; anything else is a martian.
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
		if (ipv4_is_loopback(daddr))
			goto martian_destination;

		if (ipv4_is_loopback(saddr))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  net->loopback_dev->ifindex,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.input = ip_local_deliver;
	rth->dst.output = ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input = ip_error;
		rth->dst.error = -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}
	if (do_cache)
		rt_cache_route(&FIB_RES_NH(res), rth);
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}

int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			 u8 tos, struct net_device *dev)
{
	int res;

	rcu_read_lock();

	/* Multicast packets are recognized here rather than through a
	 * route lookup: if the group was joined on this device (or the
	 * packet is subject to multicast forwarding), hand it to
	 * ip_route_input_mc(), otherwise reject it.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			int our = ip_check_mc_rcu(in_dev, daddr, saddr,
						  ip_hdr(skb)->protocol);
			if (our
#ifdef CONFIG_IP_MROUTE
				||
			    (!ipv4_is_local_multicast(daddr) &&
			     IN_DEV_MFORWARD(in_dev))
#endif
			   ) {
				int res = ip_route_input_mc(skb, daddr, saddr,
							    tos, dev, our);
				rcu_read_unlock();
				return res;
			}
		}
		rcu_read_unlock();
		return -EINVAL;
	}
	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
	rcu_read_unlock();
	return res;
}
EXPORT_SYMBOL(ip_route_input_noref);
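/*
 * Build (or fetch from the nexthop cache) an output rtable for a
 * completed FIB lookup, called under rcu_read_lock(): classify the
 * destination as unicast, broadcast or multicast, reuse a valid cached
 * per-cpu or exception route when one exists, and otherwise allocate
 * and initialize a fresh dst.
 */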
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	fnhe = NULL;
	if (fi) {
		struct rtable __rcu **prth;

		fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
		if (fnhe)
			prth = &fnhe->fnhe_rth;
		else
			prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}
	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   fi);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_is_input = 0;
	rth->rt_iif	= orig_oif ? : 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL)
		rth->dst.input = ip_local_deliver;
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);

	return rth;
}

/*
 * Major route resolver routine.
 */
struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
{
	struct net_device *dev_out = NULL;
	__u8 tos = RT_FL_TOS(fl4);
	unsigned int flags = 0;
	struct fib_result res;
	struct rtable *rth;
	int orig_oif;

	res.tclassid	= 0;
	res.fi		= NULL;
	res.table	= NULL;

	orig_oif = fl4->flowi4_oif;

	fl4->flowi4_iif = net->loopback_dev->ifindex;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* There is deliberately no check that the source address
		 * belongs to the output interface: a saddr may be assigned
		 * to several interfaces, and sending with the saddr of
		 * another interface is allowed.
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (dev_out == NULL)
				goto out;

			/* Special hack: the user can direct multicasts and
			 * limited broadcast via the necessary interface
			 * without fiddling with IP_MULTICAST_IF or
			 * IP_PKTINFO: binding to a local source address
			 * selects the device that owns it, and we route
			 * out through that device.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (dev_out == NULL)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr)) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = net->loopback_dev->ifindex;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	if (fib_lookup(net, fl4, &res)) {
		res.fi = NULL;
		res.table = NULL;
		if (fl4->flowi4_oif) {
			/* The routing tables have no route to the
			 * destination, but an output interface was
			 * specified: assume the destination is on-link and
			 * build a direct route.  Sending to an interface is
			 * allowed even when it has no routes and no
			 * assigned addresses; when oif is given, the lookup
			 * only serves to detect a gatewayed destination.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(-ENETUNREACH);
		goto out;
	}

	if (res.type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res.fi->fib_prefsrc)
				fl4->saddr = res.fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
		fib_select_multipath(&res);
	else
#endif
	if (!res.prefixlen &&
	    res.table->tb_num_default > 1 &&
	    res.type == RTN_UNICAST && !fl4->flowi4_oif)
		fib_select_default(&res);

	if (!fl4->saddr)
		fl4->saddr = FIB_RES_PREFSRC(net, res);

	dev_out = FIB_RES_DEV(res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);

out:
	rcu_read_unlock();
	return rth;
}
EXPORT_SYMBOL_GPL(__ip_route_output_key);

static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}

static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}

static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}

static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}

static struct dst_ops ipv4_dst_blackhole_ops = {
	.family		= AF_INET,
	.protocol	= cpu_to_be16(ETH_P_IP),
	.check		= ipv4_blackhole_dst_check,
	.mtu		= ipv4_blackhole_mtu,
	.default_advmss	= ipv4_default_advmss,
	.update_pmtu	= ipv4_rt_blackhole_update_pmtu,
	.redirect	= ipv4_rt_blackhole_redirect,
	.cow_metrics	= ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup	= ipv4_neigh_lookup,
};

struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);

		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}

struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
				    struct sock *sk)
{
	struct rtable *rt = __ip_route_output_key(net, flp4);

	if (IS_ERR(rt))
		return rt;

	if (flp4->flowi4_proto)
		rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
						   flowi4_to_flowi(flp4),
						   sk, 0);

	return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);

static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
			struct flowi4 *fl4, struct sk_buff *skb, u32 pid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family	= AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	if (nla_put_be32(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_be32(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_gateway &&
	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;
	expires = rt->dst.expires;
	if (expires) {
		if (time_before(jiffies, expires))
			expires -= jiffies;
		else
			expires = 0;
	}

	if (rt_is_input_route(rt)) {
		if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
			goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
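/*
 * RTM_GETROUTE handler: build a dummy skb for the queried flow, resolve
 * it through ip_route_input() (when an input interface is given) or
 * ip_route_output_key(), and unicast the result back to the requester
 * via rt_fill_info().
 */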
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	 * through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}

int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;
}

void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}

#ifdef CONFIG_SYSCTL
static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
				     void __user *buffer,
				     size_t *lenp, loff_t *ppos)
{
	if (write) {
		rt_cache_flush((struct net *)__ctl->extra1);
		return 0;
	}

	return -EINVAL;
}

static ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};

static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (net->ipv4.route_hdr == NULL)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_route_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
#endif

static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->rt_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}

static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};

static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}

static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif

int __init ip_rt_init(void)
{
	int rc = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init(ip_rt_max_size);
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}

#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif