/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;

static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;

/*
 *	Interface to generic destination cache.
 */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		 ipv4_dst_destroy(struct dst_entry *dst);

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family			= AF_INET,
	.check			= ipv4_dst_check,
	.default_advmss		= ipv4_default_advmss,
	.mtu			= ipv4_mtu,
	.cow_metrics		= ipv4_cow_metrics,
	.destroy		= ipv4_dst_destroy,
	.negative_advice	= ipv4_negative_advice,
	.link_failure		= ipv4_link_failure,
	.update_pmtu		= ip_rt_update_pmtu,
	.redirect		= ip_do_redirect,
	.local_out		= __ip_local_out,
	.neigh_lookup		= ipv4_neigh_lookup,
};

#define ECN_OR_COST(class)	TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
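
/* rt_cache_stat is updated on the routing slow paths and dumped,
 * per possible CPU, through /proc/net/stat/rt_cache (see
 * rt_cpu_seq_show() below).
 */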

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu + 1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu + 1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
			" %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   0, /* st->in_hit */
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   0, /* st->out_hit */
		   st->out_slow_tot,
		   st->out_slow_mc,

		   0, /* st->gc_total */
		   0, /* st->gc_ignored */
		   0, /* st->gc_goal_miss */
		   0, /* st->gc_dst_overflow */
		   0, /* st->in_hlist_search */
		   0  /* st->out_hlist_search */
		);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};

static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */
432
433static inline bool rt_is_expired(const struct rtable *rth)
434{
435 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
436}
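
/* Flushing is done lazily: bumping the per-netns IPv4 generation id
 * makes rt_is_expired() above reject every rtable created under the
 * old genid, instead of tearing the cached entries down in one pass.
 */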
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}

#define IP_IDENTS_SZ 2048u

static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = ACCESS_ONCE(*p_tstamp);
	u32 now = (u32)jiffies;
	u32 new, delta = 0;

	if (old != now && cmpxchg(p_tstamp, old, now) == old)
		delta = prandom_u32_max(now - old);

	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
	do {
		old = (u32)atomic_read(p_id);
		new = old + delta + segs;
	} while (atomic_cmpxchg(p_id, old, new) != old);

	return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
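
/* Example (illustrative): a GSO packet carrying three segments calls
 * ip_idents_reserve(hash, 3) and gets back the first of three
 * consecutive IDs, so the segments go out as id, id + 1, id + 2.
 */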

void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
	static u32 ip_idents_hashrnd __read_mostly;
	u32 hash, id;

	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));

	hash = jhash_3words((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol ^ net_hash_mix(net),
			    ip_idents_hashrnd);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}

static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}
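
/* Nexthop exceptions (fib_nh_exception) hold per-destination state
 * learned from ICMP - a redirected gateway and/or a path MTU - hanging
 * off the FIB nexthop in a small hash table, together with the cached
 * input/output routes that carry that state.
 */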
static DEFINE_SPINLOCK(fnhe_lock);

static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
	struct rtable *rt;

	rt = rcu_dereference(fnhe->fnhe_rth_input);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
		rt_free(rt);
	}
	rt = rcu_dereference(fnhe->fnhe_rth_output);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
		rt_free(rt);
	}
}

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	fnhe_flush_routes(oldest);
	return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
	static u32 fnhe_hashrnd __read_mostly;
	u32 hval;

	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
	return hash_32(hval, FNHE_HASH_SHIFT);
}

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gateway = fnhe->fnhe_gw;
		rt->rt_uses_gateway = 1;
	}
}

static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	unsigned int i;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference(nh->nh_exceptions);
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nh->nh_exceptions, hash);
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = max(1UL, expires);
		}

		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nh->nh_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;

			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	rt = (struct rtable *) dst;

	__build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->dst.expires) {
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm for sending ICMP redirects:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot the redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load
 * limiting in icmp.c.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;
	int vif;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * until this guy rehabilitates (no timeout).
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}

static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
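
/* The peer->rate_tokens bucket above earns one token per jiffy and is
 * capped at ip_rt_error_burst; each ICMP error spends ip_rt_error_cost.
 * With the defaults (cost = HZ, burst = 5 * HZ) that allows a burst of
 * about five errors, then roughly one per second per source.
 */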

static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (ipv4_mtu(dst) < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu)
{
	struct rtable *rt = (struct rtable *) dst;
	struct flowi4 fl4;

	ip_rt_build_flow_key(&fl4, sk, skb);
	__ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	if (!mark)
		mark = IP4_REPLY_MARK(net, skb->mark);

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
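
/* Sketch of a typical caller (hypothetical, for illustration only):
 * an ICMP_FRAG_NEEDED handler that learned a new path MTU for the
 * offending packet could feed it back with
 *
 *	ipv4_update_pmtu(skb, net, new_mtu, 0, 0, iph->protocol, 0);
 *
 * which rebuilds the flow from the quoted header, resolves the route
 * and records the MTU as a nexthop exception via __ip_rt_update_pmtu().
 */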

static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	if (!fl4.flowi4_mark)
		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);

	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}

void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;
	bool new = false;

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	if (odst->obsolete && !odst->ops->check(odst, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rtable *rt = (struct rtable *) dst;

	/* All IPV4 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 *
	 * When a PMTU/redirect information update invalidates a route,
	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL,
	 * and the route is then rejected on the next check here.
	 */
	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
		return NULL;
	return dst;
}

static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	pr_debug("%s: %pI4 -> %pI4, %s\n",
		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
		 skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	WARN_ON(1);
	return 0;
}

/*
 * We do not cache the source address of the outgoing interface,
 * because it is used only by IP RR, TS and SRR options,
 * so it is out of the fast path.
 *
 * BTW remember: "addr" is allowed to be not aligned
 * in IP options!
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
	unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);

	if (advmss == 0) {
		advmss = max_t(unsigned int, dst->dev->mtu - 40,
			       ip_rt_min_advmss);
		if (advmss > 65535 - 40)
			advmss = 65535 - 40;
	}
	return advmss;
}

static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
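
/* ipv4_mtu() precedence: a still-valid learned path MTU on the route,
 * then the RTAX_MTU metric, are returned as-is; only when neither is
 * set does it fall back to the device MTU, clamped to 576 for locked
 * gateway routes, capped at IP_MAX_MTU and reduced by any lwtunnel
 * encapsulation headroom.
 */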

static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			return fnhe;
	}
	return NULL;
}

static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		if (!(rt->dst.flags & DST_NOCACHE)) {
			rcu_assign_pointer(*porig, rt);
			if (orig)
				rt_free(orig);
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}

static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}
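
/* rt_cache_route() installs the new route with a single cmpxchg: if
 * another CPU raced in first, the swap fails and the caller marks the
 * losing route DST_NOCACHE instead of retrying, keeping the fast path
 * lock-free.
 */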

struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);

static void rt_add_uncached_list(struct rtable *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);

	rt->rt_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	if (!list_empty(&rt->rt_uncached)) {
		struct uncached_list *ul = rt->rt_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

void rt_flush_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&ul->lock);
	}
}

static bool rt_cache_valid(const struct rtable *rt)
{
	return	rt &&
		rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
		!rt_is_expired(rt);
}

static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}

struct rtable *rt_dst_alloc(struct net_device *dev,
			    unsigned int flags, u16 type,
			    bool nopolicy, bool noxfrm, bool will_cache)
{
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
		       (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
		       (nopolicy ? DST_NOPOLICY : 0) |
		       (noxfrm ? DST_NOXFRM : 0));

	if (rt) {
		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
		rt->rt_flags = flags;
		rt->rt_type = type;
		rt->rt_is_input = 0;
		rt->rt_iif = 0;
		rt->rt_pmtu = 0;
		rt->rt_gateway = 0;
		rt->rt_uses_gateway = 0;
		rt->rt_table_id = 0;
		INIT_LIST_HEAD(&rt->rt_uncached);

		rt->dst.output = ip_output;
		if (flags & RTCF_LOCAL)
			rt->dst.input = ip_local_deliver;
	}

	return rt;
}
EXPORT_SYMBOL(rt_dst_alloc);
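
/* Flag mapping in rt_dst_alloc(): routes that will not be cached get
 * DST_HOST | DST_NOCACHE, per-device NOPOLICY and NOXFRM settings map
 * to DST_NOPOLICY and DST_NOXFRM, and RTCF_LOCAL preselects
 * ip_local_deliver() as the input handler.
 */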

/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (!in_dev)
		return -EINVAL;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
		goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	if (our)
		flags |= RTCF_LOCAL;

	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input = 1;

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}

static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 *	RFC1812 recommendation: if the source is martian,
		 *	the only hint is the MAC header.
		 */
		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			print_hex_dump(KERN_WARNING, "ll header: ",
				       DUMP_PREFIX_OFFSET, 16, 1,
				       skb_mac_header(skb),
				       dev->hard_header_len, true);
		}
	}
#endif
}

static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nh->nh_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}
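
/* Expired exceptions are not reaped by a timer: __mkroute_input() and
 * __mkroute_output() call ip_del_fnhe() lazily when they find a cached
 * exception route whose expiry has passed.
 */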

static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		IPCB(skb)->flags |= IPSKB_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create a route if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * The proxy arp feature has been extended to allow ARP
		 * replies back on the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
	if (do_cache) {
		if (fnhe) {
			rth = rcu_dereference(fnhe->fnhe_rth_input);
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);

rt_cache:
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	if (res->table)
		rth->rt_table_id = res->table->tb_id;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
	if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
		rth->dst.lwtstate->orig_output = rth->dst.output;
		rth->dst.output = lwtunnel_output;
	}
	if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
		rth->dst.lwtstate->orig_input = rth->dst.input;
		rth->dst.input = lwtunnel_input;
	}
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses in reverse order.
 */
static int ip_multipath_icmp_hash(struct sk_buff *skb)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	struct icmphdr _icmph;
	const struct icmphdr *icmph;
	struct iphdr _inner_iph;
	const struct iphdr *inner_iph;

	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto standard_hash;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto standard_hash;

	if (icmph->type != ICMP_DEST_UNREACH &&
	    icmph->type != ICMP_REDIRECT &&
	    icmph->type != ICMP_TIME_EXCEEDED &&
	    icmph->type != ICMP_PARAMETERPROB) {
		goto standard_hash;
	}

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto standard_hash;

	return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);

standard_hash:
	return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1) {
		int h;

		if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
			h = ip_multipath_icmp_hash(skb);
		else
			h = fib_multipath_hash(saddr, daddr);
		fib_select_multipath(res, h);
	}
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}

/*
 *	NOTE. We drop all packets that have a local source address,
 *	because every properly looped-back packet must have the correct
 *	destination already attached by the output routine.
 *
 *	Such an approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with 100% guarantee.
 *
 *	Called with rcu_read_lock().
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct ip_tunnel_info *tun_info;
	struct flowi4 fl4;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	int err = -EINVAL;
	struct net *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	 * by fib_lookup.
	 */

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	res.table = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast. */

	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* The following code tries to avoid calling
	 * IN_DEV_NET_ROUTE_LOCALNET() more than once when daddr and/or
	 * saddr are loopback addresses.
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route the packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res, 0);
	if (err != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.output = ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;
	if (res.table)
		rth->rt_table_id = res.table->tb_id;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input = ip_error;
		rth->dst.error = -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}
	if (do_cache) {
		if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) {
			rth->dst.flags |= DST_NOCACHE;
			rt_add_uncached_list(rth);
		}
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	res.fi = NULL;
	res.table = NULL;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}

int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			 u8 tos, struct net_device *dev)
{
	int res;

	rcu_read_lock();

	/* Multicast recognition logic is moved from route cache to here.
	 * The problem was that too many Ethernet cards have broken/missing
	 * hardware multicast filters :-( As a result, a host on a
	 * multicasting network acquires a lot of useless route cache
	 * entries (sort of SDR messages from all the world). Now we try
	 * to get rid of them. Really, provided the software IP multicast
	 * filter is organized reasonably (at least, hashed), it does not
	 * result in a slowdown compared with route cache reject entries.
	 * Note that multicast routers are not affected, because a route
	 * cache entry is created eventually.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			int our = ip_check_mc_rcu(in_dev, daddr, saddr,
						  ip_hdr(skb)->protocol);
			if (our
#ifdef CONFIG_IP_MROUTE
			    ||
			    (!ipv4_is_local_multicast(daddr) &&
			     IN_DEV_MFORWARD(in_dev))
#endif
			   ) {
				int res = ip_route_input_mc(skb, daddr, saddr,
							    tos, dev, our);
				rcu_read_unlock();
				return res;
			}
		}
		rcu_read_unlock();
		return -EINVAL;
	}
	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
	rcu_read_unlock();
	return res;
}
EXPORT_SYMBOL(ip_route_input_noref);

/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) &&
		    !(dev_out->flags & IFF_LOOPBACK) &&
		    !netif_is_l3_master(dev_out))
			return ERR_PTR(-EINVAL);

	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If a multicast route does not exist, use the
		 * default one, but do not gateway in this case.
		 * Yes, it is a hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
		   (orig_oif != dev_out->ifindex)) {
		/* For local routes that require a particular output interface
		 * we do not want to cache the result.  Caching the result
		 * causes incorrect behaviour when there are multiple source
		 * addresses with the same local interface oif.
		 */
		fi = NULL;
	}

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
			rth = rcu_dereference(*prth);
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(nh, fl4->daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		if (unlikely(fl4->flowi4_flags &
			     FLOWI_FLAG_KNOWN_NH &&
			     !(nh->nh_gw &&
			       nh->nh_scope == RT_SCOPE_LINK))) {
			do_cache = false;
			goto add;
		}
		prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
		rth = rcu_dereference(*prth);

rt_cache:
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif ? : 0;
	if (res->table)
		rth->rt_table_id = res->table->tb_id;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
	if (lwtunnel_output_redirect(rth->dst.lwtstate))
		rth->dst.output = lwtunnel_output;

	return rth;
}

/*
 * Major route resolver routine.
 */

struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
					  int mp_hash)
{
	struct net_device *dev_out = NULL;
	__u8 tos = RT_FL_TOS(fl4);
	unsigned int flags = 0;
	struct fib_result res;
	struct rtable *rth;
	int orig_oif;
	int err = -ENETUNREACH;

	res.tclassid	= 0;
	res.fi		= NULL;
	res.table	= NULL;

	orig_oif = fl4->flowi4_oif;

	fl4->flowi4_iif = LOOPBACK_IFINDEX;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* I removed check for oif == dev_out->oif here.
		 * It was wrong for two reasons:
		 * 1. ip_dev_find(net, saddr) can return the wrong iface,
		 *    if saddr is assigned to multiple interfaces.
		 * 2. Moreover, we are allowed to send packets with saddr
		 *    of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

			/* Special hack: the user can direct multicasts
			 * and limited broadcast via the necessary interface
			 * without fiddling with IP_MULTICAST_IF or
			 * IP_PKTINFO. This hack is not just for fun, it
			 * allows vic, vat and friends to work: they bind a
			 * socket and, if it fails, simply do not try to
			 * create a suitable multicast context.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	err = fib_lookup(net, fl4, &res, 0);
	if (err) {
		res.fi = NULL;
		res.table = NULL;
		if (fl4->flowi4_oif &&
		    !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
			/* Apparently, routing tables are wrong. Assume,
			 * that the destination is on link.
			 *
			 * WHY? DW.
			 * Because we are allowed to send to iface
			 * even if it has NO routes and NO assigned
			 * addresses. When oif is specified, routing
			 * tables are looked up with only one purpose:
			 * to catch if destination is gatewayed, rather than
			 * direct. Moreover, if MSG_DONTROUTE is set,
			 * we send packet, ignoring both routing tables
			 * and ifaddr state. --ANK
			 *
			 * We could make it even if oif is unknown,
			 * likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(err);
		goto out;
	}

	if (res.type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res.fi->fib_prefsrc)
				fl4->saddr = res.fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}

		/* L3 master device is the loopback for that domain */
		dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	fib_select_path(net, &res, fl4, mp_hash);

	dev_out = FIB_RES_DEV(res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);

out:
	rcu_read_unlock();
	return rth;
}
EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);

static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}

static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}

static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}

static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}

static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			= AF_INET,
	.check			= ipv4_blackhole_dst_check,
	.mtu			= ipv4_blackhole_mtu,
	.default_advmss		= ipv4_default_advmss,
	.update_pmtu		= ipv4_rt_blackhole_update_pmtu,
	.redirect		= ipv4_rt_blackhole_redirect,
	.cow_metrics		= ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		= ipv4_neigh_lookup,
};

struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid_ipv4(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);
		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2404
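/* As __ip_route_output_key(), but when the flow specifies a protocol the
 * result is additionally passed through xfrm_lookup_route() so that any
 * matching IPsec policy is applied before the caller sees the route.
 */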
struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
				    const struct sock *sk)
{
	struct rtable *rt = __ip_route_output_key(net, flp4);

	if (IS_ERR(rt))
		return rt;

	if (flp4->flowi4_proto)
		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
							flowi4_to_flowi(flp4),
							sk, 0);

	return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);

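/* Fill a netlink routing message describing the route attached to @skb.
 * Returns 0 on success or -EMSGSIZE when the attributes do not fit, in
 * which case the partially built message is cancelled.
 */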
static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family = AF_INET;
	r->rtm_dst_len = 32;
	r->rtm_src_len = 0;
	r->rtm_tos = fl4->flowi4_tos;
	r->rtm_table = table_id;
	if (nla_put_u32(skb, RTA_TABLE, table_id))
		goto nla_put_failure;
	r->rtm_type = rt->rt_type;
	r->rtm_scope = RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
		r->rtm_flags |= RTCF_DOREDIRECT;

	if (nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_in_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, nowait, portid);

			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

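/* Handle RTM_GETROUTE: perform a one-off lookup for the daddr/saddr in the
 * request.  When RTA_IIF is given the input path is exercised with a dummy
 * skb, otherwise an output route is resolved; the result is sent back to
 * the requester via rt_fill_info().
 */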
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;
	u32 table_id = RT_TABLE_MAIN;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	 * through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol = htons(ETH_P_IP);
		skb->dev = dev;
		skb->mark = mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
		table_id = rt->rt_table_id;

	err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err < 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}

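/* Flush cached routes for the namespace of a device whose multicast
 * configuration has changed.
 */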
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}

#ifdef CONFIG_SYSCTL
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;

static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
				     void __user *buffer,
				     size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)__ctl->extra1;

	if (write) {
		rt_cache_flush(net);
		fnhe_genid_bump(net);
		return 0;
	}

	return -EINVAL;
}

static struct ctl_table ipv4_route_table[] = {
	{
		.procname = "gc_thresh",
		.data = &ipv4_dst_ops.gc_thresh,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "max_size",
		.data = &ip_rt_max_size,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname = "gc_min_interval",
		.data = &ip_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_min_interval_ms",
		.data = &ip_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_ms_jiffies,
	},
	{
		.procname = "gc_timeout",
		.data = &ip_rt_gc_timeout,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_interval",
		.data = &ip_rt_gc_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "redirect_load",
		.data = &ip_rt_redirect_load,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "redirect_number",
		.data = &ip_rt_redirect_number,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "redirect_silence",
		.data = &ip_rt_redirect_silence,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "error_cost",
		.data = &ip_rt_error_cost,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "error_burst",
		.data = &ip_rt_error_burst,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "gc_elasticity",
		.data = &ip_rt_gc_elasticity,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "mtu_expires",
		.data = &ip_rt_mtu_expires,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "min_pmtu",
		.data = &ip_rt_min_pmtu,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "min_adv_mss",
		.data = &ip_rt_min_advmss,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{ }
};

static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname = "flush",
		.maxlen = sizeof(int),
		.mode = 0200,
		.proc_handler = ipv4_sysctl_rtcache_flush,
	},
	{ },
};

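/* Register the per-netns "flush" sysctl.  Namespaces other than init_net
 * get their own copy of the table so that extra1 can point at the right
 * struct net.
 */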
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (!tbl)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (!net->ipv4.route_hdr)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_route_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
#endif

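/* Initialise the per-netns generation counters used to expire cached
 * routes (rt_genid) and next-hop exceptions (fnhe_genid), and randomise
 * the initial device-address generation id.
 */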
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}

static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};

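/* Per-netns inetpeer storage: allocate the peer base when a namespace is
 * created and tear the whole tree down on exit.
 */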
static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}

static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif

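/* Boot-time initialisation of the IPv4 routing layer: IP ID state, the
 * per-cpu uncached route lists, the dst slab cache, proc files, xfrm and
 * the RTM_GETROUTE handler.  Any allocation failure here is fatal.
 */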
int __init ip_rt_init(void)
{
	int rc = 0;
	int cpu;

	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}

#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif