/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;

static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
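
/*
 *	Interface to generic destination cache.
 */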
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family = AF_INET,
	.check = ipv4_dst_check,
	.default_advmss = ipv4_default_advmss,
	.mtu = ipv4_mtu,
	.cow_metrics = ipv4_cow_metrics,
	.destroy = ipv4_dst_destroy,
	.negative_advice = ipv4_negative_advice,
	.link_failure = ipv4_link_failure,
	.update_pmtu = ip_rt_update_pmtu,
	.redirect = ip_do_redirect,
	.local_out = __ip_local_out,
	.neigh_lookup = ipv4_neigh_lookup,
};

#define ECN_OR_COST(class) TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start = rt_cache_seq_start,
	.next = rt_cache_seq_next,
	.stop = rt_cache_seq_stop,
	.show = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.owner = THIS_MODULE,
	.open = rt_cache_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   0, /* st->in_hit */
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   0, /* st->out_hit */
		   st->out_slow_tot,
		   st->out_slow_mc,

		   0, /* st->gc_total */
		   0, /* st->gc_ignored */
		   0, /* st->gc_goal_miss */
		   0, /* st->gc_dst_overflow */
		   0, /* st->in_hlist_search */
		   0  /* st->out_hlist_search */
		);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start = rt_cpu_seq_start,
	.next = rt_cpu_seq_next,
	.stop = rt_cpu_seq_stop,
	.show = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.owner = THIS_MODULE,
	.open = rt_cpu_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner = THIS_MODULE,
	.open = rt_acct_proc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif

static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}

#define IP_IDENTS_SZ 2048u

static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;
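
/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used.  This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */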
u32 ip_idents_reserve(u32 hash, int segs)
{
	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = ACCESS_ONCE(*p_tstamp);
	u32 now = (u32)jiffies;
	u32 new, delta = 0;

	if (old != now && cmpxchg(p_tstamp, old, now) == old)
		delta = prandom_u32_max(now - old);

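	/* Do not use atomic_add_return() as it makes UBSAN unhappy */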
	do {
		old = (u32)atomic_read(p_id);
		new = old + delta + segs;
	} while (atomic_cmpxchg(p_id, old, new) != old);

	return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);

void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
	static u32 ip_idents_hashrnd __read_mostly;
	u32 hash, id;

	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));

	hash = jhash_3words((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol ^ net_hash_mix(net),
			    ip_idents_hashrnd);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}

static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

static DEFINE_SPINLOCK(fnhe_lock);

static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
	struct rtable *rt;

	rt = rcu_dereference(fnhe->fnhe_rth_input);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
		rt_free(rt);
	}
	rt = rcu_dereference(fnhe->fnhe_rth_output);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
		rt_free(rt);
	}
}

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	fnhe_flush_routes(oldest);
	return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
	static u32 fnhe_hashrnd __read_mostly;
	u32 hval;

	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
	return hash_32(hval, FNHE_HASH_SHIFT);
}

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gateway = fnhe->fnhe_gw;
		rt->rt_uses_gateway = 1;
	}
}

static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	unsigned int i;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference(nh->nh_exceptions);
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nh->nh_exceptions, hash);
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = max(1UL, expires);
		}

		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;

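		/* Exception created; mark the cached route for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */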
		rt = rcu_dereference(nh->nh_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	rt = (struct rtable *) dst;

	__build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->dst.expires) {
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}
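
/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot the redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (sic!)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */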
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;
	int vif;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

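	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */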
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

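	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */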
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

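	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */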
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}

static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

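	/* IP on this device is disabled. */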
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}

static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (ipv4_mtu(dst) < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu)
{
	struct rtable *rt = (struct rtable *) dst;
	struct flowi4 fl4;

	ip_rt_build_flow_key(&fl4, sk, skb);
	__ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	if (!mark)
		mark = IP4_REPLY_MARK(net, skb->mark);

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	if (!fl4.flowi4_mark)
		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);

	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}

void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;
	bool new = false;

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	if (odst->obsolete && !odst->ops->check(odst, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rtable *rt = (struct rtable *) dst;

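	/* All IPV4 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 *
	 * When a PMTU/redirect information update invalidates a
	 * route, this is indicated by setting obsolete to
	 * DST_OBSOLETE_KILL.
	 */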
	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
		return NULL;
	return dst;
}

static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	pr_debug("%s: %pI4 -> %pI4, %s\n",
		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
		 skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	WARN_ON(1);
	return 0;
}
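
/*
 * We do not cache the source address of the outgoing interface,
 * because it is used only by IP RR, TS and SRR options, so it is
 * out of the fast path.
 *
 * BTW remember: "addr" is allowed to be unaligned in IP options!
 */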
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
	unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);

	if (advmss == 0) {
		advmss = max_t(unsigned int, dst->dev->mtu - 40,
			       ip_rt_min_advmss);
		if (advmss > 65535 - 40)
			advmss = 65535 - 40;
	}
	return advmss;
}

static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	return min_t(unsigned int, mtu, IP_MAX_MTU);
}

static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			return fnhe;
	}
	return NULL;
}

static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		if (!(rt->dst.flags & DST_NOCACHE)) {
			rcu_assign_pointer(*porig, rt);
			if (orig)
				rt_free(orig);
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}

static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}

struct uncached_list {
	spinlock_t lock;
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);

static void rt_add_uncached_list(struct rtable *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);

	rt->rt_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	if (!list_empty(&rt->rt_uncached)) {
		struct uncached_list *ul = rt->rt_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

void rt_flush_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&ul->lock);
	}
}

static bool rt_cache_valid(const struct rtable *rt)
{
	return rt &&
		rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
		!rt_is_expired(rt);
}

static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
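			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */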
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}

struct rtable *rt_dst_alloc(struct net_device *dev,
			    unsigned int flags, u16 type,
			    bool nopolicy, bool noxfrm, bool will_cache)
{
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
		       (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
		       (nopolicy ? DST_NOPOLICY : 0) |
		       (noxfrm ? DST_NOXFRM : 0));

	if (rt) {
		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
		rt->rt_flags = flags;
		rt->rt_type = type;
		rt->rt_is_input = 0;
		rt->rt_iif = 0;
		rt->rt_pmtu = 0;
		rt->rt_gateway = 0;
		rt->rt_uses_gateway = 0;
		rt->rt_table_id = 0;
		INIT_LIST_HEAD(&rt->rt_uncached);

		rt->dst.output = ip_output;
		if (flags & RTCF_LOCAL)
			rt->dst.input = ip_local_deliver;
	}

	return rt;
}
EXPORT_SYMBOL(rt_dst_alloc);

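/* called in rcu_read_lock() section */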
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	u32 itag = 0;
	int err;

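	/* Primary sanity checks. */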
	if (!in_dev)
		return -EINVAL;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
		goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	if (our)
		flags |= RTCF_LOCAL;

	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input= 1;

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}


static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
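		/*
		 * RFC1812 recommendation: if the source is martian,
		 * the only hint we have is the MAC header.
		 */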
		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			print_hex_dump(KERN_WARNING, "ll header: ",
				       DUMP_PREFIX_OFFSET, 16, 1,
				       skb_mac_header(skb),
				       dev->hard_header_len, true);
		}
	}
#endif
}

static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nh->nh_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}


static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

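	/* get a working reference to the output device */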
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		IPCB(skb)->flags |= IPSKB_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
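		/* Not IP (i.e. ARP).  Do not create a route if it is
		 * invalid for proxy arp.  DNAT routes are always valid.
		 *
		 * Proxy arp has been extended to allow ARP replies back
		 * to the same interface, to support private VLAN switch
		 * technologies.  See arp.c.
		 */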
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
	if (do_cache) {
		if (fnhe) {
			rth = rcu_dereference(fnhe->fnhe_rth_input);
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);

rt_cache:
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	if (res->table)
		rth->rt_table_id = res->table->tb_id;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
	if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
		rth->dst.lwtstate->orig_output = rth->dst.output;
		rth->dst.output = lwtunnel_output;
	}
	if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
		rth->dst.lwtstate->orig_input = rth->dst.input;
		rth->dst.input = lwtunnel_input;
	}
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
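
/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses in reverse direction.
 */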
static int ip_multipath_icmp_hash(struct sk_buff *skb)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	struct icmphdr _icmph;
	const struct icmphdr *icmph;
	struct iphdr _inner_iph;
	const struct iphdr *inner_iph;

	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto standard_hash;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto standard_hash;

	if (icmph->type != ICMP_DEST_UNREACH &&
	    icmph->type != ICMP_REDIRECT &&
	    icmph->type != ICMP_TIME_EXCEEDED &&
	    icmph->type != ICMP_PARAMETERPROB) {
		goto standard_hash;
	}

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto standard_hash;

	return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);

standard_hash:
	return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1) {
		int h;

		if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
			h = ip_multipath_icmp_hash(skb);
		else
			h = fib_multipath_hash(saddr, daddr);
		fib_select_multipath(res, h);
	}
#endif

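	/* create a routing cache entry */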
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
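
/*
 * NOTE. We drop all packets that have a local source address, because
 * every properly looped-back packet must already have the correct
 * destination attached by the output routine.  This handles non-simplex
 * devices properly and filters out IP spoofing attempts.
 *
 * Called with rcu_read_lock().
 */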
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct ip_tunnel_info *tun_info;
	struct flowi4 fl4;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	int err = -EINVAL;
	struct net *net = dev_net(dev);
	bool do_cache;

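	/* IP on this device is disabled. */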
	if (!in_dev)
		goto out;

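	/* Check for the most weird martians, which cannot be detected
	 * by fib_lookup.
	 */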
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	res.table = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

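	/* Accept zero addresses only to limited broadcast;
	 * anything else with a zeronet address is treated as martian.
	 */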
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

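	/* The following code tries to avoid calling
	 * IN_DEV_NET_ROUTE_LOCALNET(), and calls it at most once when
	 * daddr and/or saddr are loopback addresses.
	 */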
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

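	/* Now we are ready to route the packet. */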
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res, 0);
	if (err != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;
	if (res.table)
		rth->rt_table_id = res.table->tb_id;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}
	if (do_cache) {
		if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) {
			rth->dst.flags |= DST_NOCACHE;
			rt_add_uncached_list(rth);
		}
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	res.fi = NULL;
	res.table = NULL;
	goto local_input;

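	/* Do not cache martian addresses: they should be logged (RFC1812). */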
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}

int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			 u8 tos, struct net_device *dev)
{
	int res;

	rcu_read_lock();

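	/* Multicast recognition is done here rather than in the route
	 * cache: too many Ethernet cards have broken or missing hardware
	 * multicast filters, so a host on a multicasting network would
	 * otherwise collect a lot of useless route cache entries.
	 * Provided the software IP multicast filter is organized
	 * reasonably (at least hashed), this is not slower than route
	 * cache reject entries.  Multicast routers are not affected,
	 * because a route cache entry is created eventually.
	 */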
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			int our = ip_check_mc_rcu(in_dev, daddr, saddr,
						  ip_hdr(skb)->protocol);
			if (our
#ifdef CONFIG_IP_MROUTE
				||
			    (!ipv4_is_local_multicast(daddr) &&
			     IN_DEV_MFORWARD(in_dev))
#endif
			   ) {
				int res = ip_route_input_mc(skb, daddr, saddr,
							    tos, dev, our);
				rcu_read_unlock();
				return res;
			}
		}
		rcu_read_unlock();
		return -EINVAL;
	}
	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
	rcu_read_unlock();
	return res;
}
EXPORT_SYMBOL(ip_route_input_noref);

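/* called with rcu_read_lock() */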
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
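		/* If a multicast route does not exist, use the default one,
		 * but do not gateway in this case.  Yes, it is a hack.
		 */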
		if (fi && res->prefixlen < 4)
			fi = NULL;
	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
		   (orig_oif != dev_out->ifindex)) {
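		/* For local routes that require a particular output interface
		 * we do not want to cache the result.  Caching the result
		 * causes incorrect behaviour when there are multiple source
		 * addresses with the same local interface.
		 */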
		fi = NULL;
	}

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
			rth = rcu_dereference(*prth);
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(nh, fl4->daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		if (unlikely(fl4->flowi4_flags &
			     FLOWI_FLAG_KNOWN_NH &&
			     !(nh->nh_gw &&
			       nh->nh_scope == RT_SCOPE_LINK))) {
			do_cache = false;
			goto add;
		}
		prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
		rth = rcu_dereference(*prth);

rt_cache:
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif ? : 0;
	if (res->table)
		rth->rt_table_id = res->table->tb_id;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
	if (lwtunnel_output_redirect(rth->dst.lwtstate))
		rth->dst.output = lwtunnel_output;

	return rth;
}

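/*
 * Major route resolver routine.
 */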
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
					  int mp_hash)
{
	struct net_device *dev_out = NULL;
	__u8 tos = RT_FL_TOS(fl4);
	unsigned int flags = 0;
	struct fib_result res;
	struct rtable *rth;
	int master_idx;
	int orig_oif;
	int err = -ENETUNREACH;

	res.tclassid = 0;
	res.fi = NULL;
	res.table = NULL;

	orig_oif = fl4->flowi4_oif;

	master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
	if (master_idx)
		fl4->flowi4_oif = master_idx;
	fl4->flowi4_iif = LOOPBACK_IFINDEX;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

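		/* There is no check that oif == dev_out->ifindex here:
		 * __ip_dev_find() can return the wrong interface when
		 * saddr is assigned to multiple interfaces, and sending
		 * with the saddr of another interface is allowed anyway.
		 */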
		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
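			/* Equivalent to inet_addr_type(net, fl4->saddr) == RTN_LOCAL */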
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

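			/* Special hack: user can direct multicasts
			 * and limited broadcast via the necessary interface
			 * without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			 * This allows applications to bind a socket to
			 * loopback, set ttl to zero and announce a multicast
			 * route via this interface, expecting that the whole
			 * class may have a default route (RTN_UNICAST).
			 */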
			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
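			/* Equivalent to inet_addr_type(net, fl4->saddr) == RTN_LOCAL */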
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

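		/* RACE: Check return value of inet_select_addr instead. */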
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}

		rth = l3mdev_get_rtable(dev_out, fl4);
		if (rth)
			goto out;
	}

	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	err = fib_lookup(net, fl4, &res, 0);
	if (err) {
		res.fi = NULL;
		res.table = NULL;
		if (fl4->flowi4_oif &&
		    !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
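			/* Apparently, the routing tables are wrong.
			 * Assume that the destination is on-link.
			 *
			 * We are allowed to send to an interface even if
			 * it has no routes and no assigned addresses.
			 * When oif is specified, the routing tables are
			 * consulted only to learn whether the destination
			 * is gatewayed rather than direct.  Moreover, if
			 * MSG_DONTROUTE is set, we send the packet ignoring
			 * both the routing tables and the ifaddr state.
			 */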
2292 if (fl4->saddr == 0)
2293 fl4->saddr = inet_select_addr(dev_out, 0,
2294 RT_SCOPE_LINK);
2295 res.type = RTN_UNICAST;
2296 goto make_route;
2297 }
2298 rth = ERR_PTR(err);
2299 goto out;
2300 }
2301
2302 if (res.type == RTN_LOCAL) {
2303 if (!fl4->saddr) {
2304 if (res.fi->fib_prefsrc)
2305 fl4->saddr = res.fi->fib_prefsrc;
2306 else
2307 fl4->saddr = fl4->daddr;
2308 }
2309 dev_out = net->loopback_dev;
2310 fl4->flowi4_oif = dev_out->ifindex;
2311 flags |= RTCF_LOCAL;
2312 goto make_route;
2313 }
2314
2315 fib_select_path(net, &res, fl4, mp_hash);
2316
2317 dev_out = FIB_RES_DEV(res);
2318 fl4->flowi4_oif = dev_out->ifindex;
2319
2320
2321make_route:
2322 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
2323
2324out:
2325 rcu_read_unlock();
2326 return rth;
2327}
2328EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);

static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}

static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}

static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}

static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}

static struct dst_ops ipv4_dst_blackhole_ops = {
	.family = AF_INET,
	.check = ipv4_blackhole_dst_check,
	.mtu = ipv4_blackhole_mtu,
	.default_advmss = ipv4_default_advmss,
	.update_pmtu = ipv4_rt_blackhole_update_pmtu,
	.redirect = ipv4_rt_blackhole_redirect,
	.cow_metrics = ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup = ipv4_neigh_lookup,
};

struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid_ipv4(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);
		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
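
/* A blackhole route keeps a flow's dst alive while silently discarding
 * traffic: every dst_ops hook above is a no-op, and input/output are
 * wired to dst_discard*() in ipv4_blackhole_route(). A sketch of how a
 * caller (e.g. the xfrm code, which registers this as its per-family
 * blackhole_route op) might use it; error handling simplified:
 *
 *	dst = ipv4_blackhole_route(net, dst); (consumes the original dst)
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *	(packets routed via dst are now dropped, not forwarded)
 */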

struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
				    const struct sock *sk)
{
	struct rtable *rt = __ip_route_output_key(net, flp4);

	if (IS_ERR(rt))
		return rt;

	if (flp4->flowi4_proto)
		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
							flowi4_to_flowi(flp4),
							sk, 0);

	return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
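
/* Usage sketch (hypothetical, not part of this file): a socket-owning
 * caller resolving its route through the flow-aware wrapper above.
 * Setting flowi4_proto makes the result subject to the xfrm policy
 * lookup in ip_route_output_flow().
 *
 *	struct flowi4 fl4;
 *	struct rtable *rt;
 *
 *	memset(&fl4, 0, sizeof(fl4));
 *	fl4.daddr = daddr;
 *	fl4.saddr = saddr;
 *	fl4.flowi4_proto = IPPROTO_TCP;
 *	rt = ip_route_output_flow(net, &fl4, sk);
 *	if (IS_ERR(rt))
 *		return PTR_ERR(rt);
 */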

static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family = AF_INET;
	r->rtm_dst_len = 32;
	r->rtm_src_len = 0;
	r->rtm_tos = fl4->flowi4_tos;
	r->rtm_table = table_id;
	if (nla_put_u32(skb, RTA_TABLE, table_id))
		goto nla_put_failure;
	r->rtm_type = rt->rt_type;
	r->rtm_scope = RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
		r->rtm_flags |= RTCF_DOREDIRECT;

	if (nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_in_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, nowait, portid);

			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
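
/* The message built by rt_fill_info() is a struct rtmsg followed by a
 * variable set of attributes. For a typical unicast output route the
 * reply carries roughly (the exact set depends on the route):
 *
 *	RTA_TABLE	routing table id
 *	RTA_DST		destination address (/32)
 *	RTA_SRC		source address, if one was requested
 *	RTA_OIF		output interface index
 *	RTA_PREFSRC	preferred source address
 *	RTA_GATEWAY	next hop, if the route uses one
 *	RTA_METRICS	nested metrics, incl. a PMTU override
 *	RTA_MARK	skb mark from the flow key
 *	RTA_CACHEINFO	expiry and error, via rtnl_put_cacheinfo()
 */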

static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;
	u32 table_id = RT_TABLE_MAIN;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	 * through a good chunk of the routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (netif_index_is_l3_master(net, fl4.flowi4_oif))
		fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol = htons(ETH_P_IP);
		skb->dev = dev;
		skb->mark = mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
		table_id = rt->rt_table_id;

	err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err < 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
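
/* inet_rtm_getroute() is what answers "ip route get". A request such as
 *
 *	ip route get 8.8.8.8 from 192.0.2.1 iif eth0 mark 7
 *
 * arrives as RTM_GETROUTE carrying RTA_DST, RTA_SRC, RTA_IIF and
 * RTA_MARK, takes the ip_route_input() branch above (because an iif is
 * given), and is answered with the RTM_NEWROUTE message built by
 * rt_fill_info(). Without an iif, the output path via
 * ip_route_output_key() is used instead.
 */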

void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}

#ifdef CONFIG_SYSCTL
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;

static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
				     void __user *buffer,
				     size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)__ctl->extra1;

	if (write) {
		rt_cache_flush(net);
		fnhe_genid_bump(net);
		return 0;
	}

	return -EINVAL;
}
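
/* The flush handler is write-only; writing any value bumps the rt and
 * fnhe generation counters, so cached routes and exceptions are
 * invalidated lazily on their next check. From userspace, e.g.:
 *
 *	echo 1 > /proc/sys/net/ipv4/route/flush
 */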

static struct ctl_table ipv4_route_table[] = {
	{
		.procname = "gc_thresh",
		.data = &ipv4_dst_ops.gc_thresh,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "max_size",
		.data = &ip_rt_max_size,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname = "gc_min_interval",
		.data = &ip_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_min_interval_ms",
		.data = &ip_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_ms_jiffies,
	},
	{
		.procname = "gc_timeout",
		.data = &ip_rt_gc_timeout,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_interval",
		.data = &ip_rt_gc_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "redirect_load",
		.data = &ip_rt_redirect_load,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "redirect_number",
		.data = &ip_rt_redirect_number,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "redirect_silence",
		.data = &ip_rt_redirect_silence,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "error_cost",
		.data = &ip_rt_error_cost,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "error_burst",
		.data = &ip_rt_error_burst,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "gc_elasticity",
		.data = &ip_rt_gc_elasticity,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "mtu_expires",
		.data = &ip_rt_mtu_expires,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "min_pmtu",
		.data = &ip_rt_min_pmtu,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "min_adv_mss",
		.data = &ip_rt_min_advmss,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{ }
};
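
/* These knobs appear under /proc/sys/net/ipv4/route/. Entries using
 * proc_dointvec_jiffies are read and written in seconds and converted
 * to jiffies internally. For example, to lower the learned-PMTU
 * lifetime to five minutes:
 *
 *	sysctl -w net.ipv4.route.mtu_expires=300
 */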

static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname = "flush",
		.maxlen = sizeof(int),
		.mode = 0200,
		.proc_handler = ipv4_sysctl_rtcache_flush,
	},
	{ },
};

static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (!tbl)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (!net->ipv4.route_hdr)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_route_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
#endif

static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}

static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};

static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}

static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif

int __init ip_rt_init(void)
{
	int rc = 0;
	int cpu;

	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}

#ifdef CONFIG_SYSCTL
/* We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif