/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 */
#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/slab.h>

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
#include <net/inet_ecn.h>
#include <linux/netfilter_ipv4.h>
#include <net/xfrm.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <net/dst_metadata.h>

/*
 *	Process Router Attention IP option (RFC 2113)
 */
bool ip_call_ra_chain(struct sk_buff *skb)
{
	struct ip_ra_chain *ra;
	u8 protocol = ip_hdr(skb)->protocol;
	struct sock *last = NULL;
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);

	for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
		struct sock *sk = ra->sk;

		/* If socket is bound to an interface, only report
		 * the packet if it came from that interface.
		 */
		if (sk && inet_sk(sk)->inet_num == protocol &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == dev->ifindex)) {
			if (ip_is_fragment(ip_hdr(skb))) {
				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
					return true;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					raw_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		raw_rcv(last, skb);
		return true;
	}
	return false;
}

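/* Deliver the packet to the transport-layer handler registered for
 * @protocol in inet_protos[], after raw-socket delivery and the xfrm
 * input policy check.  Runs under rcu_read_lock().
 */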
void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
{
	const struct net_protocol *ipprot;
	int raw, ret;

resubmit:
	raw = raw_local_deliver(skb, protocol);

	ipprot = rcu_dereference(inet_protos[protocol]);
	if (ipprot) {
		if (!ipprot->no_policy) {
			if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				kfree_skb(skb);
				return;
			}
			nf_reset(skb);
		}
		ret = ipprot->handler(skb);
		if (ret < 0) {
			/* A negative return value asks for resubmission
			 * of the skb with -ret as the new protocol number.
			 */
			protocol = -ret;
			goto resubmit;
		}
		__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
	} else {
		if (!raw) {
			if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_PROT_UNREACH, 0);
			}
			kfree_skb(skb);
		} else {
			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
			consume_skb(skb);
		}
	}
}

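/* Strip the IP header and dispatch the payload to the matching
 * transport protocol.
 */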
static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__skb_pull(skb, skb_network_header_len(skb));

	rcu_read_lock();
	ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
	rcu_read_unlock();

	return 0;
}

/*
 *	Deliver IP Packets to the higher protocol layers.
 */
int ip_local_deliver(struct sk_buff *skb)
{
	/*
	 *	Reassemble IP fragments.
	 */
	struct net *net = dev_net(skb->dev);

	if (ip_is_fragment(ip_hdr(skb))) {
		if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
			return 0;
	}

	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
		       net, NULL, skb, skb->dev, NULL,
		       ip_local_deliver_finish);
}

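/* Parse and validate the IP options block.  Returns true if the packet
 * should be dropped.
 */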
static inline bool ip_rcv_options(struct sk_buff *skb)
{
	struct ip_options *opt;
	const struct iphdr *iph;
	struct net_device *dev = skb->dev;

	/* It looks as overkill, because not all
	   IP options require packet mangling.
	   But it is the easiest for now, especially taking
	   into account that combination of IP options
	   and running sniffer is extremely rare condition.
					      --ANK (980813)
	*/
	if (skb_cow(skb, skb_headroom(skb))) {
		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	iph = ip_hdr(skb);
	opt = &(IPCB(skb)->opt);
	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);

	if (ip_options_compile(dev_net(dev), opt, skb)) {
		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
		goto drop;
	}

	if (unlikely(opt->srr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
				if (IN_DEV_LOG_MARTIANS(in_dev))
					net_info_ratelimited("source route option %pI4 -> %pI4\n",
							     &iph->saddr,
							     &iph->daddr);
				goto drop;
			}
		}

		if (ip_options_rcv_srr(skb))
			goto drop;
	}

	return false;
drop:
	return true;
}

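/* Core receive-finish work shared by the single-skb and the list
 * receive paths: optional early demux, route lookup and input statistics.
 */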
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
			      struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	int (*edemux)(struct sk_buff *skb);
	struct rtable *rt;
	int err;

	if (net->ipv4.sysctl_ip_early_demux &&
	    !skb_dst(skb) &&
	    !skb->sk &&
	    !ip_is_fragment(iph)) {
		const struct net_protocol *ipprot;
		int protocol = iph->protocol;

		ipprot = rcu_dereference(inet_protos[protocol]);
		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
			err = edemux(skb);
			if (unlikely(err))
				goto drop_error;
			/* must reload iph, skb->head might have changed */
			iph = ip_hdr(skb);
		}
	}

	/*
	 *	Initialise the virtual path cache for the packet. It describes
	 *	how the packet travels inside Linux networking.
	 */
	if (!skb_valid_dst(skb)) {
		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					   iph->tos, dev);
		if (unlikely(err))
			goto drop_error;
	}

#ifdef CONFIG_IP_ROUTE_CLASSID
	if (unlikely(skb_dst(skb)->tclassid)) {
		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
		u32 idx = skb_dst(skb)->tclassid;
		st[idx&0xFF].o_packets++;
		st[idx&0xFF].o_bytes += skb->len;
		st[(idx>>16)&0xFF].i_packets++;
		st[(idx>>16)&0xFF].i_bytes += skb->len;
	}
#endif

	if (iph->ihl > 5 && ip_rcv_options(skb))
		goto drop;

	rt = skb_rtable(skb);
	if (rt->rt_type == RTN_MULTICAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
	} else if (rt->rt_type == RTN_BROADCAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
	} else if (skb->pkt_type == PACKET_BROADCAST ||
		   skb->pkt_type == PACKET_MULTICAST) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		/* RFC 1122 3.3.6:
		 *
		 *   When a host sends a datagram to a link-layer broadcast
		 *   address, the IP destination address MUST be a legal IP
		 *   broadcast or IP multicast address.
		 *
		 *   A host SHOULD silently discard a datagram that is received
		 *   via a link-layer broadcast (see Section 2.4) but does
		 *   not specify an IP multicast or broadcast destination
		 *   address.
		 *
		 * This doesn't explicitly say L2 *broadcast*, but broadcast is
		 * in a way a form of multicast and the most common use case for
		 * this is 802.11 protecting against cross-station spoofing (the
		 * so-called "hole-196" attack) so do it for both.
		 */
		if (in_dev &&
		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
			goto drop;
	}

	return NET_RX_SUCCESS;

drop:
	kfree_skb(skb);
	return NET_RX_DROP;

drop_error:
	if (err == -EXDEV)
		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
	goto drop;
}

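/* NF_INET_PRE_ROUTING okfn for a single skb: finish reception and hand
 * the packet to its route's input function.
 */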
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	int ret;

	/* if ingress device is enslaved to an L3 master device pass the
	 * skb to its handler for processing
	 */
	skb = l3mdev_ip_rcv(skb);
	if (!skb)
		return NET_RX_SUCCESS;

	ret = ip_rcv_finish_core(net, sk, skb, dev);
	if (ret != NET_RX_DROP)
		ret = dst_input(skb);
	return ret;
}

/*
 * 	Main IP Receive routine.  Validates the header, trims the skb to
 * 	the datagram length and returns NULL (after freeing) on error.
 */
static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
	const struct iphdr *iph;
	u32 len;

	/* When the interface is in promisc. mode, drop all the crap
	 * that it receives, do not try to analyse it.
	 */
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto drop;

	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

	iph = ip_hdr(skb);

	/*
	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 */
	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

	BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
	BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
	BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
	__IP_ADD_STATS(net,
		       IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
		       max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));

	if (!pskb_may_pull(skb, iph->ihl*4))
		goto inhdr_error;

	iph = ip_hdr(skb);

	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto csum_error;

	len = ntohs(iph->tot_len);
	if (skb->len < len) {
		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
		goto drop;
	} else if (len < (iph->ihl*4))
		goto inhdr_error;

	/* Our transport medium may have padded the buffer out. Now we know it
	 * is IP we can trim to the true length of the frame.
	 * Note this now means skb->len holds ntohs(iph->tot_len).
	 */
	if (pskb_trim_rcsum(skb, len)) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	iph = ip_hdr(skb);
	skb->transport_header = skb->network_header + iph->ihl*4;

	/* Remove any debris in the socket control block */
	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	IPCB(skb)->iif = skb->skb_iif;

	/* Must drop socket now because of tproxy. */
	skb_orphan(skb);

	return skb;

csum_error:
	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
	__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
	kfree_skb(skb);
out:
	return NULL;
}

/*
 * IP receive entry point
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
	   struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);

	skb = ip_rcv_core(skb, net);
	if (skb == NULL)
		return NET_RX_DROP;
	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
		       net, NULL, skb, dev, NULL,
		       ip_rcv_finish);
}

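/* Hand each packet on the per-dst sublist to its route's input handler. */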
static void ip_sublist_rcv_finish(struct list_head *head)
{
	struct sk_buff *skb, *next;

	list_for_each_entry_safe(skb, next, head, list) {
		skb_list_del_init(skb);
		/* Handle ip{6}_forward case, as sch_direct_xmit have
		 * another kind of SKB-list usage: using a new flag to make
		 * it easy to debug if a function uses the SKB-list
		 */
		dst_input(skb);
	}
}

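/* Run the receive-finish core on every packet in the list, regrouping
 * consecutive packets that share a dst so each group can be passed on
 * as a batch.
 */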
static void ip_list_rcv_finish(struct net *net, struct sock *sk,
			       struct list_head *head)
{
	struct dst_entry *curr_dst = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct dst_entry *dst;

		skb_list_del_init(skb);
		/* if ingress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip_rcv(skb);
		if (!skb)
			continue;
		if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
			continue;

		dst = skb_dst(skb);
		if (curr_dst != dst) {
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip_sublist_rcv_finish(&sublist);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dst = dst;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist */
	ip_sublist_rcv_finish(&sublist);
}

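/* Pass a sublist of packets, all from the same device and netns, through
 * the NF_INET_PRE_ROUTING hook and on to the receive-finish stage.
 */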
static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
			   struct net *net)
{
	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
		     head, dev, NULL, ip_rcv_finish);
	ip_list_rcv_finish(net, NULL, head);
}

/* Receive a list of IP packets */
void ip_list_rcv(struct list_head *head, struct packet_type *pt,
		 struct net_device *orig_dev)
{
	struct net_device *curr_dev = NULL;
	struct net *curr_net = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct net *net = dev_net(dev);

		skb_list_del_init(skb);
		skb = ip_rcv_core(skb, net);
		if (skb == NULL)
			continue;

		if (curr_dev != dev || curr_net != net) {
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip_sublist_rcv(&sublist, curr_dev, curr_net);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dev = dev;
			curr_net = net;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist, unless every packet was dropped above,
	 * in which case curr_dev/curr_net are still NULL
	 */
	if (!list_empty(&sublist))
		ip_sublist_rcv(&sublist, curr_dev, curr_net);
}