1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109#define pr_fmt(fmt) "IPv4: " fmt
110
111#include <linux/module.h>
112#include <linux/types.h>
113#include <linux/kernel.h>
114#include <linux/string.h>
115#include <linux/errno.h>
116#include <linux/slab.h>
117
118#include <linux/net.h>
119#include <linux/socket.h>
120#include <linux/sockios.h>
121#include <linux/in.h>
122#include <linux/inet.h>
123#include <linux/inetdevice.h>
124#include <linux/netdevice.h>
125#include <linux/etherdevice.h>
126#include <linux/indirect_call_wrapper.h>
127
128#include <net/snmp.h>
129#include <net/ip.h>
130#include <net/protocol.h>
131#include <net/route.h>
132#include <linux/skbuff.h>
133#include <net/sock.h>
134#include <net/arp.h>
135#include <net/icmp.h>
136#include <net/raw.h>
137#include <net/checksum.h>
138#include <net/inet_ecn.h>
139#include <linux/netfilter_ipv4.h>
140#include <net/xfrm.h>
141#include <linux/mroute.h>
142#include <linux/netlink.h>
143#include <net/dst_metadata.h>
144
145
146
147
148bool ip_call_ra_chain(struct sk_buff *skb)
149{
150 struct ip_ra_chain *ra;
151 u8 protocol = ip_hdr(skb)->protocol;
152 struct sock *last = NULL;
153 struct net_device *dev = skb->dev;
154 struct net *net = dev_net(dev);
155
156 for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
157 struct sock *sk = ra->sk;
158
159
160
161
162 if (sk && inet_sk(sk)->inet_num == protocol &&
163 (!sk->sk_bound_dev_if ||
164 sk->sk_bound_dev_if == dev->ifindex)) {
165 if (ip_is_fragment(ip_hdr(skb))) {
166 if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
167 return true;
168 }
169 if (last) {
170 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
171 if (skb2)
172 raw_rcv(last, skb2);
173 }
174 last = sk;
175 }
176 }
177
178 if (last) {
179 raw_rcv(last, skb);
180 return true;
181 }
182 return false;
183}
184
185INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
186INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
187void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
188{
189 const struct net_protocol *ipprot;
190 int raw, ret;
191
192resubmit:
193 raw = raw_local_deliver(skb, protocol);
194
195 ipprot = rcu_dereference(inet_protos[protocol]);
196 if (ipprot) {
197 if (!ipprot->no_policy) {
198 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
199 kfree_skb(skb);
200 return;
201 }
202 nf_reset_ct(skb);
203 }
204 ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
205 skb);
206 if (ret < 0) {
207 protocol = -ret;
208 goto resubmit;
209 }
210 __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
211 } else {
212 if (!raw) {
213 if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
214 __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
215 icmp_send(skb, ICMP_DEST_UNREACH,
216 ICMP_PROT_UNREACH, 0);
217 }
218 kfree_skb(skb);
219 } else {
220 __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
221 consume_skb(skb);
222 }
223 }
224}
225
226static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
227{
228 __skb_pull(skb, skb_network_header_len(skb));
229
230 rcu_read_lock();
231 ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
232 rcu_read_unlock();
233
234 return 0;
235}
236
237
238
239
240int ip_local_deliver(struct sk_buff *skb)
241{
242
243
244
245 struct net *net = dev_net(skb->dev);
246
247 if (ip_is_fragment(ip_hdr(skb))) {
248 if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
249 return 0;
250 }
251
252 return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
253 net, NULL, skb, skb->dev, NULL,
254 ip_local_deliver_finish);
255}
256
257static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
258{
259 struct ip_options *opt;
260 const struct iphdr *iph;
261
262
263
264
265
266
267
268
269 if (skb_cow(skb, skb_headroom(skb))) {
270 __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
271 goto drop;
272 }
273
274 iph = ip_hdr(skb);
275 opt = &(IPCB(skb)->opt);
276 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
277
278 if (ip_options_compile(dev_net(dev), opt, skb)) {
279 __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
280 goto drop;
281 }
282
283 if (unlikely(opt->srr)) {
284 struct in_device *in_dev = __in_dev_get_rcu(dev);
285
286 if (in_dev) {
287 if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
288 if (IN_DEV_LOG_MARTIANS(in_dev))
289 net_info_ratelimited("source route option %pI4 -> %pI4\n",
290 &iph->saddr,
291 &iph->daddr);
292 goto drop;
293 }
294 }
295
296 if (ip_options_rcv_srr(skb, dev))
297 goto drop;
298 }
299
300 return false;
301drop:
302 return true;
303}
304
305INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
306INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
307static int ip_rcv_finish_core(struct net *net, struct sock *sk,
308 struct sk_buff *skb, struct net_device *dev)
309{
310 const struct iphdr *iph = ip_hdr(skb);
311 int (*edemux)(struct sk_buff *skb);
312 struct rtable *rt;
313 int err;
314
315 if (net->ipv4.sysctl_ip_early_demux &&
316 !skb_dst(skb) &&
317 !skb->sk &&
318 !ip_is_fragment(iph)) {
319 const struct net_protocol *ipprot;
320 int protocol = iph->protocol;
321
322 ipprot = rcu_dereference(inet_protos[protocol]);
323 if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
324 err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
325 udp_v4_early_demux, skb);
326 if (unlikely(err))
327 goto drop_error;
328
329 iph = ip_hdr(skb);
330 }
331 }
332
333
334
335
336
337 if (!skb_valid_dst(skb)) {
338 err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
339 iph->tos, dev);
340 if (unlikely(err))
341 goto drop_error;
342 }
343
344#ifdef CONFIG_IP_ROUTE_CLASSID
345 if (unlikely(skb_dst(skb)->tclassid)) {
346 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
347 u32 idx = skb_dst(skb)->tclassid;
348 st[idx&0xFF].o_packets++;
349 st[idx&0xFF].o_bytes += skb->len;
350 st[(idx>>16)&0xFF].i_packets++;
351 st[(idx>>16)&0xFF].i_bytes += skb->len;
352 }
353#endif
354
355 if (iph->ihl > 5 && ip_rcv_options(skb, dev))
356 goto drop;
357
358 rt = skb_rtable(skb);
359 if (rt->rt_type == RTN_MULTICAST) {
360 __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
361 } else if (rt->rt_type == RTN_BROADCAST) {
362 __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
363 } else if (skb->pkt_type == PACKET_BROADCAST ||
364 skb->pkt_type == PACKET_MULTICAST) {
365 struct in_device *in_dev = __in_dev_get_rcu(dev);
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382 if (in_dev &&
383 IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
384 goto drop;
385 }
386
387 return NET_RX_SUCCESS;
388
389drop:
390 kfree_skb(skb);
391 return NET_RX_DROP;
392
393drop_error:
394 if (err == -EXDEV)
395 __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
396 goto drop;
397}
398
399static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
400{
401 struct net_device *dev = skb->dev;
402 int ret;
403
404
405
406
407 skb = l3mdev_ip_rcv(skb);
408 if (!skb)
409 return NET_RX_SUCCESS;
410
411 ret = ip_rcv_finish_core(net, sk, skb, dev);
412 if (ret != NET_RX_DROP)
413 ret = dst_input(skb);
414 return ret;
415}
416
417
418
419
420static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
421{
422 const struct iphdr *iph;
423 u32 len;
424
425
426
427
428 if (skb->pkt_type == PACKET_OTHERHOST)
429 goto drop;
430
431 __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
432
433 skb = skb_share_check(skb, GFP_ATOMIC);
434 if (!skb) {
435 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
436 goto out;
437 }
438
439 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
440 goto inhdr_error;
441
442 iph = ip_hdr(skb);
443
444
445
446
447
448
449
450
451
452
453
454
455 if (iph->ihl < 5 || iph->version != 4)
456 goto inhdr_error;
457
458 BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
459 BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
460 BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
461 __IP_ADD_STATS(net,
462 IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
463 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
464
465 if (!pskb_may_pull(skb, iph->ihl*4))
466 goto inhdr_error;
467
468 iph = ip_hdr(skb);
469
470 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
471 goto csum_error;
472
473 len = ntohs(iph->tot_len);
474 if (skb->len < len) {
475 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
476 goto drop;
477 } else if (len < (iph->ihl*4))
478 goto inhdr_error;
479
480
481
482
483
484 if (pskb_trim_rcsum(skb, len)) {
485 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
486 goto drop;
487 }
488
489 iph = ip_hdr(skb);
490 skb->transport_header = skb->network_header + iph->ihl*4;
491
492
493 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
494 IPCB(skb)->iif = skb->skb_iif;
495
496
497 skb_orphan(skb);
498
499 return skb;
500
501csum_error:
502 __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
503inhdr_error:
504 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
505drop:
506 kfree_skb(skb);
507out:
508 return NULL;
509}
510
511
512
513
514int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
515 struct net_device *orig_dev)
516{
517 struct net *net = dev_net(dev);
518
519 skb = ip_rcv_core(skb, net);
520 if (skb == NULL)
521 return NET_RX_DROP;
522
523 return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
524 net, NULL, skb, dev, NULL,
525 ip_rcv_finish);
526}
527
528static void ip_sublist_rcv_finish(struct list_head *head)
529{
530 struct sk_buff *skb, *next;
531
532 list_for_each_entry_safe(skb, next, head, list) {
533 skb_list_del_init(skb);
534 dst_input(skb);
535 }
536}
537
538static void ip_list_rcv_finish(struct net *net, struct sock *sk,
539 struct list_head *head)
540{
541 struct dst_entry *curr_dst = NULL;
542 struct sk_buff *skb, *next;
543 struct list_head sublist;
544
545 INIT_LIST_HEAD(&sublist);
546 list_for_each_entry_safe(skb, next, head, list) {
547 struct net_device *dev = skb->dev;
548 struct dst_entry *dst;
549
550 skb_list_del_init(skb);
551
552
553
554 skb = l3mdev_ip_rcv(skb);
555 if (!skb)
556 continue;
557 if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
558 continue;
559
560 dst = skb_dst(skb);
561 if (curr_dst != dst) {
562
563 if (!list_empty(&sublist))
564 ip_sublist_rcv_finish(&sublist);
565
566 INIT_LIST_HEAD(&sublist);
567 curr_dst = dst;
568 }
569 list_add_tail(&skb->list, &sublist);
570 }
571
572 ip_sublist_rcv_finish(&sublist);
573}
574
575static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
576 struct net *net)
577{
578 NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
579 head, dev, NULL, ip_rcv_finish);
580 ip_list_rcv_finish(net, NULL, head);
581}
582
583
584void ip_list_rcv(struct list_head *head, struct packet_type *pt,
585 struct net_device *orig_dev)
586{
587 struct net_device *curr_dev = NULL;
588 struct net *curr_net = NULL;
589 struct sk_buff *skb, *next;
590 struct list_head sublist;
591
592 INIT_LIST_HEAD(&sublist);
593 list_for_each_entry_safe(skb, next, head, list) {
594 struct net_device *dev = skb->dev;
595 struct net *net = dev_net(dev);
596
597 skb_list_del_init(skb);
598 skb = ip_rcv_core(skb, net);
599 if (skb == NULL)
600 continue;
601
602 if (curr_dev != dev || curr_net != net) {
603
604 if (!list_empty(&sublist))
605 ip_sublist_rcv(&sublist, curr_dev, curr_net);
606
607 INIT_LIST_HEAD(&sublist);
608 curr_dev = dev;
609 curr_net = net;
610 }
611 list_add_tail(&skb->list, &sublist);
612 }
613
614 ip_sublist_rcv(&sublist, curr_dev, curr_net);
615}
616