1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#define KMSG_COMPONENT "IPVS"
27#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28
29#include <linux/kernel.h>
30#include <linux/slab.h>
31#include <linux/tcp.h>
32#include <net/ip.h>
33#include <net/tcp.h>
34#include <net/udp.h>
35#include <net/icmp.h>
36#include <net/route.h>
37#include <net/ipv6.h>
38#include <net/ip6_route.h>
39#include <net/addrconf.h>
40#include <linux/icmpv6.h>
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv4.h>
43
44#include <net/ip_vs.h>
45
/* Mode flags for __ip_vs_get_out_rt() / __ip_vs_get_out_rt_v6(),
 * selecting which destination kinds are acceptable and how the
 * route lookup is performed.
 */
enum {
	IP_VS_RT_MODE_LOCAL = 1,	/* allow routing to a local address */
	IP_VS_RT_MODE_NON_LOCAL = 2,	/* allow routing to a non-local address */
	IP_VS_RT_MODE_RDR = 4,		/* allow redirect from remote daddr
					 * to local
					 */
	IP_VS_RT_MODE_CONNECT = 8,	/* bind route to the source address */
	IP_VS_RT_MODE_KNOWN_NH = 16,	/* route via known nexthop (sets
					 * FLOWI_FLAG_KNOWN_NH)
					 */
};
55
56
57
58
59static inline void
60__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
61 u32 dst_cookie)
62{
63 struct dst_entry *old_dst;
64
65 old_dst = dest->dst_cache;
66 dest->dst_cache = dst;
67 dest->dst_rtos = rtos;
68 dest->dst_cookie = dst_cookie;
69 dst_release(old_dst);
70}
71
72static inline struct dst_entry *
73__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
74{
75 struct dst_entry *dst = dest->dst_cache;
76
77 if (!dst)
78 return NULL;
79 if ((dst->obsolete || rtos != dest->dst_rtos) &&
80 dst->ops->check(dst, dest->dst_cookie) == NULL) {
81 dest->dst_cache = NULL;
82 dst_release(dst);
83 return NULL;
84 }
85 dst_hold(dst);
86 return dst;
87}
88
89static inline bool
90__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
91{
92 if (IP6CB(skb)->frag_max_size) {
93
94
95
96 if (IP6CB(skb)->frag_max_size > mtu)
97 return true;
98 }
99 else if (skb->len > mtu && !skb_is_gso(skb)) {
100 return true;
101 }
102 return false;
103}
104
105
/* Resolve an IPv4 output route to @daddr.
 *
 * @saddr is in/out: in CONNECT mode the current binding is used as the
 * flow source address and, on success, the chosen source is written
 * back.  Returns a held rtable, or NULL on lookup failure.
 */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
				       u32 rtos, int rt_mode, __be32 *saddr)
{
	struct flowi4 fl4;
	struct rtable *rt;
	int loop = 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = daddr;
	fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
	fl4.flowi4_tos = rtos;
	fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
			   FLOWI_FLAG_KNOWN_NH : 0;

retry:
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt)) {
		/* Invalid saddr? Retry once with the source unset. */
		if (PTR_ERR(rt) == -EINVAL && *saddr &&
		    rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
			*saddr = 0;
			flowi4_update_output(&fl4, 0, rtos, daddr, 0);
			goto retry;
		}
		IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
		return NULL;
	} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
		/* A source was auto-selected: redo the lookup with the
		 * source bound so that the route is tied to it.  The loop
		 * counter prevents a second retry on -EINVAL above.
		 */
		ip_rt_put(rt);
		*saddr = fl4.saddr;
		flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
		loop++;
		goto retry;
	}
	*saddr = fl4.saddr;
	return rt;
}
142
143
/*
 * Get route to destination or remote server.
 *
 * When @dest is set, its cached route and remembered source address
 * are reused (under dest->dst_lock); otherwise a one-off lookup for
 * @daddr is done.  @rt_mode (IP_VS_RT_MODE_* flags) selects which
 * destinations are acceptable.  On success a held route is returned
 * and the bound source address is stored in *@ret_saddr if non-NULL.
 */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rtable *rt;			/* Route to the other host */
	struct rtable *ort;			/* Original route */
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos))) {
			rt = do_output_route4(net, dest->addr.ip, rtos,
					      rt_mode, &dest->dst_saddr.ip);
			if (!rt) {
				spin_unlock(&dest->dst_lock);
				return NULL;
			}
			/* Cache an extra reference for later reuse. */
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
				  "rtos=%X\n",
				  &dest->addr.ip, &dest->dst_saddr.ip,
				  atomic_read(&rt->dst.__refcnt), rtos);
		}
		daddr = dest->addr.ip;
		if (ret_saddr)
			*ret_saddr = dest->dst_saddr.ip;
		spin_unlock(&dest->dst_lock);
	} else {
		__be32 saddr = htonl(INADDR_ANY);

		/* For such unconfigured boxes avoid many route lookups
		 * for performance reasons because we do not remember saddr
		 */
		rt_mode &= ~IP_VS_RT_MODE_CONNECT;
		rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
		if (!rt)
			return NULL;
		if (ret_saddr)
			*ret_saddr = saddr;
	}

	local = rt->rt_flags & RTCF_LOCAL;
	/* Reject destination kinds not allowed by rt_mode. */
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
			     (rt->rt_flags & RTCF_LOCAL) ?
			     "local":"non-local", &daddr);
		ip_rt_put(rt);
		return NULL;
	}
	/* Redirecting to a local address needs the NAT method unless the
	 * original route was already local.
	 */
	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
	    !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
		IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
			     "requires NAT method, dest: %pI4\n",
			     &ip_hdr(skb)->daddr, &daddr);
		ip_rt_put(rt);
		return NULL;
	}
	/* A loopback source must never leave the box. */
	if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
			     "to non-local address, dest: %pI4\n",
			     &ip_hdr(skb)->saddr, &daddr);
		ip_rt_put(rt);
		return NULL;
	}

	return rt;
}
214
215
/*
 * Reroute packet to local IPv4 stack after DNAT to a local address.
 * Returns 1 on success (skb now carries a local route), 0 on failure
 * (the skb's original route is left untouched).
 */
static int
__ip_vs_reroute_locally(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = rt->dst.dev;
	struct net *net = dev_net(dev);
	struct iphdr *iph = ip_hdr(skb);

	if (rt_is_input_route(rt)) {
		unsigned long orefdst = skb->_skb_refdst;

		/* Re-run input routing for the (rewritten) daddr; the old
		 * route reference is dropped only on success.
		 */
		if (ip_route_input(skb, iph->daddr, iph->saddr,
				   iph->tos, skb->dev))
			return 0;
		refdst_drop(orefdst);
	} else {
		struct flowi4 fl4 = {
			.daddr = iph->daddr,
			.saddr = iph->saddr,
			.flowi4_tos = RT_TOS(iph->tos),
			.flowi4_mark = skb->mark,
		};

		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt))
			return 0;
		/* Only accept the new route when it really is local. */
		if (!(rt->rt_flags & RTCF_LOCAL)) {
			ip_rt_put(rt);
			return 0;
		}
		/* Drop old route. */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	}
	return 1;
}
252
253#ifdef CONFIG_IP_VS_IPV6
254
255static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
256{
257 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
258}
259
/* Resolve an IPv6 output route to @daddr.
 *
 * When @ret_saddr is non-NULL, also select a source address for the
 * flow (and perform an xfrm lookup if @do_xfrm).  Returns a held dst
 * entry, or NULL on any error.
 */
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
			struct in6_addr *ret_saddr, int do_xfrm)
{
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.daddr = *daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error)
		goto out_err;
	if (!ret_saddr)
		return dst;
	if (ipv6_addr_any(&fl6.saddr) &&
	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
			       &fl6.daddr, 0, &fl6.saddr) < 0)
		goto out_err;
	if (do_xfrm) {
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
		if (IS_ERR(dst)) {
			/* xfrm_lookup consumed the dst; avoid releasing
			 * the error pointer below.
			 */
			dst = NULL;
			goto out_err;
		}
	}
	*ret_saddr = fl6.saddr;
	return dst;

out_err:
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}
293
294
295
296
/*
 * Get route to destination or remote server (IPv6 variant).
 *
 * Same contract as __ip_vs_get_out_rt(): uses/populates the per-dest
 * route cache under dest->dst_lock when @dest is set, then validates
 * the result against @rt_mode and the loopback rules.
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
		      int do_xfrm, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rt6_info *rt;			/* Route to the other host */
	struct rt6_info *ort;			/* Original route */
	struct dst_entry *dst;
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
		if (!rt) {
			u32 cookie;

			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest->dst_saddr.in6,
						      do_xfrm);
			if (!dst) {
				spin_unlock(&dest->dst_lock);
				return NULL;
			}
			rt = (struct rt6_info *) dst;
			/* The fib node serial number lets dst_check detect
			 * later FIB tree changes.
			 */
			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest->dst_saddr.in6,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			*ret_saddr = dest->dst_saddr.in6;
		spin_unlock(&dest->dst_lock);
	} else {
		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
		if (!dst)
			return NULL;
		rt = (struct rt6_info *) dst;
	}

	local = __ip_vs_is_local_route6(rt);
	/* Reject destination kinds not allowed by rt_mode. */
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
			     local ? "local":"non-local", daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	/* Redirecting to a local address needs the NAT method unless the
	 * original route was already local.
	 */
	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
	    !((ort = (struct rt6_info *) skb_dst(skb)) &&
	      __ip_vs_is_local_route6(ort))) {
		IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
			     "requires NAT method, dest: %pI6\n",
			     &ipv6_hdr(skb)->daddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	/* A loopback source must never leave the box. */
	if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
		     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
		     IPV6_ADDR_LOOPBACK)) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
			     "to non-local address, dest: %pI6\n",
			     &ipv6_hdr(skb)->saddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}

	return rt;
}
367#endif
368
369
370
371
372
373void
374ip_vs_dst_reset(struct ip_vs_dest *dest)
375{
376 struct dst_entry *old_dst;
377
378 old_dst = dest->dst_cache;
379 dest->dst_cache = NULL;
380 dst_release(old_dst);
381 dest->dst_saddr.ip = 0;
382}
383
/*
 * Final step before tunnel transmission: mark the skb as IPVS-owned,
 * optionally confirm the conntrack entry, and on NF_ACCEPT reset
 * netfilter state and prepare checksum forwarding.  Evaluates to the
 * netfilter verdict; on anything but NF_ACCEPT the caller must free
 * the skb itself.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __ret = NF_ACCEPT;					\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__ret = ip_vs_confirm_conntrack(skb);		\
	if (__ret == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__ret;							\
})
397
/*
 * Final transmit step for NAT-mangled packets: mark the skb as
 * IPVS-owned, untrack or update conntrack, then either return
 * NF_ACCEPT from the CALLING function (when @local) or push the
 * packet through the LOCAL_OUT hook.  NOTE: contains a return
 * statement, so it may exit the caller.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	else						\
		ip_vs_update_conntrack(skb, cp, 1);	\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)
411
/*
 * Final transmit step for unmangled packets: mark the skb as
 * IPVS-owned, optionally untrack it, then either return NF_ACCEPT
 * from the CALLING function (when @local) or push the packet through
 * the LOCAL_OUT hook.  NOTE: contains a return statement, so it may
 * exit the caller.
 */
#define IP_VS_XMIT(pf, skb, cp, local)			\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)
423
424
425
426
427
/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp)
{
	/* we do not touch skb and do not need pskb ptr */
	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
}
435
436
437
438
439
440
441
/*
 *      Bypass transmitter: forward the packet to its original
 *      destination without load balancing (e.g. cache bypass when no
 *      destination is available).
 */
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr *iph = ip_hdr(skb);
	int mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
				      IP_VS_RT_MODE_NON_LOCAL, NULL)))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
	    !skb_is_gso(skb)) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
495
496#ifdef CONFIG_IP_VS_IPV6
/*
 *      Bypass transmitter, IPv6 variant: forward the packet to its
 *      original destination without load balancing.
 */
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ipv6hdr *iph = ipv6_hdr(skb);
	int mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0,
					 IP_VS_RT_MODE_NON_LOCAL)))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (__mtu_check_toobig_v6(skb, mtu)) {
		/* icmpv6_send needs a device; fall back to loopback for
		 * locally generated packets.
		 */
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
554#endif
555
556
557
558
559
560int
561ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
562 struct ip_vs_protocol *pp)
563{
564 struct rtable *rt;
565 int mtu;
566 struct iphdr *iph = ip_hdr(skb);
567 int local;
568
569 EnterFunction(10);
570
571
572 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
573 __be16 _pt, *p;
574 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
575 if (p == NULL)
576 goto tx_error;
577 ip_vs_conn_fill_cport(cp, *p);
578 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
579 }
580
581 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
582 RT_TOS(iph->tos),
583 IP_VS_RT_MODE_LOCAL |
584 IP_VS_RT_MODE_NON_LOCAL |
585 IP_VS_RT_MODE_RDR, NULL)))
586 goto tx_error_icmp;
587 local = rt->rt_flags & RTCF_LOCAL;
588
589
590
591
592#if IS_ENABLED(CONFIG_NF_CONNTRACK)
593 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
594 enum ip_conntrack_info ctinfo;
595 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
596
597 if (ct && !nf_ct_is_untracked(ct)) {
598 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
599 "ip_vs_nat_xmit(): "
600 "stopping DNAT to local address");
601 goto tx_error_put;
602 }
603 }
604#endif
605
606
607 if (local && ipv4_is_loopback(cp->daddr.ip) &&
608 rt_is_input_route(skb_rtable(skb))) {
609 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
610 "stopping DNAT to loopback address");
611 goto tx_error_put;
612 }
613
614
615 mtu = dst_mtu(&rt->dst);
616 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
617 !skb_is_gso(skb)) {
618 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
619 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
620 "ip_vs_nat_xmit(): frag needed for");
621 goto tx_error_put;
622 }
623
624
625 if (!skb_make_writable(skb, sizeof(struct iphdr)))
626 goto tx_error_put;
627
628 if (skb_cow(skb, rt->dst.dev->hard_header_len))
629 goto tx_error_put;
630
631
632 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
633 goto tx_error_put;
634 ip_hdr(skb)->daddr = cp->daddr.ip;
635 ip_send_check(ip_hdr(skb));
636
637 if (!local) {
638
639 skb_dst_drop(skb);
640 skb_dst_set(skb, &rt->dst);
641 } else {
642 ip_rt_put(rt);
643
644
645
646
647
648 if (!__ip_vs_reroute_locally(skb))
649 goto tx_error;
650 }
651
652 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
653
654
655
656
657
658
659 skb->local_df = 1;
660
661 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
662
663 LeaveFunction(10);
664 return NF_STOLEN;
665
666 tx_error_icmp:
667 dst_link_failure(skb);
668 tx_error:
669 kfree_skb(skb);
670 LeaveFunction(10);
671 return NF_STOLEN;
672 tx_error_put:
673 ip_rt_put(rt);
674 goto tx_error;
675}
676
677#ifdef CONFIG_IP_VS_IPV6
678int
679ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
680 struct ip_vs_protocol *pp)
681{
682 struct rt6_info *rt;
683 int mtu;
684 int local;
685
686 EnterFunction(10);
687
688
689 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
690 __be16 _pt, *p;
691 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
692 sizeof(_pt), &_pt);
693 if (p == NULL)
694 goto tx_error;
695 ip_vs_conn_fill_cport(cp, *p);
696 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
697 }
698
699 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
700 0, (IP_VS_RT_MODE_LOCAL |
701 IP_VS_RT_MODE_NON_LOCAL |
702 IP_VS_RT_MODE_RDR))))
703 goto tx_error_icmp;
704 local = __ip_vs_is_local_route6(rt);
705
706
707
708
709#if IS_ENABLED(CONFIG_NF_CONNTRACK)
710 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
711 enum ip_conntrack_info ctinfo;
712 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
713
714 if (ct && !nf_ct_is_untracked(ct)) {
715 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
716 "ip_vs_nat_xmit_v6(): "
717 "stopping DNAT to local address");
718 goto tx_error_put;
719 }
720 }
721#endif
722
723
724 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
725 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
726 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
727 "ip_vs_nat_xmit_v6(): "
728 "stopping DNAT to loopback address");
729 goto tx_error_put;
730 }
731
732
733 mtu = dst_mtu(&rt->dst);
734 if (__mtu_check_toobig_v6(skb, mtu)) {
735 if (!skb->dev) {
736 struct net *net = dev_net(skb_dst(skb)->dev);
737
738 skb->dev = net->loopback_dev;
739 }
740 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
741 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
742 "ip_vs_nat_xmit_v6(): frag needed for");
743 goto tx_error_put;
744 }
745
746
747 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
748 goto tx_error_put;
749
750 if (skb_cow(skb, rt->dst.dev->hard_header_len))
751 goto tx_error_put;
752
753
754 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
755 goto tx_error;
756 ipv6_hdr(skb)->daddr = cp->daddr.in6;
757
758 if (!local || !skb->dev) {
759
760 skb_dst_drop(skb);
761 skb_dst_set(skb, &rt->dst);
762 } else {
763
764 dst_release(&rt->dst);
765 }
766
767 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
768
769
770
771
772
773
774 skb->local_df = 1;
775
776 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
777
778 LeaveFunction(10);
779 return NF_STOLEN;
780
781tx_error_icmp:
782 dst_link_failure(skb);
783tx_error:
784 LeaveFunction(10);
785 kfree_skb(skb);
786 return NF_STOLEN;
787tx_error_put:
788 dst_release(&rt->dst);
789 goto tx_error;
790}
791#endif
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
/*
 *   IP tunneling transmitter: encapsulate the packet in a new IPIP
 *   header whose destination is cp->daddr, using the source address
 *   remembered in the destination's route cache (IP_VS_RT_MODE_CONNECT),
 *   then send it through the local output path.
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr *old_iph = ip_hdr(skb);
	u8 tos = old_iph->tos;
	__be16 df;
	struct iphdr *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int mtu;
	int ret;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
						   IP_VS_RT_MODE_NON_LOCAL |
						   IP_VS_RT_MODE_CONNECT,
				      &saddr)))
		goto tx_error_icmp;
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		/* local destination: no encapsulation needed; the macro
		 * returns NF_ACCEPT when local==1 */
		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
	}

	tdev = rt->dst.dev;

	/* account for the encapsulating header */
	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
	if (mtu < 68) {
		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
		goto tx_error_put;
	}
	if (rt_is_output_route(skb_rtable(skb)))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	/* Copy DF flag only when PMTU discovery is enabled for the netns */
	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;

	if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is;
	 * otherwise reallocate headroom for the outer header.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		consume_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr)>>2;
	iph->frag_off = df;
	iph->protocol = IPPROTO_IPIP;
	iph->tos = tos;
	iph->daddr = cp->daddr.ip;
	iph->saddr = saddr;
	iph->ttl = old_iph->ttl;
	ip_select_ident(iph, &rt->dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = IP_VS_XMIT_TUNNEL(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
931
932#ifdef CONFIG_IP_VS_IPV6
/*
 *   IP tunneling transmitter, IPv6 variant: encapsulate the packet in
 *   a new IPv6-in-IPv6 header destined to cp->daddr and send it
 *   through the local output path.
 */
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct in6_addr saddr;			/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	struct ipv6hdr *old_iph = ipv6_hdr(skb);
	struct ipv6hdr *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int mtu;
	int ret;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
					 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
						     IP_VS_RT_MODE_NON_LOCAL))))
		goto tx_error_icmp;
	if (__ip_vs_is_local_route6(rt)) {
		dst_release(&rt->dst);
		/* local destination: no encapsulation needed; the macro
		 * returns NF_ACCEPT when local==1 */
		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
	}

	tdev = rt->dst.dev;

	/* account for the encapsulating header */
	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
	if (mtu < IPV6_MIN_MTU) {
		IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
			     IPV6_MIN_MTU);
		goto tx_error_put;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	/* MTU checking */
	if (__mtu_check_toobig_v6(skb, mtu)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is;
	 * otherwise reallocate headroom for the outer header.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			dst_release(&rt->dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		consume_skb(skb);
		skb = new_skb;
		old_iph = ipv6_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	/* NOTE(review): clears skb->cb via the IPv4 IPCB macro even in
	 * the IPv6 path, mirroring the IPv4 function — confirm intended */
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	iph = ipv6_hdr(skb);
	iph->version = 6;
	iph->nexthdr = IPPROTO_IPV6;
	iph->payload_len = old_iph->payload_len;
	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
	iph->priority = old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	iph->daddr = cp->daddr.in6;
	iph->saddr = saddr;
	iph->hop_limit = old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = IP_VS_XMIT_TUNNEL(skb, cp);
	if (ret == NF_ACCEPT)
		ip6_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
1047#endif
1048
1049
1050
1051
1052
1053
/*
 *      Direct Routing transmitter: send the packet unmodified to the
 *      real server via the route to cp->daddr (the real server must
 *      accept the VIP locally).
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr *iph = ip_hdr(skb);
	int mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(iph->tos),
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_KNOWN_NH, NULL)))
		goto tx_error_icmp;
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		/* local destination: the macro returns NF_ACCEPT when
		 * local==1 */
		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
	    !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
1114
1115#ifdef CONFIG_IP_VS_IPV6
/*
 *      Direct Routing transmitter, IPv6 variant: send the packet
 *      unmodified to the real server via the route to cp->daddr.
 */
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	int mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, (IP_VS_RT_MODE_LOCAL |
					     IP_VS_RT_MODE_NON_LOCAL))))
		goto tx_error_icmp;
	if (__ip_vs_is_local_route6(rt)) {
		dst_release(&rt->dst);
		/* local destination: the macro returns NF_ACCEPT when
		 * local==1 */
		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (__mtu_check_toobig_v6(skb, mtu)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
1177#endif
1178
1179
1180
1181
1182
1183
1184int
1185ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1186 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
1187{
1188 struct rtable *rt;
1189 int mtu;
1190 int rc;
1191 int local;
1192 int rt_mode;
1193
1194 EnterFunction(10);
1195
1196
1197
1198
1199 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1200 if (cp->packet_xmit)
1201 rc = cp->packet_xmit(skb, cp, pp);
1202 else
1203 rc = NF_ACCEPT;
1204
1205 atomic_inc(&cp->in_pkts);
1206 goto out;
1207 }
1208
1209
1210
1211
1212
1213
1214 rt_mode = (hooknum != NF_INET_FORWARD) ?
1215 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1216 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1217 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1218 RT_TOS(ip_hdr(skb)->tos),
1219 rt_mode, NULL)))
1220 goto tx_error_icmp;
1221 local = rt->rt_flags & RTCF_LOCAL;
1222
1223
1224
1225
1226
1227#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1228 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1229 enum ip_conntrack_info ctinfo;
1230 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1231
1232 if (ct && !nf_ct_is_untracked(ct)) {
1233 IP_VS_DBG(10, "%s(): "
1234 "stopping DNAT to local address %pI4\n",
1235 __func__, &cp->daddr.ip);
1236 goto tx_error_put;
1237 }
1238 }
1239#endif
1240
1241
1242 if (local && ipv4_is_loopback(cp->daddr.ip) &&
1243 rt_is_input_route(skb_rtable(skb))) {
1244 IP_VS_DBG(1, "%s(): "
1245 "stopping DNAT to loopback %pI4\n",
1246 __func__, &cp->daddr.ip);
1247 goto tx_error_put;
1248 }
1249
1250
1251 mtu = dst_mtu(&rt->dst);
1252 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1253 !skb_is_gso(skb)) {
1254 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1255 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1256 goto tx_error_put;
1257 }
1258
1259
1260 if (!skb_make_writable(skb, offset))
1261 goto tx_error_put;
1262
1263 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1264 goto tx_error_put;
1265
1266 ip_vs_nat_icmp(skb, pp, cp, 0);
1267
1268 if (!local) {
1269
1270 skb_dst_drop(skb);
1271 skb_dst_set(skb, &rt->dst);
1272 } else {
1273 ip_rt_put(rt);
1274
1275
1276
1277
1278
1279 if (!__ip_vs_reroute_locally(skb))
1280 goto tx_error;
1281 }
1282
1283
1284 skb->local_df = 1;
1285
1286 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
1287
1288 rc = NF_STOLEN;
1289 goto out;
1290
1291 tx_error_icmp:
1292 dst_link_failure(skb);
1293 tx_error:
1294 dev_kfree_skb(skb);
1295 rc = NF_STOLEN;
1296 out:
1297 LeaveFunction(10);
1298 return rc;
1299 tx_error_put:
1300 ip_rt_put(rt);
1301 goto tx_error;
1302}
1303
1304#ifdef CONFIG_IP_VS_IPV6
1305int
1306ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1307 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
1308{
1309 struct rt6_info *rt;
1310 int mtu;
1311 int rc;
1312 int local;
1313 int rt_mode;
1314
1315 EnterFunction(10);
1316
1317
1318
1319
1320 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1321 if (cp->packet_xmit)
1322 rc = cp->packet_xmit(skb, cp, pp);
1323 else
1324 rc = NF_ACCEPT;
1325
1326 atomic_inc(&cp->in_pkts);
1327 goto out;
1328 }
1329
1330
1331
1332
1333
1334
1335 rt_mode = (hooknum != NF_INET_FORWARD) ?
1336 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1337 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1338 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1339 0, rt_mode)))
1340 goto tx_error_icmp;
1341
1342 local = __ip_vs_is_local_route6(rt);
1343
1344
1345
1346
1347#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1348 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1349 enum ip_conntrack_info ctinfo;
1350 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1351
1352 if (ct && !nf_ct_is_untracked(ct)) {
1353 IP_VS_DBG(10, "%s(): "
1354 "stopping DNAT to local address %pI6\n",
1355 __func__, &cp->daddr.in6);
1356 goto tx_error_put;
1357 }
1358 }
1359#endif
1360
1361
1362 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1363 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1364 IP_VS_DBG(1, "%s(): "
1365 "stopping DNAT to loopback %pI6\n",
1366 __func__, &cp->daddr.in6);
1367 goto tx_error_put;
1368 }
1369
1370
1371 mtu = dst_mtu(&rt->dst);
1372 if (__mtu_check_toobig_v6(skb, mtu)) {
1373 if (!skb->dev) {
1374 struct net *net = dev_net(skb_dst(skb)->dev);
1375
1376 skb->dev = net->loopback_dev;
1377 }
1378 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1379 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1380 goto tx_error_put;
1381 }
1382
1383
1384 if (!skb_make_writable(skb, offset))
1385 goto tx_error_put;
1386
1387 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1388 goto tx_error_put;
1389
1390 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1391
1392 if (!local || !skb->dev) {
1393
1394 skb_dst_drop(skb);
1395 skb_dst_set(skb, &rt->dst);
1396 } else {
1397
1398 dst_release(&rt->dst);
1399 }
1400
1401
1402 skb->local_df = 1;
1403
1404 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
1405
1406 rc = NF_STOLEN;
1407 goto out;
1408
1409tx_error_icmp:
1410 dst_link_failure(skb);
1411tx_error:
1412 dev_kfree_skb(skb);
1413 rc = NF_STOLEN;
1414out:
1415 LeaveFunction(10);
1416 return rc;
1417tx_error_put:
1418 dst_release(&rt->dst);
1419 goto tx_error;
1420}
1421#endif
1422