1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/* pr_fmt() prepends the "IPVS: " component tag to a format string. */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
37
38#include <linux/module.h>
39#include <linux/slab.h>
40#include <linux/inetdevice.h>
41#include <linux/net.h>
42#include <linux/completion.h>
43#include <linux/delay.h>
44#include <linux/skbuff.h>
45#include <linux/in.h>
46#include <linux/igmp.h>
47#include <linux/udp.h>
48#include <linux/err.h>
49#include <linux/kthread.h>
50#include <linux/wait.h>
51#include <linux/kernel.h>
52
53#include <asm/unaligned.h>
54
55#include <net/ip.h>
56#include <net/sock.h>
57
58#include <net/ip_vs.h>
59
/* 0xe0000051 reads as 224.0.0.81 when taken as a host-order IPv4 address. */
#define IP_VS_SYNC_GROUP 0xe0000051
#define IP_VS_SYNC_PORT 8848

/*
 * Value the sender stores in ip_vs_sync_mesg.version and the receiver
 * requires before treating a message as the current format.
 */
#define SYNC_PROTO_VER 1

/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably a lockdep class used for the sync sockets -- confirm.
 */
static struct lock_class_key __ipvs_sync_key;
66
67
68
69
/*
 * One connection entry in a version 0 sync message.
 * ip_vs_sync_conn_v0() only emits this format for AF_INET connections.
 */
struct ip_vs_sync_conn_v0 {
	__u8 reserved;		/* written as 0 by the sender */

	/* Connection identity, copied as-is from struct ip_vs_conn */
	__u8 protocol;
	__be16 cport;
	__be16 vport;
	__be16 dport;
	__be32 caddr;
	__be32 vaddr;
	__be32 daddr;

	/* Converted with htons() on send and ntohs() on receive */
	__be16 flags;
	__be16 state;

	/*
	 * A struct ip_vs_sync_conn_options follows this entry when flags
	 * has a bit of IP_VS_CONN_F_SEQ_MASK set.
	 */
};
88
/*
 * Sequence-adjustment data for both directions of a connection.
 * Appended to a v0 entry, or carried as an IPVS_OPT_SEQ_DATA option
 * in the current format.
 */
struct ip_vs_sync_conn_options {
	struct ip_vs_seq in_seq;
	struct ip_vs_seq out_seq;
};
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
 * Fixed part of an IPv4 connection entry in the current sync format.
 * Optional type/length/value parameters may follow it.
 */
struct ip_vs_sync_v4 {
	__u8 type;		/* 0 for IPv4; STYPE_F_INET6 marks an IPv6 entry */
	__u8 protocol;
	__be16 ver_size;	/* low SVER_SHIFT bits: entry size; bits above: version */

	__be32 flags;		/* connection flags, IP_VS_CONN_F_HASHED cleared by sender */
	__be16 state;

	/* Ports and addresses are copied as-is from struct ip_vs_conn */
	__be16 cport;
	__be16 vport;
	__be16 dport;
	__be32 fwmark;
	__be32 timeout;		/* sender stores cp->timeout / HZ */
	__be32 caddr;
	__be32 vaddr;
	__be32 daddr;

	/* Optional parameters follow: one type byte, one length byte, data */
};
151
152
153
/*
 * Fixed part of an IPv6 connection entry in the current sync format.
 * The leading fields up to and including timeout match struct
 * ip_vs_sync_v4; only the address fields differ.
 */
struct ip_vs_sync_v6 {
	__u8 type;		/* has STYPE_F_INET6 set */
	__u8 protocol;
	__be16 ver_size;	/* low SVER_SHIFT bits: entry size; bits above: version */

	__be32 flags;
	__be16 state;

	__be16 cport;
	__be16 vport;
	__be16 dport;
	__be32 fwmark;
	__be32 timeout;		/* sender stores cp->timeout / HZ */
	struct in6_addr caddr;
	struct in6_addr vaddr;
	struct in6_addr daddr;

	/* Optional parameters follow: one type byte, one length byte, data */
};
173
/*
 * Overlay of the two entry layouts. The code in this file reads the
 * common leading fields through .v4 for both address families.
 */
union ip_vs_sync_conn {
	struct ip_vs_sync_v4 v4;
	struct ip_vs_sync_v6 v6;
};
178
179
/* Bit number and mask of the "IPv6 entry" flag in the entry type byte */
#define STYPE_INET6 0
#define STYPE_F_INET6 (1 << STYPE_INET6)

/* ver_size layout: size in the bits selected by SVER_MASK, version above SVER_SHIFT */
#define SVER_SHIFT 12
#define SVER_MASK 0x0fff

/* Option type codes carried in the type byte of an optional parameter */
#define IPVS_OPT_SEQ_DATA 1
#define IPVS_OPT_PE_DATA 2
#define IPVS_OPT_PE_NAME 3
#define IPVS_OPT_PARAM 7

/*
 * Bit masks derived from the option codes. The first three record which
 * options were already seen while decoding an entry. IPVS_OPT_F_PARAM is
 * tested against the type byte itself: an unknown option without this bit
 * set causes the entry to be dropped.
 */
#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
195
/*
 * Per-thread context for a sync thread.
 * NOTE(review): the users of this struct are outside the visible part of
 * the file; field meanings below are inferred from names -- confirm.
 */
struct ip_vs_sync_thread_data {
	struct netns_ipvs *ipvs;	/* owning IPVS netns state */
	struct socket *sock;		/* presumably the thread's UDP socket */
	char *buf;			/* presumably a receive buffer */
	int id;				/* presumably the thread index */
};
202
203
/* Size of a v0 entry without and with the trailing sequence options */
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE \
(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
/*
 * NOTE(review): neither constant is referenced in the visible part of the
 * file. 4 matches the byte size of the struct ip_vs_sync_mesg_v0 fields and
 * 255 is the largest value its __u8 nr_conns can hold -- confirm intent.
 */
#define SYNC_MESG_HEADER_LEN 4
#define MAX_CONNS_PER_SYNCBUFF 255
245
246
/* Header of a version 0 sync message; connection entries follow it. */
struct ip_vs_sync_mesg_v0 {
	__u8 nr_conns;		/* number of entries in the message */
	__u8 syncid;
	__be16 size;		/* total message length, network byte order */

	/* ip_vs_sync_conn_v0 entries start here */
};
254
255
/*
 * Header of a current-format sync message. The first byte overlays
 * nr_conns of the v0 header; the receiver treats a message as current
 * format only when reserved and spare are 0 and version matches
 * SYNC_PROTO_VER.
 */
struct ip_vs_sync_mesg {
	__u8 reserved;		/* must be zero */
	__u8 syncid;
	__be16 size;		/* total message length, network byte order */
	__u8 nr_conns;
	__s8 version;		/* SYNC_PROTO_VER */
	__u16 spare;		/* must be zero */

	/* ip_vs_sync_conn entries start here */
};
265
/* Socket address storage large enough for either address family. */
union ipvs_sockaddr {
	struct sockaddr_in in;
	struct sockaddr_in6 in6;
};
270
/* One sync message being filled by the master, or queued for sending. */
struct ip_vs_sync_buff {
	struct list_head list;		/* link in the per-thread sync_queue */
	unsigned long firstuse;		/* jiffies at creation */

	/* Pointers into the separately allocated message memory */
	struct ip_vs_sync_mesg *mesg;	/* start of the message (header) */
	unsigned char *head;		/* where the next entry is written */
	unsigned char *end;		/* one past the end of the allocation */
};
280
281
282
283
284
285static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
286{
287 memset(ho, 0, sizeof(*ho));
288 ho->init_seq = get_unaligned_be32(&no->init_seq);
289 ho->delta = get_unaligned_be32(&no->delta);
290 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
291}
292
293
294
295
296
297static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
298{
299 put_unaligned_be32(ho->init_seq, &no->init_seq);
300 put_unaligned_be32(ho->delta, &no->delta);
301 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
302}
303
304static inline struct ip_vs_sync_buff *
305sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
306{
307 struct ip_vs_sync_buff *sb;
308
309 spin_lock_bh(&ipvs->sync_lock);
310 if (list_empty(&ms->sync_queue)) {
311 sb = NULL;
312 __set_current_state(TASK_INTERRUPTIBLE);
313 } else {
314 sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
315 list);
316 list_del(&sb->list);
317 ms->sync_queue_len--;
318 if (!ms->sync_queue_len)
319 ms->sync_queue_delay = 0;
320 }
321 spin_unlock_bh(&ipvs->sync_lock);
322
323 return sb;
324}
325
326
327
328
329static inline struct ip_vs_sync_buff *
330ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
331{
332 struct ip_vs_sync_buff *sb;
333
334 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
335 return NULL;
336
337 len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
338 ipvs->mcfg.sync_maxlen);
339 sb->mesg = kmalloc(len, GFP_ATOMIC);
340 if (!sb->mesg) {
341 kfree(sb);
342 return NULL;
343 }
344 sb->mesg->reserved = 0;
345 sb->mesg->version = SYNC_PROTO_VER;
346 sb->mesg->syncid = ipvs->mcfg.syncid;
347 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
348 sb->mesg->nr_conns = 0;
349 sb->mesg->spare = 0;
350 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
351 sb->end = (unsigned char *)sb->mesg + len;
352
353 sb->firstuse = jiffies;
354 return sb;
355}
356
357static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
358{
359 kfree(sb->mesg);
360 kfree(sb);
361}
362
363static inline void sb_queue_tail(struct netns_ipvs *ipvs,
364 struct ipvs_master_sync_state *ms)
365{
366 struct ip_vs_sync_buff *sb = ms->sync_buff;
367
368 spin_lock(&ipvs->sync_lock);
369 if (ipvs->sync_state & IP_VS_STATE_MASTER &&
370 ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
371 if (!ms->sync_queue_len)
372 schedule_delayed_work(&ms->master_wakeup_work,
373 max(IPVS_SYNC_SEND_DELAY, 1));
374 ms->sync_queue_len++;
375 list_add_tail(&sb->list, &ms->sync_queue);
376 if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
377 wake_up_process(ms->master_thread);
378 } else
379 ip_vs_sync_buff_release(sb);
380 spin_unlock(&ipvs->sync_lock);
381}
382
383
384
385
386
387static inline struct ip_vs_sync_buff *
388get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
389 unsigned long time)
390{
391 struct ip_vs_sync_buff *sb;
392
393 spin_lock_bh(&ipvs->sync_buff_lock);
394 sb = ms->sync_buff;
395 if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
396 ms->sync_buff = NULL;
397 __set_current_state(TASK_RUNNING);
398 } else
399 sb = NULL;
400 spin_unlock_bh(&ipvs->sync_buff_lock);
401 return sb;
402}
403
404static inline int
405select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
406{
407 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
408}
409
410
411
412
413static inline struct ip_vs_sync_buff *
414ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
415{
416 struct ip_vs_sync_buff *sb;
417 struct ip_vs_sync_mesg_v0 *mesg;
418
419 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
420 return NULL;
421
422 len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
423 ipvs->mcfg.sync_maxlen);
424 sb->mesg = kmalloc(len, GFP_ATOMIC);
425 if (!sb->mesg) {
426 kfree(sb);
427 return NULL;
428 }
429 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
430 mesg->nr_conns = 0;
431 mesg->syncid = ipvs->mcfg.syncid;
432 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
433 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
434 sb->end = (unsigned char *)mesg + len;
435 sb->firstuse = jiffies;
436 return sb;
437}
438
439
440static inline bool in_persistence(struct ip_vs_conn *cp)
441{
442 for (cp = cp->control; cp; cp = cp->control) {
443 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
444 return true;
445 }
446 return false;
447}
448
449
450
451
452
453
454
455
456
457
/*
 * Decide whether connection @cp should be synced to the backup now.
 * @pkts: packet counter value compared against the sync threshold/period
 *
 * cp->sync_endtime holds the expiry time reported by the last sync in its
 * upper bits and a retry counter in its low 2 bits. On a positive decision
 * it is updated with cmpxchg() so that only one concurrent caller wins,
 * and cp->old_state is set to the current state.
 *
 * Returns non-zero when a sync entry should be sent, 0 otherwise.
 */
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
				  struct ip_vs_conn *cp, int pkts)
{
	unsigned long orig = READ_ONCE(cp->sync_endtime);
	unsigned long now = jiffies;
	/* Candidate new sync_endtime, low 2 bits cleared for the retry count */
	unsigned long n = (now + cp->timeout) & ~3UL;
	unsigned int sync_refresh_period;
	int sync_period;
	int force;

	/* Check whether the connection is in a state that is synced at all */
	if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
		force = 0;
	else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
		/* In persist mode, connections controlled by a template are not synced */
		return 0;
	else if (likely(cp->protocol == IPPROTO_TCP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_TCP_S_ESTABLISHED) |
		       (1 << IP_VS_TCP_S_FIN_WAIT) |
		       (1 << IP_VS_TCP_S_CLOSE) |
		       (1 << IP_VS_TCP_S_CLOSE_WAIT) |
		       (1 << IP_VS_TCP_S_TIME_WAIT))))
			return 0;
		/* A state change forces a sync; outside ESTABLISHED skip the rate checks */
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
			goto set;
	} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
		       (1 << IP_VS_SCTP_S_CLOSED))))
			return 0;
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
			goto set;
	} else {
		/* Other protocols: no state-based forcing */
		force = 0;
	}

	sync_refresh_period = sysctl_sync_refresh_period(ipvs);
	if (sync_refresh_period > 0) {
		long diff = n - orig;
		long min_diff = max(cp->timeout >> 1, 10UL * HZ);

		/*
		 * The reported expiry moved by less than the refresh period
		 * (capped by half the timeout, but at least 10 seconds):
		 * only a limited number of retries is allowed.
		 */
		if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
			int retries = orig & 3;

			if (retries >= sysctl_sync_retries(ipvs))
				return 0;
			/* Space retries at least sync_refresh_period / 8 apart */
			if (time_before(now, orig - cp->timeout +
					(sync_refresh_period >> 3)))
				return 0;
			n |= retries + 1;
		}
	}
	sync_period = sysctl_sync_period(ipvs);
	if (sync_period > 0) {
		/* Non-templates sync only on the threshold packet of each period */
		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
		    pkts % sync_period != sysctl_sync_threshold(ipvs))
			return 0;
	} else if (!sync_refresh_period &&
		   pkts != sysctl_sync_threshold(ipvs))
		return 0;

set:
	cp->old_state = cp->state;
	/* n becomes the previous value; equal to orig means this caller won */
	n = cmpxchg(&cp->sync_endtime, orig, n);
	return n == orig || force;
}
533
534
535
536
537
538static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
539 int pkts)
540{
541 struct ip_vs_sync_mesg_v0 *m;
542 struct ip_vs_sync_conn_v0 *s;
543 struct ip_vs_sync_buff *buff;
544 struct ipvs_master_sync_state *ms;
545 int id;
546 unsigned int len;
547
548 if (unlikely(cp->af != AF_INET))
549 return;
550
551 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
552 return;
553
554 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
555 return;
556
557 spin_lock_bh(&ipvs->sync_buff_lock);
558 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
559 spin_unlock_bh(&ipvs->sync_buff_lock);
560 return;
561 }
562
563 id = select_master_thread_id(ipvs, cp);
564 ms = &ipvs->ms[id];
565 buff = ms->sync_buff;
566 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
567 SIMPLE_CONN_SIZE;
568 if (buff) {
569 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
570
571 if (buff->head + len > buff->end || !m->nr_conns) {
572 sb_queue_tail(ipvs, ms);
573 ms->sync_buff = NULL;
574 buff = NULL;
575 }
576 }
577 if (!buff) {
578 buff = ip_vs_sync_buff_create_v0(ipvs, len);
579 if (!buff) {
580 spin_unlock_bh(&ipvs->sync_buff_lock);
581 pr_err("ip_vs_sync_buff_create failed.\n");
582 return;
583 }
584 ms->sync_buff = buff;
585 }
586
587 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
588 s = (struct ip_vs_sync_conn_v0 *) buff->head;
589
590
591 s->reserved = 0;
592 s->protocol = cp->protocol;
593 s->cport = cp->cport;
594 s->vport = cp->vport;
595 s->dport = cp->dport;
596 s->caddr = cp->caddr.ip;
597 s->vaddr = cp->vaddr.ip;
598 s->daddr = cp->daddr.ip;
599 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
600 s->state = htons(cp->state);
601 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
602 struct ip_vs_sync_conn_options *opt =
603 (struct ip_vs_sync_conn_options *)&s[1];
604 memcpy(opt, &cp->in_seq, sizeof(*opt));
605 }
606
607 m->nr_conns++;
608 m->size = htons(ntohs(m->size) + len);
609 buff->head += len;
610 spin_unlock_bh(&ipvs->sync_buff_lock);
611
612
613 cp = cp->control;
614 if (cp) {
615 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
616 pkts = atomic_add_return(1, &cp->in_pkts);
617 else
618 pkts = sysctl_sync_threshold(ipvs);
619 ip_vs_sync_conn(ipvs, cp, pkts);
620 }
621}
622
623
624
625
626
627
628void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
629{
630 struct ip_vs_sync_mesg *m;
631 union ip_vs_sync_conn *s;
632 struct ip_vs_sync_buff *buff;
633 struct ipvs_master_sync_state *ms;
634 int id;
635 __u8 *p;
636 unsigned int len, pe_name_len, pad;
637
638
639 if (sysctl_sync_ver(ipvs) == 0) {
640 ip_vs_sync_conn_v0(ipvs, cp, pkts);
641 return;
642 }
643
644 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
645 goto control;
646sloop:
647 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
648 goto control;
649
650
651 pe_name_len = 0;
652 if (cp->pe_data_len) {
653 if (!cp->pe_data || !cp->dest) {
654 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
655 return;
656 }
657 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
658 }
659
660 spin_lock_bh(&ipvs->sync_buff_lock);
661 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
662 spin_unlock_bh(&ipvs->sync_buff_lock);
663 return;
664 }
665
666 id = select_master_thread_id(ipvs, cp);
667 ms = &ipvs->ms[id];
668
669#ifdef CONFIG_IP_VS_IPV6
670 if (cp->af == AF_INET6)
671 len = sizeof(struct ip_vs_sync_v6);
672 else
673#endif
674 len = sizeof(struct ip_vs_sync_v4);
675
676 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
677 len += sizeof(struct ip_vs_sync_conn_options) + 2;
678
679 if (cp->pe_data_len)
680 len += cp->pe_data_len + 2;
681 if (pe_name_len)
682 len += pe_name_len + 2;
683
684
685 pad = 0;
686 buff = ms->sync_buff;
687 if (buff) {
688 m = buff->mesg;
689 pad = (4 - (size_t) buff->head) & 3;
690
691 if (buff->head + len + pad > buff->end || m->reserved) {
692 sb_queue_tail(ipvs, ms);
693 ms->sync_buff = NULL;
694 buff = NULL;
695 pad = 0;
696 }
697 }
698
699 if (!buff) {
700 buff = ip_vs_sync_buff_create(ipvs, len);
701 if (!buff) {
702 spin_unlock_bh(&ipvs->sync_buff_lock);
703 pr_err("ip_vs_sync_buff_create failed.\n");
704 return;
705 }
706 ms->sync_buff = buff;
707 m = buff->mesg;
708 }
709
710 p = buff->head;
711 buff->head += pad + len;
712 m->size = htons(ntohs(m->size) + pad + len);
713
714 while (pad--)
715 *(p++) = 0;
716
717 s = (union ip_vs_sync_conn *)p;
718
719
720 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
721 s->v4.ver_size = htons(len & SVER_MASK);
722 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
723 s->v4.state = htons(cp->state);
724 s->v4.protocol = cp->protocol;
725 s->v4.cport = cp->cport;
726 s->v4.vport = cp->vport;
727 s->v4.dport = cp->dport;
728 s->v4.fwmark = htonl(cp->fwmark);
729 s->v4.timeout = htonl(cp->timeout / HZ);
730 m->nr_conns++;
731
732#ifdef CONFIG_IP_VS_IPV6
733 if (cp->af == AF_INET6) {
734 p += sizeof(struct ip_vs_sync_v6);
735 s->v6.caddr = cp->caddr.in6;
736 s->v6.vaddr = cp->vaddr.in6;
737 s->v6.daddr = cp->daddr.in6;
738 } else
739#endif
740 {
741 p += sizeof(struct ip_vs_sync_v4);
742 s->v4.caddr = cp->caddr.ip;
743 s->v4.vaddr = cp->vaddr.ip;
744 s->v4.daddr = cp->daddr.ip;
745 }
746 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
747 *(p++) = IPVS_OPT_SEQ_DATA;
748 *(p++) = sizeof(struct ip_vs_sync_conn_options);
749 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
750 p += sizeof(struct ip_vs_seq);
751 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
752 p += sizeof(struct ip_vs_seq);
753 }
754
755 if (cp->pe_data_len && cp->pe_data) {
756 *(p++) = IPVS_OPT_PE_DATA;
757 *(p++) = cp->pe_data_len;
758 memcpy(p, cp->pe_data, cp->pe_data_len);
759 p += cp->pe_data_len;
760 if (pe_name_len) {
761
762 *(p++) = IPVS_OPT_PE_NAME;
763 *(p++) = pe_name_len;
764 memcpy(p, cp->pe->name, pe_name_len);
765 p += pe_name_len;
766 }
767 }
768
769 spin_unlock_bh(&ipvs->sync_buff_lock);
770
771control:
772
773 cp = cp->control;
774 if (!cp)
775 return;
776 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
777 pkts = atomic_add_return(1, &cp->in_pkts);
778 else
779 pkts = sysctl_sync_threshold(ipvs);
780 goto sloop;
781}
782
783
784
785
786static inline int
787ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
788 struct ip_vs_conn_param *p,
789 __u8 *pe_data, unsigned int pe_data_len,
790 __u8 *pe_name, unsigned int pe_name_len)
791{
792#ifdef CONFIG_IP_VS_IPV6
793 if (af == AF_INET6)
794 ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
795 (const union nf_inet_addr *)&sc->v6.caddr,
796 sc->v6.cport,
797 (const union nf_inet_addr *)&sc->v6.vaddr,
798 sc->v6.vport, p);
799 else
800#endif
801 ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
802 (const union nf_inet_addr *)&sc->v4.caddr,
803 sc->v4.cport,
804 (const union nf_inet_addr *)&sc->v4.vaddr,
805 sc->v4.vport, p);
806
807 if (pe_data_len) {
808 if (pe_name_len) {
809 char buff[IP_VS_PENAME_MAXLEN+1];
810
811 memcpy(buff, pe_name, pe_name_len);
812 buff[pe_name_len]=0;
813 p->pe = __ip_vs_pe_getbyname(buff);
814 if (!p->pe) {
815 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
816 buff);
817 return 1;
818 }
819 } else {
820 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
821 return 1;
822 }
823
824 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
825 if (!p->pe_data) {
826 module_put(p->pe->module);
827 return -ENOMEM;
828 }
829 p->pe_data_len = pe_data_len;
830 }
831 return 0;
832}
833
834
835
836
837
838
839
/*
 * Apply one decoded sync entry on the backup: update the matching
 * connection, or create a new one.
 * @param:   lookup key; param->pe_data is freed here unless it was handed
 *           to a newly created template connection
 * @flags:   connection flags from the entry
 * @state:   protocol state from the entry (already validated by the caller)
 * @type:    address family, passed as both family arguments of
 *           ip_vs_find_dest() and to ip_vs_conn_new()
 * @timeout: timeout in seconds from the entry; 0 selects a default
 * @opt:     sequence options, or NULL when the entry carried none
 */
static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
			    unsigned int flags, unsigned int state,
			    unsigned int protocol, unsigned int type,
			    const union nf_inet_addr *daddr, __be16 dport,
			    unsigned long timeout, __u32 fwmark,
			    struct ip_vs_sync_conn_options *opt)
{
	struct ip_vs_dest *dest;
	struct ip_vs_conn *cp;

	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
		cp = ip_vs_conn_in_get(param);
		/* Existing connection points at a different real server */
		if (cp && ((cp->dport != dport) ||
			   !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
			if (!(flags & IP_VS_CONN_F_INACTIVE)) {
				/* Active entry wins: expire the old connection and recreate */
				ip_vs_conn_expire_now(cp);
				__ip_vs_conn_put(cp);
				cp = NULL;
			} else {
				/*
				 * Inactive entry for a different server: keep
				 * the existing connection and drop the entry.
				 */
				__ip_vs_conn_put(cp);
				kfree(param->pe_data);
				return;
			}
		}
	} else {
		cp = ip_vs_ct_in_get(param);
	}

	if (cp) {
		/* Existing connection keeps its own PE data; free the copy */
		kfree(param->pe_data);

		dest = cp->dest;
		spin_lock_bh(&cp->lock);
		/* Active/inactive changed: move the count on the destination */
		if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
		    !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
			if (flags & IP_VS_CONN_F_INACTIVE) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
			} else {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
			}
		}
		/* Take only the updatable flag bits from the entry */
		flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
		flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
		cp->flags = flags;
		spin_unlock_bh(&cp->lock);
		if (!dest)
			ip_vs_try_bind_dest(cp);
	} else {
		/*
		 * No matching connection: look up the destination and create
		 * the connection under the RCU read lock. dest may be NULL if
		 * ip_vs_find_dest() finds nothing; it is passed on as is.
		 */
		rcu_read_lock();
		dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
				       param->vaddr, param->vport, protocol,
				       fwmark, flags);

		cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
				    fwmark);
		rcu_read_unlock();
		if (!cp) {
			kfree(param->pe_data);
			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
			return;
		}
		/*
		 * pe_data is freed for non-templates only.
		 * NOTE(review): presumably a new template takes ownership of
		 * param->pe_data inside ip_vs_conn_new() -- confirm.
		 */
		if (!(flags & IP_VS_CONN_F_TEMPLATE))
			kfree(param->pe_data);
	}

	if (opt) {
		cp->in_seq = opt->in_seq;
		cp->out_seq = opt->out_seq;
	}
	atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
	cp->state = state;
	cp->old_state = cp->state;

	/*
	 * Use the timeout from the entry when given (clamped so that the
	 * multiplication by HZ stays within MAX_SCHEDULE_TIMEOUT); otherwise
	 * use the protocol's timeout for this state, or 3 minutes.
	 */
	if (timeout) {
		if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
			timeout = MAX_SCHEDULE_TIMEOUT / HZ;
		cp->timeout = timeout*HZ;
	} else {
		struct ip_vs_proto_data *pd;

		pd = ip_vs_proto_data_get(ipvs, protocol);
		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
			cp->timeout = pd->timeout_table[state];
		else
			cp->timeout = (3*60*HZ);
	}
	ip_vs_conn_put(cp);
}
953
954
955
956
957static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
958 const size_t buflen)
959{
960 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
961 struct ip_vs_sync_conn_v0 *s;
962 struct ip_vs_sync_conn_options *opt;
963 struct ip_vs_protocol *pp;
964 struct ip_vs_conn_param param;
965 char *p;
966 int i;
967
968 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
969 for (i=0; i<m->nr_conns; i++) {
970 unsigned int flags, state;
971
972 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
973 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
974 return;
975 }
976 s = (struct ip_vs_sync_conn_v0 *) p;
977 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
978 flags &= ~IP_VS_CONN_F_HASHED;
979 if (flags & IP_VS_CONN_F_SEQ_MASK) {
980 opt = (struct ip_vs_sync_conn_options *)&s[1];
981 p += FULL_CONN_SIZE;
982 if (p > buffer+buflen) {
983 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
984 return;
985 }
986 } else {
987 opt = NULL;
988 p += SIMPLE_CONN_SIZE;
989 }
990
991 state = ntohs(s->state);
992 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
993 pp = ip_vs_proto_get(s->protocol);
994 if (!pp) {
995 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
996 s->protocol);
997 continue;
998 }
999 if (state >= pp->num_states) {
1000 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
1001 pp->name, state);
1002 continue;
1003 }
1004 } else {
1005
1006 if (state > 0) {
1007 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
1008 state);
1009 state = 0;
1010 }
1011 }
1012
1013 ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
1014 (const union nf_inet_addr *)&s->caddr,
1015 s->cport,
1016 (const union nf_inet_addr *)&s->vaddr,
1017 s->vport, ¶m);
1018
1019
1020 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->protocol, AF_INET,
1021 (union nf_inet_addr *)&s->daddr, s->dport,
1022 0, 0, opt);
1023 }
1024}
1025
1026
1027
1028
1029static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
1030 __u32 *opt_flags,
1031 struct ip_vs_sync_conn_options *opt)
1032{
1033 struct ip_vs_sync_conn_options *topt;
1034
1035 topt = (struct ip_vs_sync_conn_options *)p;
1036
1037 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
1038 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
1039 return -EINVAL;
1040 }
1041 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
1042 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
1043 return -EINVAL;
1044 }
1045 ntoh_seq(&topt->in_seq, &opt->in_seq);
1046 ntoh_seq(&topt->out_seq, &opt->out_seq);
1047 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
1048 return 0;
1049}
1050
1051static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1052 __u8 **data, unsigned int maxlen,
1053 __u32 *opt_flags, __u32 flag)
1054{
1055 if (plen > maxlen) {
1056 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
1057 return -EINVAL;
1058 }
1059 if (*opt_flags & flag) {
1060 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
1061 return -EINVAL;
1062 }
1063 *data_len = plen;
1064 *data = p;
1065 *opt_flags |= flag;
1066 return 0;
1067}
1068
1069
1070
1071static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
1072{
1073 struct ip_vs_sync_conn_options opt;
1074 union ip_vs_sync_conn *s;
1075 struct ip_vs_protocol *pp;
1076 struct ip_vs_conn_param param;
1077 __u32 flags;
1078 unsigned int af, state, pe_data_len=0, pe_name_len=0;
1079 __u8 *pe_data=NULL, *pe_name=NULL;
1080 __u32 opt_flags=0;
1081 int retc=0;
1082
1083 s = (union ip_vs_sync_conn *) p;
1084
1085 if (s->v6.type & STYPE_F_INET6) {
1086#ifdef CONFIG_IP_VS_IPV6
1087 af = AF_INET6;
1088 p += sizeof(struct ip_vs_sync_v6);
1089#else
1090 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1091 retc = 10;
1092 goto out;
1093#endif
1094 } else if (!s->v4.type) {
1095 af = AF_INET;
1096 p += sizeof(struct ip_vs_sync_v4);
1097 } else {
1098 return -10;
1099 }
1100 if (p > msg_end)
1101 return -20;
1102
1103
1104 while (p < msg_end) {
1105 int ptype;
1106 int plen;
1107
1108 if (p+2 > msg_end)
1109 return -30;
1110 ptype = *(p++);
1111 plen = *(p++);
1112
1113 if (!plen || ((p + plen) > msg_end))
1114 return -40;
1115
1116 switch (ptype & ~IPVS_OPT_F_PARAM) {
1117 case IPVS_OPT_SEQ_DATA:
1118 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1119 return -50;
1120 break;
1121
1122 case IPVS_OPT_PE_DATA:
1123 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1124 IP_VS_PEDATA_MAXLEN, &opt_flags,
1125 IPVS_OPT_F_PE_DATA))
1126 return -60;
1127 break;
1128
1129 case IPVS_OPT_PE_NAME:
1130 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1131 IP_VS_PENAME_MAXLEN, &opt_flags,
1132 IPVS_OPT_F_PE_NAME))
1133 return -70;
1134 break;
1135
1136 default:
1137
1138 if (!(ptype & IPVS_OPT_F_PARAM)) {
1139 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1140 ptype & ~IPVS_OPT_F_PARAM);
1141 retc = 20;
1142 goto out;
1143 }
1144 }
1145 p += plen;
1146 }
1147
1148
1149 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1150 flags |= IP_VS_CONN_F_SYNC;
1151 state = ntohs(s->v4.state);
1152
1153 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1154 pp = ip_vs_proto_get(s->v4.protocol);
1155 if (!pp) {
1156 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1157 s->v4.protocol);
1158 retc = 30;
1159 goto out;
1160 }
1161 if (state >= pp->num_states) {
1162 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1163 pp->name, state);
1164 retc = 40;
1165 goto out;
1166 }
1167 } else {
1168
1169 if (state > 0) {
1170 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1171 state);
1172 state = 0;
1173 }
1174 }
1175 if (ip_vs_conn_fill_param_sync(ipvs, af, s, ¶m, pe_data,
1176 pe_data_len, pe_name, pe_name_len)) {
1177 retc = 50;
1178 goto out;
1179 }
1180
1181 if (af == AF_INET)
1182 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v4.protocol, af,
1183 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1184 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1185 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1186 );
1187#ifdef CONFIG_IP_VS_IPV6
1188 else
1189 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v6.protocol, af,
1190 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1191 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1192 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1193 );
1194#endif
1195 ip_vs_pe_put(param.pe);
1196 return 0;
1197
1198out:
1199 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1200 return retc;
1201
1202}
1203
1204
1205
1206
1207
1208static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
1209 const size_t buflen)
1210{
1211 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1212 __u8 *p, *msg_end;
1213 int i, nr_conns;
1214
1215 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1216 IP_VS_DBG(2, "BACKUP, message header too short\n");
1217 return;
1218 }
1219
1220 if (buflen != ntohs(m2->size)) {
1221 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1222 return;
1223 }
1224
1225 if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
1226 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1227 return;
1228 }
1229
1230 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1231 && (m2->spare == 0)) {
1232
1233 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1234 nr_conns = m2->nr_conns;
1235
1236 for (i=0; i<nr_conns; i++) {
1237 union ip_vs_sync_conn *s;
1238 unsigned int size;
1239 int retc;
1240
1241 p = msg_end;
1242 if (p + sizeof(s->v4) > buffer+buflen) {
1243 IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
1244 return;
1245 }
1246 s = (union ip_vs_sync_conn *)p;
1247 size = ntohs(s->v4.ver_size) & SVER_MASK;
1248 msg_end = p + size;
1249
1250 if (msg_end > buffer+buflen) {
1251 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
1252 return;
1253 }
1254 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
1255 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
1256 ntohs(s->v4.ver_size) >> SVER_SHIFT);
1257 return;
1258 }
1259
1260 retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
1261 if (retc < 0) {
1262 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
1263 retc);
1264 return;
1265 }
1266
1267 msg_end = p + ((size + 3) & ~3);
1268 }
1269 } else {
1270
1271 ip_vs_process_message_v0(ipvs, buffer, buflen);
1272 return;
1273 }
1274}
1275
1276
1277
1278
1279
1280static void set_sock_size(struct sock *sk, int mode, int val)
1281{
1282
1283
1284 lock_sock(sk);
1285 if (mode) {
1286 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
1287 sysctl_wmem_max);
1288 sk->sk_sndbuf = val * 2;
1289 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1290 } else {
1291 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
1292 sysctl_rmem_max);
1293 sk->sk_rcvbuf = val * 2;
1294 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1295 }
1296 release_sock(sk);
1297}
1298
1299
1300
1301
1302static void set_mcast_loop(struct sock *sk, u_char loop)
1303{
1304 struct inet_sock *inet = inet_sk(sk);
1305
1306
1307 lock_sock(sk);
1308 inet->mc_loop = loop ? 1 : 0;
1309#ifdef CONFIG_IP_VS_IPV6
1310 if (sk->sk_family == AF_INET6) {
1311 struct ipv6_pinfo *np = inet6_sk(sk);
1312
1313
1314 np->mc_loop = loop ? 1 : 0;
1315 }
1316#endif
1317 release_sock(sk);
1318}
1319
1320
1321
1322
1323static void set_mcast_ttl(struct sock *sk, u_char ttl)
1324{
1325 struct inet_sock *inet = inet_sk(sk);
1326
1327
1328 lock_sock(sk);
1329 inet->mc_ttl = ttl;
1330#ifdef CONFIG_IP_VS_IPV6
1331 if (sk->sk_family == AF_INET6) {
1332 struct ipv6_pinfo *np = inet6_sk(sk);
1333
1334
1335 np->mcast_hops = ttl;
1336 }
1337#endif
1338 release_sock(sk);
1339}
1340
1341
1342static void set_mcast_pmtudisc(struct sock *sk, int val)
1343{
1344 struct inet_sock *inet = inet_sk(sk);
1345
1346
1347 lock_sock(sk);
1348 inet->pmtudisc = val;
1349#ifdef CONFIG_IP_VS_IPV6
1350 if (sk->sk_family == AF_INET6) {
1351 struct ipv6_pinfo *np = inet6_sk(sk);
1352
1353
1354 np->pmtudisc = val;
1355 }
1356#endif
1357 release_sock(sk);
1358}
1359
1360
1361
1362
1363static int set_mcast_if(struct sock *sk, char *ifname)
1364{
1365 struct net_device *dev;
1366 struct inet_sock *inet = inet_sk(sk);
1367 struct net *net = sock_net(sk);
1368
1369 dev = __dev_get_by_name(net, ifname);
1370 if (!dev)
1371 return -ENODEV;
1372
1373 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1374 return -EINVAL;
1375
1376 lock_sock(sk);
1377 inet->mc_index = dev->ifindex;
1378
1379#ifdef CONFIG_IP_VS_IPV6
1380 if (sk->sk_family == AF_INET6) {
1381 struct ipv6_pinfo *np = inet6_sk(sk);
1382
1383
1384 np->mcast_oif = dev->ifindex;
1385 }
1386#endif
1387 release_sock(sk);
1388
1389 return 0;
1390}
1391
1392
1393
1394
1395
1396
1397
1398static int
1399join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
1400{
1401 struct net *net = sock_net(sk);
1402 struct ip_mreqn mreq;
1403 struct net_device *dev;
1404 int ret;
1405
1406 memset(&mreq, 0, sizeof(mreq));
1407 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1408
1409 dev = __dev_get_by_name(net, ifname);
1410 if (!dev)
1411 return -ENODEV;
1412 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1413 return -EINVAL;
1414
1415 mreq.imr_ifindex = dev->ifindex;
1416
1417 lock_sock(sk);
1418 ret = ip_mc_join_group(sk, &mreq);
1419 release_sock(sk);
1420
1421 return ret;
1422}
1423
#ifdef CONFIG_IP_VS_IPV6
/*
 * Join IPv6 multicast group @addr on the interface named @ifname.
 *
 * Returns the result of ipv6_sock_mc_join(), -ENODEV when no such device
 * exists in the socket's netns, or -EINVAL when the socket is bound to a
 * different device.
 */
static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
			     char *ifname)
{
	struct net_device *dev;
	int ifindex;
	int ret;

	dev = __dev_get_by_name(sock_net(sk), ifname);
	if (!dev)
		return -ENODEV;

	ifindex = dev->ifindex;
	if (sk->sk_bound_dev_if && ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	lock_sock(sk);
	ret = ipv6_sock_mc_join(sk, ifindex, addr);
	release_sock(sk);

	return ret;
}
#endif
1445
1446static int bind_mcastif_addr(struct socket *sock, char *ifname)
1447{
1448 struct net *net = sock_net(sock->sk);
1449 struct net_device *dev;
1450 __be32 addr;
1451 struct sockaddr_in sin;
1452
1453 dev = __dev_get_by_name(net, ifname);
1454 if (!dev)
1455 return -ENODEV;
1456
1457 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1458 if (!addr)
1459 pr_err("You probably need to specify IP address on "
1460 "multicast interface.\n");
1461
1462 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
1463 ifname, &addr);
1464
1465
1466 sin.sin_family = AF_INET;
1467 sin.sin_addr.s_addr = addr;
1468 sin.sin_port = 0;
1469
1470 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
1471}
1472
1473static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
1474 struct ipvs_sync_daemon_cfg *c, int id)
1475{
1476 if (AF_INET6 == c->mcast_af) {
1477 sa->in6 = (struct sockaddr_in6) {
1478 .sin6_family = AF_INET6,
1479 .sin6_port = htons(c->mcast_port + id),
1480 };
1481 sa->in6.sin6_addr = c->mcast_group.in6;
1482 *salen = sizeof(sa->in6);
1483 } else {
1484 sa->in = (struct sockaddr_in) {
1485 .sin_family = AF_INET,
1486 .sin_port = htons(c->mcast_port + id),
1487 };
1488 sa->in.sin_addr = c->mcast_group.in;
1489 *salen = sizeof(sa->in);
1490 }
1491}
1492
1493
1494
1495
1496static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
1497{
1498
1499 union ipvs_sockaddr mcast_addr;
1500 struct socket *sock;
1501 int result, salen;
1502
1503
1504 result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
1505 IPPROTO_UDP, &sock);
1506 if (result < 0) {
1507 pr_err("Error during creation of socket; terminating\n");
1508 return ERR_PTR(result);
1509 }
1510 result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
1511 if (result < 0) {
1512 pr_err("Error setting outbound mcast interface\n");
1513 goto error;
1514 }
1515
1516 set_mcast_loop(sock->sk, 0);
1517 set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
1518
1519 set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
1520 result = sysctl_sync_sock_size(ipvs);
1521 if (result > 0)
1522 set_sock_size(sock->sk, 1, result);
1523
1524 if (AF_INET == ipvs->mcfg.mcast_af)
1525 result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
1526 else
1527 result = 0;
1528 if (result < 0) {
1529 pr_err("Error binding address of the mcast interface\n");
1530 goto error;
1531 }
1532
1533 get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
1534 result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
1535 salen, 0);
1536 if (result < 0) {
1537 pr_err("Error connecting to the multicast addr\n");
1538 goto error;
1539 }
1540
1541 return sock;
1542
1543error:
1544 sock_release(sock);
1545 return ERR_PTR(result);
1546}
1547
1548
1549
1550
1551
1552static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
1553 int ifindex)
1554{
1555
1556 union ipvs_sockaddr mcast_addr;
1557 struct socket *sock;
1558 int result, salen;
1559
1560
1561 result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
1562 IPPROTO_UDP, &sock);
1563 if (result < 0) {
1564 pr_err("Error during creation of socket; terminating\n");
1565 return ERR_PTR(result);
1566 }
1567
1568 sock->sk->sk_reuse = SK_CAN_REUSE;
1569 result = sysctl_sync_sock_size(ipvs);
1570 if (result > 0)
1571 set_sock_size(sock->sk, 0, result);
1572
1573 get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
1574 sock->sk->sk_bound_dev_if = ifindex;
1575 result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
1576 if (result < 0) {
1577 pr_err("Error binding to the multicast addr\n");
1578 goto error;
1579 }
1580
1581
1582#ifdef CONFIG_IP_VS_IPV6
1583 if (ipvs->bcfg.mcast_af == AF_INET6)
1584 result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
1585 ipvs->bcfg.mcast_ifn);
1586 else
1587#endif
1588 result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
1589 ipvs->bcfg.mcast_ifn);
1590 if (result < 0) {
1591 pr_err("Error joining to the multicast group\n");
1592 goto error;
1593 }
1594
1595 return sock;
1596
1597error:
1598 sock_release(sock);
1599 return ERR_PTR(result);
1600}
1601
1602
1603static int
1604ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1605{
1606 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1607 struct kvec iov;
1608 int len;
1609
1610 EnterFunction(7);
1611 iov.iov_base = (void *)buffer;
1612 iov.iov_len = length;
1613
1614 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1615
1616 LeaveFunction(7);
1617 return len;
1618}
1619
1620static int
1621ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1622{
1623 int msize;
1624 int ret;
1625
1626 msize = ntohs(msg->size);
1627
1628 ret = ip_vs_send_async(sock, (char *)msg, msize);
1629 if (ret >= 0 || ret == -EAGAIN)
1630 return ret;
1631 pr_err("ip_vs_send_async error %d\n", ret);
1632 return 0;
1633}
1634
1635static int
1636ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1637{
1638 struct msghdr msg = {NULL,};
1639 struct kvec iov = {buffer, buflen};
1640 int len;
1641
1642 EnterFunction(7);
1643
1644
1645 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
1646 len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
1647 if (len < 0)
1648 return len;
1649
1650 LeaveFunction(7);
1651 return len;
1652}
1653
1654
1655static void master_wakeup_work_handler(struct work_struct *work)
1656{
1657 struct ipvs_master_sync_state *ms =
1658 container_of(work, struct ipvs_master_sync_state,
1659 master_wakeup_work.work);
1660 struct netns_ipvs *ipvs = ms->ipvs;
1661
1662 spin_lock_bh(&ipvs->sync_lock);
1663 if (ms->sync_queue_len &&
1664 ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
1665 ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
1666 wake_up_process(ms->master_thread);
1667 }
1668 spin_unlock_bh(&ipvs->sync_lock);
1669}
1670
1671
1672static inline struct ip_vs_sync_buff *
1673next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1674{
1675 struct ip_vs_sync_buff *sb;
1676
1677 sb = sb_dequeue(ipvs, ms);
1678 if (sb)
1679 return sb;
1680
1681 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1682}
1683
/*
 * Master sync thread body: repeatedly fetch the next sync buffer and send
 * it over the thread's socket until kthread_should_stop() is set, then
 * release all remaining buffers, the socket and the thread data.
 *
 * @data: struct ip_vs_sync_thread_data of this thread; freed here on exit.
 *
 * Always returns 0.
 */
static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = tinfo->ipvs;
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		/* Leave with sb possibly set; it is released below */
		if (unlikely(kthread_should_stop()))
			break;
		if (!sb) {
			/*
			 * Nothing to send: sleep for up to
			 * IPVS_SYNC_CHECK_PERIOD.
			 * NOTE(review): no task state is set here, so this
			 * presumably relies on a callee of next_sync_buff()
			 * having set it - confirm.
			 */
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			/*
			 * The only negative result is -EAGAIN (other errors
			 * are logged and turned into 0): wait until the
			 * socket is writeable or we are asked to stop.
			 */
			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop());
			if (unlikely(kthread_should_stop()))
				goto done;
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	/* Release the buffer we were holding when asked to stop, if any */
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* clean up the sync_buff queue */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	__set_current_state(TASK_RUNNING);

	/* clean up the current sync_buff */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* release the sending multicast socket */
	sock_release(tinfo->sock);
	kfree(tinfo);

	return 0;
}
1738
1739
1740static int sync_thread_backup(void *data)
1741{
1742 struct ip_vs_sync_thread_data *tinfo = data;
1743 struct netns_ipvs *ipvs = tinfo->ipvs;
1744 int len;
1745
1746 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
1747 "syncid = %d, id = %d\n",
1748 ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);
1749
1750 while (!kthread_should_stop()) {
1751 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
1752 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
1753 || kthread_should_stop());
1754
1755
1756 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
1757 len = ip_vs_receive(tinfo->sock, tinfo->buf,
1758 ipvs->bcfg.sync_maxlen);
1759 if (len <= 0) {
1760 if (len != -EAGAIN)
1761 pr_err("receiving message error\n");
1762 break;
1763 }
1764
1765 ip_vs_process_message(ipvs, tinfo->buf, len);
1766 }
1767 }
1768
1769
1770 sock_release(tinfo->sock);
1771 kfree(tinfo->buf);
1772 kfree(tinfo);
1773
1774 return 0;
1775}
1776
1777
/*
 * Start the sync daemon threads for @state (IP_VS_STATE_MASTER or
 * IP_VS_STATE_BACKUP) using configuration @c.
 *
 * @ipvs:  per-netns IPVS state
 * @c:     daemon configuration; unset fields (address family/group, port,
 *         TTL, sync_maxlen) are filled with defaults in place, and the
 *         result is copied to ipvs->mcfg or ipvs->bcfg
 * @state: which daemon to start
 *
 * One socket, one thread-data structure and one kernel thread are created
 * per sync port.  On success the state bit is added to ipvs->sync_state
 * and the module use count is incremented.
 *
 * Returns 0 on success, -ENODEV for an unknown interface, -EEXIST when
 * the requested daemon is already running, -EINVAL for an unknown @state,
 * -ENOMEM on allocation failure, or the error from socket or thread
 * creation.
 *
 * NOTE(review): uses __dev_get_by_name(), so the caller presumably has to
 * hold RTNL - confirm against the callers.
 */
int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
		      int state)
{
	struct ip_vs_sync_thread_data *tinfo;
	struct task_struct **array = NULL, *task;
	struct socket *sock;
	struct net_device *dev;
	char *name;
	int (*threadfn)(void *data);
	int id, count, hlen;
	int result = -ENOMEM;
	u16 mtu, min_mtu;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
		  sizeof(struct ip_vs_sync_conn_v0));

	/*
	 * The number of threads is fixed by whichever daemon starts first:
	 * with no daemon active it comes from the sync_ports sysctl, clamped
	 * to [1, IPVS_SYNC_PORTS_MAX]; otherwise the stored mask is reused.
	 */
	if (!ipvs->sync_state) {
		count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
		ipvs->threads_mask = count - 1;
	} else
		count = ipvs->threads_mask + 1;

	/* Fill in defaults for fields the caller left unset */
	if (c->mcast_af == AF_UNSPEC) {
		c->mcast_af = AF_INET;
		c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
	}
	if (!c->mcast_port)
		c->mcast_port = IP_VS_SYNC_PORT;
	if (!c->mcast_ttl)
		c->mcast_ttl = 1;

	dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
	if (!dev) {
		pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
		return -ENODEV;
	}
	/* IP + UDP header overhead for the configured address family */
	hlen = (AF_INET6 == c->mcast_af) ?
	       sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
	       sizeof(struct iphdr) + sizeof(struct udphdr);
	/*
	 * Backup: device MTU clamped to [1500, 65535]; master: fixed 1500.
	 */
	mtu = (state == IP_VS_STATE_BACKUP) ?
		  clamp(dev->mtu, 1500U, 65535U) : 1500U;
	min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;

	/* A configured sync_maxlen is clamped; otherwise derive it from mtu */
	if (c->sync_maxlen)
		c->sync_maxlen = clamp_t(unsigned int,
					 c->sync_maxlen, min_mtu,
					 65535 - hlen);
	else
		c->sync_maxlen = mtu - hlen;

	if (state == IP_VS_STATE_MASTER) {
		/* Master threads already exist */
		if (ipvs->ms)
			return -EEXIST;

		ipvs->mcfg = *c;
		name = "ipvs-m:%d:%d";
		threadfn = sync_thread_master;
	} else if (state == IP_VS_STATE_BACKUP) {
		/* Backup threads already exist */
		if (ipvs->backup_threads)
			return -EEXIST;

		ipvs->bcfg = *c;
		name = "ipvs-b:%d:%d";
		threadfn = sync_thread_backup;
	} else {
		return -EINVAL;
	}

	if (state == IP_VS_STATE_MASTER) {
		struct ipvs_master_sync_state *ms;

		/* Per-thread master state: queue, counters and wakeup work */
		ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
		if (!ipvs->ms)
			goto out;
		ms = ipvs->ms;
		for (id = 0; id < count; id++, ms++) {
			INIT_LIST_HEAD(&ms->sync_queue);
			ms->sync_queue_len = 0;
			ms->sync_queue_delay = 0;
			INIT_DELAYED_WORK(&ms->master_wakeup_work,
					  master_wakeup_work_handler);
			ms->ipvs = ipvs;
		}
	} else {
		/* Backup only needs the array of task pointers */
		array = kcalloc(count, sizeof(struct task_struct *),
				GFP_KERNEL);
		if (!array)
			goto out;
	}

	tinfo = NULL;
	for (id = 0; id < count; id++) {
		if (state == IP_VS_STATE_MASTER)
			sock = make_send_sock(ipvs, id);
		else
			sock = make_receive_sock(ipvs, id, dev->ifindex);
		if (IS_ERR(sock)) {
			result = PTR_ERR(sock);
			goto outtinfo;
		}
		tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
		/* sock is not owned by a tinfo yet: release it separately */
		if (!tinfo)
			goto outsocket;
		tinfo->ipvs = ipvs;
		tinfo->sock = sock;
		if (state == IP_VS_STATE_BACKUP) {
			/* Receive buffer sized to the largest sync message */
			tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
					     GFP_KERNEL);
			if (!tinfo->buf)
				goto outtinfo;
		} else {
			tinfo->buf = NULL;
		}
		tinfo->id = id;

		task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
		if (IS_ERR(task)) {
			result = PTR_ERR(task);
			goto outtinfo;
		}
		/*
		 * The thread function frees tinfo and its socket on exit, so
		 * the error path below must not touch them any more.
		 */
		tinfo = NULL;
		if (state == IP_VS_STATE_MASTER)
			ipvs->ms[id].master_thread = task;
		else
			array[id] = task;
	}

	/* All threads are running: publish the new state */

	if (state == IP_VS_STATE_BACKUP)
		ipvs->backup_threads = array;
	spin_lock_bh(&ipvs->sync_buff_lock);
	ipvs->sync_state |= state;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* increase the module use count */
	ip_vs_use_count_inc();

	return 0;

outsocket:
	sock_release(sock);

outtinfo:
	/* Only non-NULL when the failure happened before the thread ran */
	if (tinfo) {
		sock_release(tinfo->sock);
		kfree(tinfo->buf);
		kfree(tinfo);
	}
	/* Stop the threads started in earlier iterations (ids 0..id-1) */
	count = id;
	while (count-- > 0) {
		if (state == IP_VS_STATE_MASTER)
			kthread_stop(ipvs->ms[count].master_thread);
		else
			kthread_stop(array[count]);
	}
	kfree(array);

out:
	/* Drop the master state unless a master daemon is really running */
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	}
	return result;
}
1944
1945
1946int stop_sync_thread(struct netns_ipvs *ipvs, int state)
1947{
1948 struct task_struct **array;
1949 int id;
1950 int retc = -EINVAL;
1951
1952 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1953
1954 if (state == IP_VS_STATE_MASTER) {
1955 if (!ipvs->ms)
1956 return -ESRCH;
1957
1958
1959
1960
1961
1962
1963
1964 spin_lock_bh(&ipvs->sync_buff_lock);
1965 spin_lock(&ipvs->sync_lock);
1966 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1967 spin_unlock(&ipvs->sync_lock);
1968 spin_unlock_bh(&ipvs->sync_buff_lock);
1969
1970 retc = 0;
1971 for (id = ipvs->threads_mask; id >= 0; id--) {
1972 struct ipvs_master_sync_state *ms = &ipvs->ms[id];
1973 int ret;
1974
1975 pr_info("stopping master sync thread %d ...\n",
1976 task_pid_nr(ms->master_thread));
1977 cancel_delayed_work_sync(&ms->master_wakeup_work);
1978 ret = kthread_stop(ms->master_thread);
1979 if (retc >= 0)
1980 retc = ret;
1981 }
1982 kfree(ipvs->ms);
1983 ipvs->ms = NULL;
1984 } else if (state == IP_VS_STATE_BACKUP) {
1985 if (!ipvs->backup_threads)
1986 return -ESRCH;
1987
1988 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1989 array = ipvs->backup_threads;
1990 retc = 0;
1991 for (id = ipvs->threads_mask; id >= 0; id--) {
1992 int ret;
1993
1994 pr_info("stopping backup sync thread %d ...\n",
1995 task_pid_nr(array[id]));
1996 ret = kthread_stop(array[id]);
1997 if (retc >= 0)
1998 retc = ret;
1999 }
2000 kfree(array);
2001 ipvs->backup_threads = NULL;
2002 }
2003
2004
2005 ip_vs_use_count_dec();
2006
2007 return retc;
2008}
2009
2010
2011
2012
2013int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
2014{
2015 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
2016 spin_lock_init(&ipvs->sync_lock);
2017 spin_lock_init(&ipvs->sync_buff_lock);
2018 return 0;
2019}
2020
2021void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
2022{
2023 int retc;
2024
2025 mutex_lock(&ipvs->sync_mutex);
2026 retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
2027 if (retc && retc != -ESRCH)
2028 pr_err("Failed to stop Master Daemon\n");
2029
2030 retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
2031 if (retc && retc != -ESRCH)
2032 pr_err("Failed to stop Backup Daemon\n");
2033 mutex_unlock(&ipvs->sync_mutex);
2034}
2035