1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/* Prefix used by pr_*() messages in this file: "IPVS: ". */
#define KMSG_COMPONENT	"IPVS"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
36
37#include <linux/module.h>
38#include <linux/slab.h>
39#include <linux/inetdevice.h>
40#include <linux/net.h>
41#include <linux/completion.h>
42#include <linux/delay.h>
43#include <linux/skbuff.h>
44#include <linux/in.h>
45#include <linux/igmp.h>
46#include <linux/udp.h>
47#include <linux/err.h>
48#include <linux/kthread.h>
49#include <linux/wait.h>
50#include <linux/kernel.h>
51
52#include <asm/unaligned.h>
53
54#include <net/ip.h>
55#include <net/sock.h>
56
57#include <net/ip_vs.h>
58
/* Multicast group (0xe0000051 is 224.0.0.81) and UDP port used for sync. */
#define IP_VS_SYNC_GROUP	0xe0000051
#define IP_VS_SYNC_PORT		8848

/* Value placed in ip_vs_sync_mesg.version for the current protocol. */
#define SYNC_PROTO_VER		1
63
64
65
66
67
68struct ip_vs_sync_conn_v0 {
69 __u8 reserved;
70
71
72 __u8 protocol;
73 __be16 cport;
74 __be16 vport;
75 __be16 dport;
76 __be32 caddr;
77 __be32 vaddr;
78 __be32 daddr;
79
80
81 __be16 flags;
82 __be16 state;
83
84
85};
86
87struct ip_vs_sync_conn_options {
88 struct ip_vs_seq in_seq;
89 struct ip_vs_seq out_seq;
90};
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130struct ip_vs_sync_v4 {
131 __u8 type;
132 __u8 protocol;
133 __be16 ver_size;
134
135 __be32 flags;
136 __be16 state;
137
138 __be16 cport;
139 __be16 vport;
140 __be16 dport;
141 __be32 fwmark;
142 __be32 timeout;
143 __be32 caddr;
144 __be32 vaddr;
145 __be32 daddr;
146
147
148};
149
150
151
152struct ip_vs_sync_v6 {
153 __u8 type;
154 __u8 protocol;
155 __be16 ver_size;
156
157 __be32 flags;
158 __be16 state;
159
160 __be16 cport;
161 __be16 vport;
162 __be16 dport;
163 __be32 fwmark;
164 __be32 timeout;
165 struct in6_addr caddr;
166 struct in6_addr vaddr;
167 struct in6_addr daddr;
168
169
170};
171
172union ip_vs_sync_conn {
173 struct ip_vs_sync_v4 v4;
174 struct ip_vs_sync_v6 v6;
175};
176
177
/* Bits in the "type" field of a version 1 sync entry */
#define STYPE_INET6		0
#define STYPE_F_INET6		(1 << STYPE_INET6)

/* ver_size: entry size is in the low 12 bits, version above them */
#define SVER_SHIFT		12
#define SVER_MASK		0x0fff

/* Option type codes that may follow a version 1 sync entry */
#define IPVS_OPT_SEQ_DATA	1
#define IPVS_OPT_PE_DATA	2
#define IPVS_OPT_PE_NAME	3
#define IPVS_OPT_PARAM		7

/*
 * Matching bit flags.  IPVS_OPT_F_PARAM is also tested in the received
 * option type byte: an unknown option is skipped when the bit is set
 * and causes the entry to be dropped when it is clear.
 */
#define IPVS_OPT_F_SEQ_DATA	(1 << (IPVS_OPT_SEQ_DATA - 1))
#define IPVS_OPT_F_PE_DATA	(1 << (IPVS_OPT_PE_DATA - 1))
#define IPVS_OPT_F_PE_NAME	(1 << (IPVS_OPT_PE_NAME - 1))
#define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM - 1))
193
/* Per-thread context handed to the sync kthreads. */
struct ip_vs_sync_thread_data {
	struct net	*net;	/* network namespace the thread serves */
	struct socket	*sock;	/* multicast socket used to send/receive */
	char		*buf;	/* receive buffer (used by the backup thread) */
};
199
200
/* Size of a version 0 entry without / with the sequence options. */
#define SIMPLE_CONN_SIZE	(sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE	\
	(sizeof(struct ip_vs_sync_conn_v0) + \
	 sizeof(struct ip_vs_sync_conn_options))
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
/* Byte length used for the version 0 message header when sizing buffers. */
#define SYNC_MESG_HEADER_LEN	4
/* Upper bound on entries per buffer; nr_conns is an 8-bit counter. */
#define MAX_CONNS_PER_SYNCBUFF	255
242
243
244struct ip_vs_sync_mesg_v0 {
245 __u8 nr_conns;
246 __u8 syncid;
247 __u16 size;
248
249
250};
251
252
253struct ip_vs_sync_mesg {
254 __u8 reserved;
255 __u8 syncid;
256 __u16 size;
257 __u8 nr_conns;
258 __s8 version;
259 __u16 spare;
260
261};
262
263struct ip_vs_sync_buff {
264 struct list_head list;
265 unsigned long firstuse;
266
267
268 struct ip_vs_sync_mesg *mesg;
269 unsigned char *head;
270 unsigned char *end;
271};
272
273
274static struct sockaddr_in mcast_addr = {
275 .sin_family = AF_INET,
276 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT),
277 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
278};
279
280
281
282
283
284static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
285{
286 ho->init_seq = get_unaligned_be32(&no->init_seq);
287 ho->delta = get_unaligned_be32(&no->delta);
288 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
289}
290
291
292
293
294
295static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
296{
297 put_unaligned_be32(ho->init_seq, &no->init_seq);
298 put_unaligned_be32(ho->delta, &no->delta);
299 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
300}
301
302static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
303{
304 struct ip_vs_sync_buff *sb;
305
306 spin_lock_bh(&ipvs->sync_lock);
307 if (list_empty(&ipvs->sync_queue)) {
308 sb = NULL;
309 } else {
310 sb = list_entry(ipvs->sync_queue.next,
311 struct ip_vs_sync_buff,
312 list);
313 list_del(&sb->list);
314 }
315 spin_unlock_bh(&ipvs->sync_lock);
316
317 return sb;
318}
319
320
321
322
323static inline struct ip_vs_sync_buff *
324ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
325{
326 struct ip_vs_sync_buff *sb;
327
328 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
329 return NULL;
330
331 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
332 if (!sb->mesg) {
333 kfree(sb);
334 return NULL;
335 }
336 sb->mesg->reserved = 0;
337 sb->mesg->version = SYNC_PROTO_VER;
338 sb->mesg->syncid = ipvs->master_syncid;
339 sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
340 sb->mesg->nr_conns = 0;
341 sb->mesg->spare = 0;
342 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
343 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
344
345 sb->firstuse = jiffies;
346 return sb;
347}
348
349static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
350{
351 kfree(sb->mesg);
352 kfree(sb);
353}
354
355static inline void sb_queue_tail(struct netns_ipvs *ipvs)
356{
357 struct ip_vs_sync_buff *sb = ipvs->sync_buff;
358
359 spin_lock(&ipvs->sync_lock);
360 if (ipvs->sync_state & IP_VS_STATE_MASTER)
361 list_add_tail(&sb->list, &ipvs->sync_queue);
362 else
363 ip_vs_sync_buff_release(sb);
364 spin_unlock(&ipvs->sync_lock);
365}
366
367
368
369
370
371static inline struct ip_vs_sync_buff *
372get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
373{
374 struct ip_vs_sync_buff *sb;
375
376 spin_lock_bh(&ipvs->sync_buff_lock);
377 if (ipvs->sync_buff &&
378 time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
379 sb = ipvs->sync_buff;
380 ipvs->sync_buff = NULL;
381 } else
382 sb = NULL;
383 spin_unlock_bh(&ipvs->sync_buff_lock);
384 return sb;
385}
386
387
388
389
390
391void ip_vs_sync_switch_mode(struct net *net, int mode)
392{
393 struct netns_ipvs *ipvs = net_ipvs(net);
394
395 if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
396 return;
397 if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
398 return;
399
400 spin_lock_bh(&ipvs->sync_buff_lock);
401
402 if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
403 kfree(ipvs->sync_buff);
404 ipvs->sync_buff = NULL;
405 } else {
406 spin_lock_bh(&ipvs->sync_lock);
407 if (ipvs->sync_state & IP_VS_STATE_MASTER)
408 list_add_tail(&ipvs->sync_buff->list,
409 &ipvs->sync_queue);
410 else
411 ip_vs_sync_buff_release(ipvs->sync_buff);
412 spin_unlock_bh(&ipvs->sync_lock);
413 }
414 spin_unlock_bh(&ipvs->sync_buff_lock);
415}
416
417
418
419
420static inline struct ip_vs_sync_buff *
421ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
422{
423 struct ip_vs_sync_buff *sb;
424 struct ip_vs_sync_mesg_v0 *mesg;
425
426 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
427 return NULL;
428
429 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
430 if (!sb->mesg) {
431 kfree(sb);
432 return NULL;
433 }
434 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
435 mesg->nr_conns = 0;
436 mesg->syncid = ipvs->master_syncid;
437 mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
438 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
439 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
440 sb->firstuse = jiffies;
441 return sb;
442}
443
444
445
446
447
448void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
449{
450 struct netns_ipvs *ipvs = net_ipvs(net);
451 struct ip_vs_sync_mesg_v0 *m;
452 struct ip_vs_sync_conn_v0 *s;
453 int len;
454
455 if (unlikely(cp->af != AF_INET))
456 return;
457
458 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
459 return;
460
461 spin_lock(&ipvs->sync_buff_lock);
462 if (!ipvs->sync_buff) {
463 ipvs->sync_buff =
464 ip_vs_sync_buff_create_v0(ipvs);
465 if (!ipvs->sync_buff) {
466 spin_unlock(&ipvs->sync_buff_lock);
467 pr_err("ip_vs_sync_buff_create failed.\n");
468 return;
469 }
470 }
471
472 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
473 SIMPLE_CONN_SIZE;
474 m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
475 s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
476
477
478 s->reserved = 0;
479 s->protocol = cp->protocol;
480 s->cport = cp->cport;
481 s->vport = cp->vport;
482 s->dport = cp->dport;
483 s->caddr = cp->caddr.ip;
484 s->vaddr = cp->vaddr.ip;
485 s->daddr = cp->daddr.ip;
486 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
487 s->state = htons(cp->state);
488 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
489 struct ip_vs_sync_conn_options *opt =
490 (struct ip_vs_sync_conn_options *)&s[1];
491 memcpy(opt, &cp->in_seq, sizeof(*opt));
492 }
493
494 m->nr_conns++;
495 m->size += len;
496 ipvs->sync_buff->head += len;
497
498
499 if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
500 sb_queue_tail(ipvs);
501 ipvs->sync_buff = NULL;
502 }
503 spin_unlock(&ipvs->sync_buff_lock);
504
505
506 if (cp->control)
507 ip_vs_sync_conn(net, cp->control);
508}
509
510
511
512
513
514
515void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
516{
517 struct netns_ipvs *ipvs = net_ipvs(net);
518 struct ip_vs_sync_mesg *m;
519 union ip_vs_sync_conn *s;
520 __u8 *p;
521 unsigned int len, pe_name_len, pad;
522
523
524 if (sysctl_sync_ver(ipvs) == 0) {
525 ip_vs_sync_conn_v0(net, cp);
526 return;
527 }
528
529 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
530 goto control;
531sloop:
532
533 pe_name_len = 0;
534 if (cp->pe_data_len) {
535 if (!cp->pe_data || !cp->dest) {
536 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
537 return;
538 }
539 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
540 }
541
542 spin_lock(&ipvs->sync_buff_lock);
543
544#ifdef CONFIG_IP_VS_IPV6
545 if (cp->af == AF_INET6)
546 len = sizeof(struct ip_vs_sync_v6);
547 else
548#endif
549 len = sizeof(struct ip_vs_sync_v4);
550
551 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
552 len += sizeof(struct ip_vs_sync_conn_options) + 2;
553
554 if (cp->pe_data_len)
555 len += cp->pe_data_len + 2;
556 if (pe_name_len)
557 len += pe_name_len + 2;
558
559
560 pad = 0;
561 if (ipvs->sync_buff) {
562 pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
563 if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
564 sb_queue_tail(ipvs);
565 ipvs->sync_buff = NULL;
566 pad = 0;
567 }
568 }
569
570 if (!ipvs->sync_buff) {
571 ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
572 if (!ipvs->sync_buff) {
573 spin_unlock(&ipvs->sync_buff_lock);
574 pr_err("ip_vs_sync_buff_create failed.\n");
575 return;
576 }
577 }
578
579 m = ipvs->sync_buff->mesg;
580 p = ipvs->sync_buff->head;
581 ipvs->sync_buff->head += pad + len;
582 m->size += pad + len;
583
584 while (pad--)
585 *(p++) = 0;
586
587 s = (union ip_vs_sync_conn *)p;
588
589
590 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
591 s->v4.ver_size = htons(len & SVER_MASK);
592 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
593 s->v4.state = htons(cp->state);
594 s->v4.protocol = cp->protocol;
595 s->v4.cport = cp->cport;
596 s->v4.vport = cp->vport;
597 s->v4.dport = cp->dport;
598 s->v4.fwmark = htonl(cp->fwmark);
599 s->v4.timeout = htonl(cp->timeout / HZ);
600 m->nr_conns++;
601
602#ifdef CONFIG_IP_VS_IPV6
603 if (cp->af == AF_INET6) {
604 p += sizeof(struct ip_vs_sync_v6);
605 ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
606 ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
607 ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
608 } else
609#endif
610 {
611 p += sizeof(struct ip_vs_sync_v4);
612 s->v4.caddr = cp->caddr.ip;
613 s->v4.vaddr = cp->vaddr.ip;
614 s->v4.daddr = cp->daddr.ip;
615 }
616 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
617 *(p++) = IPVS_OPT_SEQ_DATA;
618 *(p++) = sizeof(struct ip_vs_sync_conn_options);
619 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
620 p += sizeof(struct ip_vs_seq);
621 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
622 p += sizeof(struct ip_vs_seq);
623 }
624
625 if (cp->pe_data_len && cp->pe_data) {
626 *(p++) = IPVS_OPT_PE_DATA;
627 *(p++) = cp->pe_data_len;
628 memcpy(p, cp->pe_data, cp->pe_data_len);
629 p += cp->pe_data_len;
630 if (pe_name_len) {
631
632 *(p++) = IPVS_OPT_PE_NAME;
633 *(p++) = pe_name_len;
634 memcpy(p, cp->pe->name, pe_name_len);
635 p += pe_name_len;
636 }
637 }
638
639 spin_unlock(&ipvs->sync_buff_lock);
640
641control:
642
643 cp = cp->control;
644 if (!cp)
645 return;
646
647
648
649
650 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
651 int pkts = atomic_add_return(1, &cp->in_pkts);
652
653 if (pkts % sysctl_sync_period(ipvs) != 1)
654 return;
655 }
656 goto sloop;
657}
658
659
660
661
662static inline int
663ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
664 struct ip_vs_conn_param *p,
665 __u8 *pe_data, unsigned int pe_data_len,
666 __u8 *pe_name, unsigned int pe_name_len)
667{
668#ifdef CONFIG_IP_VS_IPV6
669 if (af == AF_INET6)
670 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
671 (const union nf_inet_addr *)&sc->v6.caddr,
672 sc->v6.cport,
673 (const union nf_inet_addr *)&sc->v6.vaddr,
674 sc->v6.vport, p);
675 else
676#endif
677 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
678 (const union nf_inet_addr *)&sc->v4.caddr,
679 sc->v4.cport,
680 (const union nf_inet_addr *)&sc->v4.vaddr,
681 sc->v4.vport, p);
682
683 if (pe_data_len) {
684 if (pe_name_len) {
685 char buff[IP_VS_PENAME_MAXLEN+1];
686
687 memcpy(buff, pe_name, pe_name_len);
688 buff[pe_name_len]=0;
689 p->pe = __ip_vs_pe_getbyname(buff);
690 if (!p->pe) {
691 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
692 buff);
693 return 1;
694 }
695 } else {
696 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
697 return 1;
698 }
699
700 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
701 if (!p->pe_data) {
702 if (p->pe->module)
703 module_put(p->pe->module);
704 return -ENOMEM;
705 }
706 p->pe_data_len = pe_data_len;
707 }
708 return 0;
709}
710
711
712
713
714
715
716
717static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
718 unsigned int flags, unsigned int state,
719 unsigned int protocol, unsigned int type,
720 const union nf_inet_addr *daddr, __be16 dport,
721 unsigned long timeout, __u32 fwmark,
722 struct ip_vs_sync_conn_options *opt)
723{
724 struct ip_vs_dest *dest;
725 struct ip_vs_conn *cp;
726 struct netns_ipvs *ipvs = net_ipvs(net);
727
728 if (!(flags & IP_VS_CONN_F_TEMPLATE))
729 cp = ip_vs_conn_in_get(param);
730 else
731 cp = ip_vs_ct_in_get(param);
732
733 if (cp && param->pe_data)
734 kfree(param->pe_data);
735 if (!cp) {
736
737
738
739
740
741 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
742 param->vport, protocol, fwmark);
743
744
745 if (protocol == IPPROTO_TCP) {
746 if (state != IP_VS_TCP_S_ESTABLISHED)
747 flags |= IP_VS_CONN_F_INACTIVE;
748 else
749 flags &= ~IP_VS_CONN_F_INACTIVE;
750 } else if (protocol == IPPROTO_SCTP) {
751 if (state != IP_VS_SCTP_S_ESTABLISHED)
752 flags |= IP_VS_CONN_F_INACTIVE;
753 else
754 flags &= ~IP_VS_CONN_F_INACTIVE;
755 }
756 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
757 if (dest)
758 atomic_dec(&dest->refcnt);
759 if (!cp) {
760 if (param->pe_data)
761 kfree(param->pe_data);
762 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
763 return;
764 }
765 } else if (!cp->dest) {
766 dest = ip_vs_try_bind_dest(cp);
767 if (dest)
768 atomic_dec(&dest->refcnt);
769 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
770 (cp->state != state)) {
771
772 dest = cp->dest;
773 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
774 (state != IP_VS_TCP_S_ESTABLISHED)) {
775 atomic_dec(&dest->activeconns);
776 atomic_inc(&dest->inactconns);
777 cp->flags |= IP_VS_CONN_F_INACTIVE;
778 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
779 (state == IP_VS_TCP_S_ESTABLISHED)) {
780 atomic_inc(&dest->activeconns);
781 atomic_dec(&dest->inactconns);
782 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
783 }
784 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
785 (cp->state != state)) {
786 dest = cp->dest;
787 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
788 (state != IP_VS_SCTP_S_ESTABLISHED)) {
789 atomic_dec(&dest->activeconns);
790 atomic_inc(&dest->inactconns);
791 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
792 }
793 }
794
795 if (opt)
796 memcpy(&cp->in_seq, opt, sizeof(*opt));
797 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
798 cp->state = state;
799 cp->old_state = cp->state;
800
801
802
803
804
805
806
807
808
809 if (timeout) {
810 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
811 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
812 cp->timeout = timeout*HZ;
813 } else {
814 struct ip_vs_proto_data *pd;
815
816 pd = ip_vs_proto_data_get(net, protocol);
817 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
818 cp->timeout = pd->timeout_table[state];
819 else
820 cp->timeout = (3*60*HZ);
821 }
822 ip_vs_conn_put(cp);
823}
824
825
826
827
828static void ip_vs_process_message_v0(struct net *net, const char *buffer,
829 const size_t buflen)
830{
831 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
832 struct ip_vs_sync_conn_v0 *s;
833 struct ip_vs_sync_conn_options *opt;
834 struct ip_vs_protocol *pp;
835 struct ip_vs_conn_param param;
836 char *p;
837 int i;
838
839 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
840 for (i=0; i<m->nr_conns; i++) {
841 unsigned flags, state;
842
843 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
844 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
845 return;
846 }
847 s = (struct ip_vs_sync_conn_v0 *) p;
848 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
849 flags &= ~IP_VS_CONN_F_HASHED;
850 if (flags & IP_VS_CONN_F_SEQ_MASK) {
851 opt = (struct ip_vs_sync_conn_options *)&s[1];
852 p += FULL_CONN_SIZE;
853 if (p > buffer+buflen) {
854 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
855 return;
856 }
857 } else {
858 opt = NULL;
859 p += SIMPLE_CONN_SIZE;
860 }
861
862 state = ntohs(s->state);
863 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
864 pp = ip_vs_proto_get(s->protocol);
865 if (!pp) {
866 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
867 s->protocol);
868 continue;
869 }
870 if (state >= pp->num_states) {
871 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
872 pp->name, state);
873 continue;
874 }
875 } else {
876
877 if (state > 0) {
878 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
879 state);
880 state = 0;
881 }
882 }
883
884 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
885 (const union nf_inet_addr *)&s->caddr,
886 s->cport,
887 (const union nf_inet_addr *)&s->vaddr,
888 s->vport, ¶m);
889
890
891 ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
892 (union nf_inet_addr *)&s->daddr, s->dport,
893 0, 0, opt);
894 }
895}
896
897
898
899
900static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
901 __u32 *opt_flags,
902 struct ip_vs_sync_conn_options *opt)
903{
904 struct ip_vs_sync_conn_options *topt;
905
906 topt = (struct ip_vs_sync_conn_options *)p;
907
908 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
909 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
910 return -EINVAL;
911 }
912 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
913 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
914 return -EINVAL;
915 }
916 ntoh_seq(&topt->in_seq, &opt->in_seq);
917 ntoh_seq(&topt->out_seq, &opt->out_seq);
918 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
919 return 0;
920}
921
922static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
923 __u8 **data, unsigned int maxlen,
924 __u32 *opt_flags, __u32 flag)
925{
926 if (plen > maxlen) {
927 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
928 return -EINVAL;
929 }
930 if (*opt_flags & flag) {
931 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
932 return -EINVAL;
933 }
934 *data_len = plen;
935 *data = p;
936 *opt_flags |= flag;
937 return 0;
938}
939
940
941
942static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
943{
944 struct ip_vs_sync_conn_options opt;
945 union ip_vs_sync_conn *s;
946 struct ip_vs_protocol *pp;
947 struct ip_vs_conn_param param;
948 __u32 flags;
949 unsigned int af, state, pe_data_len=0, pe_name_len=0;
950 __u8 *pe_data=NULL, *pe_name=NULL;
951 __u32 opt_flags=0;
952 int retc=0;
953
954 s = (union ip_vs_sync_conn *) p;
955
956 if (s->v6.type & STYPE_F_INET6) {
957#ifdef CONFIG_IP_VS_IPV6
958 af = AF_INET6;
959 p += sizeof(struct ip_vs_sync_v6);
960#else
961 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
962 retc = 10;
963 goto out;
964#endif
965 } else if (!s->v4.type) {
966 af = AF_INET;
967 p += sizeof(struct ip_vs_sync_v4);
968 } else {
969 return -10;
970 }
971 if (p > msg_end)
972 return -20;
973
974
975 while (p < msg_end) {
976 int ptype;
977 int plen;
978
979 if (p+2 > msg_end)
980 return -30;
981 ptype = *(p++);
982 plen = *(p++);
983
984 if (!plen || ((p + plen) > msg_end))
985 return -40;
986
987 switch (ptype & ~IPVS_OPT_F_PARAM) {
988 case IPVS_OPT_SEQ_DATA:
989 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
990 return -50;
991 break;
992
993 case IPVS_OPT_PE_DATA:
994 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
995 IP_VS_PEDATA_MAXLEN, &opt_flags,
996 IPVS_OPT_F_PE_DATA))
997 return -60;
998 break;
999
1000 case IPVS_OPT_PE_NAME:
1001 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1002 IP_VS_PENAME_MAXLEN, &opt_flags,
1003 IPVS_OPT_F_PE_NAME))
1004 return -70;
1005 break;
1006
1007 default:
1008
1009 if (!(ptype & IPVS_OPT_F_PARAM)) {
1010 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1011 ptype & ~IPVS_OPT_F_PARAM);
1012 retc = 20;
1013 goto out;
1014 }
1015 }
1016 p += plen;
1017 }
1018
1019
1020 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1021 flags |= IP_VS_CONN_F_SYNC;
1022 state = ntohs(s->v4.state);
1023
1024 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1025 pp = ip_vs_proto_get(s->v4.protocol);
1026 if (!pp) {
1027 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1028 s->v4.protocol);
1029 retc = 30;
1030 goto out;
1031 }
1032 if (state >= pp->num_states) {
1033 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1034 pp->name, state);
1035 retc = 40;
1036 goto out;
1037 }
1038 } else {
1039
1040 if (state > 0) {
1041 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1042 state);
1043 state = 0;
1044 }
1045 }
1046 if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
1047 pe_data_len, pe_name, pe_name_len)) {
1048 retc = 50;
1049 goto out;
1050 }
1051
1052 if (af == AF_INET)
1053 ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
1054 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1055 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1056 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1057 );
1058#ifdef CONFIG_IP_VS_IPV6
1059 else
1060 ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
1061 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1062 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1063 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1064 );
1065#endif
1066 return 0;
1067
1068out:
1069 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1070 return retc;
1071
1072}
1073
1074
1075
1076
1077
1078static void ip_vs_process_message(struct net *net, __u8 *buffer,
1079 const size_t buflen)
1080{
1081 struct netns_ipvs *ipvs = net_ipvs(net);
1082 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1083 __u8 *p, *msg_end;
1084 int i, nr_conns;
1085
1086 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1087 IP_VS_DBG(2, "BACKUP, message header too short\n");
1088 return;
1089 }
1090
1091 m2->size = ntohs(m2->size);
1092
1093 if (buflen != m2->size) {
1094 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1095 return;
1096 }
1097
1098 if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
1099 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1100 return;
1101 }
1102
1103 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1104 && (m2->spare == 0)) {
1105
1106 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1107 nr_conns = m2->nr_conns;
1108
1109 for (i=0; i<nr_conns; i++) {
1110 union ip_vs_sync_conn *s;
1111 unsigned size;
1112 int retc;
1113
1114 p = msg_end;
1115 if (p + sizeof(s->v4) > buffer+buflen) {
1116 IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
1117 return;
1118 }
1119 s = (union ip_vs_sync_conn *)p;
1120 size = ntohs(s->v4.ver_size) & SVER_MASK;
1121 msg_end = p + size;
1122
1123 if (msg_end > buffer+buflen) {
1124 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
1125 return;
1126 }
1127 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
1128 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
1129 ntohs(s->v4.ver_size) >> SVER_SHIFT);
1130 return;
1131 }
1132
1133 retc = ip_vs_proc_sync_conn(net, p, msg_end);
1134 if (retc < 0) {
1135 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
1136 retc);
1137 return;
1138 }
1139
1140 msg_end = p + ((size + 3) & ~3);
1141 }
1142 } else {
1143
1144 ip_vs_process_message_v0(net, buffer, buflen);
1145 return;
1146 }
1147}
1148
1149
1150
1151
1152
1153static void set_mcast_loop(struct sock *sk, u_char loop)
1154{
1155 struct inet_sock *inet = inet_sk(sk);
1156
1157
1158 lock_sock(sk);
1159 inet->mc_loop = loop ? 1 : 0;
1160 release_sock(sk);
1161}
1162
1163
1164
1165
1166static void set_mcast_ttl(struct sock *sk, u_char ttl)
1167{
1168 struct inet_sock *inet = inet_sk(sk);
1169
1170
1171 lock_sock(sk);
1172 inet->mc_ttl = ttl;
1173 release_sock(sk);
1174}
1175
1176
1177
1178
1179static int set_mcast_if(struct sock *sk, char *ifname)
1180{
1181 struct net_device *dev;
1182 struct inet_sock *inet = inet_sk(sk);
1183 struct net *net = sock_net(sk);
1184
1185 dev = __dev_get_by_name(net, ifname);
1186 if (!dev)
1187 return -ENODEV;
1188
1189 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1190 return -EINVAL;
1191
1192 lock_sock(sk);
1193 inet->mc_index = dev->ifindex;
1194
1195 release_sock(sk);
1196
1197 return 0;
1198}
1199
1200
1201
1202
1203
1204
1205static int set_sync_mesg_maxlen(struct net *net, int sync_state)
1206{
1207 struct netns_ipvs *ipvs = net_ipvs(net);
1208 struct net_device *dev;
1209 int num;
1210
1211 if (sync_state == IP_VS_STATE_MASTER) {
1212 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1213 if (!dev)
1214 return -ENODEV;
1215
1216 num = (dev->mtu - sizeof(struct iphdr) -
1217 sizeof(struct udphdr) -
1218 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
1219 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
1220 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
1221 IP_VS_DBG(7, "setting the maximum length of sync sending "
1222 "message %d.\n", ipvs->send_mesg_maxlen);
1223 } else if (sync_state == IP_VS_STATE_BACKUP) {
1224 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1225 if (!dev)
1226 return -ENODEV;
1227
1228 ipvs->recv_mesg_maxlen = dev->mtu -
1229 sizeof(struct iphdr) - sizeof(struct udphdr);
1230 IP_VS_DBG(7, "setting the maximum length of sync receiving "
1231 "message %d.\n", ipvs->recv_mesg_maxlen);
1232 }
1233
1234 return 0;
1235}
1236
1237
1238
1239
1240
1241
1242
1243static int
1244join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
1245{
1246 struct net *net = sock_net(sk);
1247 struct ip_mreqn mreq;
1248 struct net_device *dev;
1249 int ret;
1250
1251 memset(&mreq, 0, sizeof(mreq));
1252 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1253
1254 dev = __dev_get_by_name(net, ifname);
1255 if (!dev)
1256 return -ENODEV;
1257 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1258 return -EINVAL;
1259
1260 mreq.imr_ifindex = dev->ifindex;
1261
1262 lock_sock(sk);
1263 ret = ip_mc_join_group(sk, &mreq);
1264 release_sock(sk);
1265
1266 return ret;
1267}
1268
1269
1270static int bind_mcastif_addr(struct socket *sock, char *ifname)
1271{
1272 struct net *net = sock_net(sock->sk);
1273 struct net_device *dev;
1274 __be32 addr;
1275 struct sockaddr_in sin;
1276
1277 dev = __dev_get_by_name(net, ifname);
1278 if (!dev)
1279 return -ENODEV;
1280
1281 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1282 if (!addr)
1283 pr_err("You probably need to specify IP address on "
1284 "multicast interface.\n");
1285
1286 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
1287 ifname, &addr);
1288
1289
1290 sin.sin_family = AF_INET;
1291 sin.sin_addr.s_addr = addr;
1292 sin.sin_port = 0;
1293
1294 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
1295}
1296
1297
1298
1299
1300static struct socket *make_send_sock(struct net *net)
1301{
1302 struct netns_ipvs *ipvs = net_ipvs(net);
1303 struct socket *sock;
1304 int result;
1305
1306
1307 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1308 if (result < 0) {
1309 pr_err("Error during creation of socket; terminating\n");
1310 return ERR_PTR(result);
1311 }
1312
1313
1314
1315
1316
1317 sk_change_net(sock->sk, net);
1318 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
1319 if (result < 0) {
1320 pr_err("Error setting outbound mcast interface\n");
1321 goto error;
1322 }
1323
1324 set_mcast_loop(sock->sk, 0);
1325 set_mcast_ttl(sock->sk, 1);
1326
1327 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
1328 if (result < 0) {
1329 pr_err("Error binding address of the mcast interface\n");
1330 goto error;
1331 }
1332
1333 result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
1334 sizeof(struct sockaddr), 0);
1335 if (result < 0) {
1336 pr_err("Error connecting to the multicast addr\n");
1337 goto error;
1338 }
1339
1340 return sock;
1341
1342error:
1343 sk_release_kernel(sock->sk);
1344 return ERR_PTR(result);
1345}
1346
1347
1348
1349
1350
1351static struct socket *make_receive_sock(struct net *net)
1352{
1353 struct netns_ipvs *ipvs = net_ipvs(net);
1354 struct socket *sock;
1355 int result;
1356
1357
1358 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1359 if (result < 0) {
1360 pr_err("Error during creation of socket; terminating\n");
1361 return ERR_PTR(result);
1362 }
1363
1364
1365
1366
1367
1368 sk_change_net(sock->sk, net);
1369
1370 sock->sk->sk_reuse = 1;
1371
1372 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
1373 sizeof(struct sockaddr));
1374 if (result < 0) {
1375 pr_err("Error binding to the multicast addr\n");
1376 goto error;
1377 }
1378
1379
1380 result = join_mcast_group(sock->sk,
1381 (struct in_addr *) &mcast_addr.sin_addr,
1382 ipvs->backup_mcast_ifn);
1383 if (result < 0) {
1384 pr_err("Error joining to the multicast group\n");
1385 goto error;
1386 }
1387
1388 return sock;
1389
1390error:
1391 sk_release_kernel(sock->sk);
1392 return ERR_PTR(result);
1393}
1394
1395
1396static int
1397ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1398{
1399 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1400 struct kvec iov;
1401 int len;
1402
1403 EnterFunction(7);
1404 iov.iov_base = (void *)buffer;
1405 iov.iov_len = length;
1406
1407 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1408
1409 LeaveFunction(7);
1410 return len;
1411}
1412
1413static void
1414ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1415{
1416 int msize;
1417
1418 msize = msg->size;
1419
1420
1421 msg->size = htons(msg->size);
1422
1423 if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
1424 pr_err("ip_vs_send_async error\n");
1425}
1426
1427static int
1428ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1429{
1430 struct msghdr msg = {NULL,};
1431 struct kvec iov;
1432 int len;
1433
1434 EnterFunction(7);
1435
1436
1437 iov.iov_base = buffer;
1438 iov.iov_len = (size_t)buflen;
1439
1440 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
1441
1442 if (len < 0)
1443 return -1;
1444
1445 LeaveFunction(7);
1446 return len;
1447}
1448
1449
1450static int sync_thread_master(void *data)
1451{
1452 struct ip_vs_sync_thread_data *tinfo = data;
1453 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1454 struct ip_vs_sync_buff *sb;
1455
1456 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
1457 "syncid = %d\n",
1458 ipvs->master_mcast_ifn, ipvs->master_syncid);
1459
1460 while (!kthread_should_stop()) {
1461 while ((sb = sb_dequeue(ipvs))) {
1462 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
1463 ip_vs_sync_buff_release(sb);
1464 }
1465
1466
1467 sb = get_curr_sync_buff(ipvs, 2 * HZ);
1468 if (sb) {
1469 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
1470 ip_vs_sync_buff_release(sb);
1471 }
1472
1473 schedule_timeout_interruptible(HZ);
1474 }
1475
1476
1477 while ((sb = sb_dequeue(ipvs)))
1478 ip_vs_sync_buff_release(sb);
1479
1480
1481 sb = get_curr_sync_buff(ipvs, 0);
1482 if (sb)
1483 ip_vs_sync_buff_release(sb);
1484
1485
1486 sk_release_kernel(tinfo->sock->sk);
1487 kfree(tinfo);
1488
1489 return 0;
1490}
1491
1492
1493static int sync_thread_backup(void *data)
1494{
1495 struct ip_vs_sync_thread_data *tinfo = data;
1496 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1497 int len;
1498
1499 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
1500 "syncid = %d\n",
1501 ipvs->backup_mcast_ifn, ipvs->backup_syncid);
1502
1503 while (!kthread_should_stop()) {
1504 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
1505 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
1506 || kthread_should_stop());
1507
1508
1509 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
1510 len = ip_vs_receive(tinfo->sock, tinfo->buf,
1511 ipvs->recv_mesg_maxlen);
1512 if (len <= 0) {
1513 pr_err("receiving message error\n");
1514 break;
1515 }
1516
1517
1518
1519 local_bh_disable();
1520 ip_vs_process_message(tinfo->net, tinfo->buf, len);
1521 local_bh_enable();
1522 }
1523 }
1524
1525
1526 sk_release_kernel(tinfo->sock->sk);
1527 kfree(tinfo->buf);
1528 kfree(tinfo);
1529
1530 return 0;
1531}
1532
1533
1534int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
1535{
1536 struct ip_vs_sync_thread_data *tinfo;
1537 struct task_struct **realtask, *task;
1538 struct socket *sock;
1539 struct netns_ipvs *ipvs = net_ipvs(net);
1540 char *name, *buf = NULL;
1541 int (*threadfn)(void *data);
1542 int result = -ENOMEM;
1543
1544 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1545 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
1546 sizeof(struct ip_vs_sync_conn_v0));
1547
1548 if (state == IP_VS_STATE_MASTER) {
1549 if (ipvs->master_thread)
1550 return -EEXIST;
1551
1552 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
1553 sizeof(ipvs->master_mcast_ifn));
1554 ipvs->master_syncid = syncid;
1555 realtask = &ipvs->master_thread;
1556 name = "ipvs_master:%d";
1557 threadfn = sync_thread_master;
1558 sock = make_send_sock(net);
1559 } else if (state == IP_VS_STATE_BACKUP) {
1560 if (ipvs->backup_thread)
1561 return -EEXIST;
1562
1563 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
1564 sizeof(ipvs->backup_mcast_ifn));
1565 ipvs->backup_syncid = syncid;
1566 realtask = &ipvs->backup_thread;
1567 name = "ipvs_backup:%d";
1568 threadfn = sync_thread_backup;
1569 sock = make_receive_sock(net);
1570 } else {
1571 return -EINVAL;
1572 }
1573
1574 if (IS_ERR(sock)) {
1575 result = PTR_ERR(sock);
1576 goto out;
1577 }
1578
1579 set_sync_mesg_maxlen(net, state);
1580 if (state == IP_VS_STATE_BACKUP) {
1581 buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
1582 if (!buf)
1583 goto outsocket;
1584 }
1585
1586 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
1587 if (!tinfo)
1588 goto outbuf;
1589
1590 tinfo->net = net;
1591 tinfo->sock = sock;
1592 tinfo->buf = buf;
1593
1594 task = kthread_run(threadfn, tinfo, name, ipvs->gen);
1595 if (IS_ERR(task)) {
1596 result = PTR_ERR(task);
1597 goto outtinfo;
1598 }
1599
1600
1601 *realtask = task;
1602 ipvs->sync_state |= state;
1603
1604
1605 ip_vs_use_count_inc();
1606
1607 return 0;
1608
1609outtinfo:
1610 kfree(tinfo);
1611outbuf:
1612 kfree(buf);
1613outsocket:
1614 sk_release_kernel(sock->sk);
1615out:
1616 return result;
1617}
1618
1619
1620int stop_sync_thread(struct net *net, int state)
1621{
1622 struct netns_ipvs *ipvs = net_ipvs(net);
1623 int retc = -EINVAL;
1624
1625 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1626
1627 if (state == IP_VS_STATE_MASTER) {
1628 if (!ipvs->master_thread)
1629 return -ESRCH;
1630
1631 pr_info("stopping master sync thread %d ...\n",
1632 task_pid_nr(ipvs->master_thread));
1633
1634
1635
1636
1637
1638
1639
1640 spin_lock_bh(&ipvs->sync_lock);
1641 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1642 spin_unlock_bh(&ipvs->sync_lock);
1643 retc = kthread_stop(ipvs->master_thread);
1644 ipvs->master_thread = NULL;
1645 } else if (state == IP_VS_STATE_BACKUP) {
1646 if (!ipvs->backup_thread)
1647 return -ESRCH;
1648
1649 pr_info("stopping backup sync thread %d ...\n",
1650 task_pid_nr(ipvs->backup_thread));
1651
1652 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1653 retc = kthread_stop(ipvs->backup_thread);
1654 ipvs->backup_thread = NULL;
1655 }
1656
1657
1658 ip_vs_use_count_dec();
1659
1660 return retc;
1661}
1662
1663
1664
1665
1666int __net_init __ip_vs_sync_init(struct net *net)
1667{
1668 struct netns_ipvs *ipvs = net_ipvs(net);
1669
1670 INIT_LIST_HEAD(&ipvs->sync_queue);
1671 spin_lock_init(&ipvs->sync_lock);
1672 spin_lock_init(&ipvs->sync_buff_lock);
1673
1674 ipvs->sync_mcast_addr.sin_family = AF_INET;
1675 ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
1676 ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
1677 return 0;
1678}
1679
1680void __ip_vs_sync_cleanup(struct net *net)
1681{
1682 int retc;
1683
1684 retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
1685 if (retc && retc != -ESRCH)
1686 pr_err("Failed to stop Master Daemon\n");
1687
1688 retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
1689 if (retc && retc != -ESRCH)
1690 pr_err("Failed to stop Backup Daemon\n");
1691}
1692
1693int __init ip_vs_sync_init(void)
1694{
1695 return 0;
1696}
1697
/*
 *	Cleanup hook of the sync code.  It performs no work.
 */
void ip_vs_sync_cleanup(void)
{
	return;
}
1701