1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Log prefix for this file.  pr_fmt() expands to the given format string
 * with "IPVS: " placed in front of it; the kernel's pr_*() helpers
 * (pr_err() is used further down) apply pr_fmt() to their format argument.
 */
34#define KMSG_COMPONENT "IPVS"
35#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
36
37#include <linux/module.h>
38#include <linux/slab.h>
39#include <linux/inetdevice.h>
40#include <linux/net.h>
41#include <linux/completion.h>
42#include <linux/delay.h>
43#include <linux/skbuff.h>
44#include <linux/in.h>
45#include <linux/igmp.h>
46#include <linux/udp.h>
47#include <linux/err.h>
48#include <linux/kthread.h>
49#include <linux/wait.h>
50#include <linux/kernel.h>
51
52#include <asm/unaligned.h>
53
54#include <net/ip.h>
55#include <net/sock.h>
56
57#include <net/ip_vs.h>
58
/* Multicast group used by the sync sockets: 0xe0000051 is 224.0.0.81. */
59#define IP_VS_SYNC_GROUP 0xe0000051
/* Base UDP port; the socket constructors add the thread id to it. */
60#define IP_VS_SYNC_PORT 8848
61
/* Value written to, and required in, ip_vs_sync_mesg.version. */
62#define SYNC_PROTO_VER 1
63
/*
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably a lockdep class for the sync sockets - confirm at its user.
 */
64static struct lock_class_key __ipvs_sync_key;
65
66
67
68
/*
 * Version 0 on-wire connection entry (IPv4 only).
 *
 * Filled in by ip_vs_sync_conn_v0() and parsed by
 * ip_vs_process_message_v0().  When the connection flags contain
 * IP_VS_CONN_F_SEQ_MASK bits, a struct ip_vs_sync_conn_options follows the
 * entry directly (see FULL_CONN_SIZE).  Member order is the wire layout.
 */
69struct ip_vs_sync_conn_v0 {
	/* written as 0 by the sender */
70 __u8 reserved;
71
72
	/* protocol, ports and IPv4 addresses are copied from the ip_vs_conn as-is */
73 __u8 protocol;
74 __be16 cport;
75 __be16 vport;
76 __be16 dport;
77 __be32 caddr;
78 __be32 vaddr;
79 __be32 daddr;
80
81
	/* connection flags (IP_VS_CONN_F_HASHED cleared) and state, via htons() */
82 __be16 flags;
83 __be16 state;
84
85
86};
87
/*
 * Sequence-adjustment data of a connection (its in_seq and out_seq).
 * Appended raw after a v0 entry, and carried as the payload of the
 * IPVS_OPT_SEQ_DATA option in the v1 format.
 */
88struct ip_vs_sync_conn_options {
89 struct ip_vs_seq in_seq;
90 struct ip_vs_seq out_seq;
91};
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
 * Version 1 on-wire connection entry for IPv4.  Optional type/length/value
 * parameters may follow the fixed part.  Member order is the wire layout.
 */
131struct ip_vs_sync_v4 {
	/* 0 for IPv4; STYPE_F_INET6 marks an IPv6 entry */
132 __u8 type;
133 __u8 protocol;
	/* low 12 bits (SVER_MASK): entry size; bits above SVER_SHIFT: version */
134 __be16 ver_size;
135
	/* connection flags with IP_VS_CONN_F_HASHED cleared, via htonl() */
136 __be32 flags;
137 __be16 state;
138
	/* ports and addresses are copied from the ip_vs_conn as-is */
139 __be16 cport;
140 __be16 vport;
141 __be16 dport;
142 __be32 fwmark;
	/* sender stores cp->timeout / HZ; receiver multiplies by HZ again */
143 __be32 timeout;
144 __be32 caddr;
145 __be32 vaddr;
146 __be32 daddr;
147
148
149};
150
151
152
/*
 * Version 1 on-wire connection entry for IPv6.  Identical to
 * struct ip_vs_sync_v4 up to and including 'timeout'; only the three
 * addresses are wider.  Member order is the wire layout.
 */
153struct ip_vs_sync_v6 {
	/* has STYPE_F_INET6 set for this layout */
154 __u8 type;
155 __u8 protocol;
	/* low 12 bits (SVER_MASK): entry size; bits above SVER_SHIFT: version */
156 __be16 ver_size;
157
158 __be32 flags;
159 __be16 state;
160
161 __be16 cport;
162 __be16 vport;
163 __be16 dport;
164 __be32 fwmark;
	/* sender stores cp->timeout / HZ; receiver multiplies by HZ again */
165 __be32 timeout;
166 struct in6_addr caddr;
167 struct in6_addr vaddr;
168 struct in6_addr daddr;
169
170
171};
172
/*
 * A v1 connection entry of either address family.  The code in this file
 * accesses the leading fields (type ... timeout) through the v4 member for
 * both families, relying on the two layouts being identical up to there.
 */
173union ip_vs_sync_conn {
174 struct ip_vs_sync_v4 v4;
175 struct ip_vs_sync_v6 v6;
176};
177
178
/* Bit number and mask in the entry 'type' byte that mark an IPv6 entry. */
179#define STYPE_INET6 0
180#define STYPE_F_INET6 (1 << STYPE_INET6)
181
/*
 * Split of the 16-bit ver_size field: the low 12 bits (SVER_MASK) are the
 * entry size, the bits from SVER_SHIFT upwards are the entry version.
 */
182#define SVER_SHIFT 12
183#define SVER_MASK 0x0fff
184
/*
 * Type codes of the optional type/length/value parameters that follow a
 * v1 entry.  IPVS_OPT_PARAM is not a type of its own: it is only used via
 * IPVS_OPT_F_PARAM, a flag bit inside the option type byte.
 */
185#define IPVS_OPT_SEQ_DATA 1
186#define IPVS_OPT_PE_DATA 2
187#define IPVS_OPT_PE_NAME 3
188#define IPVS_OPT_PARAM 7
189
/*
 * One flag bit per option type (bit number = type - 1).  The parser uses
 * the first three in its opt_flags word to detect repeated options.
 */
190#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
191#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
192#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
193#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
194
/*
 * Per-thread context of a sync daemon thread.
 * NOTE(review): its users are outside the part of the file visible here;
 * the field notes below are inferred from the names - confirm.
 */
195struct ip_vs_sync_thread_data {
196 struct net *net;
	/* presumably the send or receive socket owned by the thread */
197 struct socket *sock;
	/* presumably the receive buffer (backup side) */
198 char *buf;
	/* presumably the thread index also added to IP_VS_SYNC_PORT */
199 int id;
200};
201
202
/*
 * Sizes of one v0 connection entry: without (SIMPLE) and with (FULL) the
 * trailing struct ip_vs_sync_conn_options.
 */
203#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
204#define FULL_CONN_SIZE \
205(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/*
 * Constants used by set_sync_mesg_maxlen() to size the send buffer:
 * a 4 byte message header (the size of struct ip_vs_sync_mesg_v0) and at
 * most 255 connection entries per buffer (nr_conns is a single byte).
 */
242#define SYNC_MESG_HEADER_LEN 4
243#define MAX_CONNS_PER_SYNCBUFF 255
244
245
/*
 * Version 0 sync message header; struct ip_vs_sync_conn_v0 entries follow.
 */
246struct ip_vs_sync_mesg_v0 {
	/* number of entries that follow; incremented per entry added */
247 __u8 nr_conns;
	/* set from ipvs->master_syncid; checked against backup_syncid on receive */
248 __u8 syncid;
	/* total message length including this header, network byte order */
249 __be16 size;
250
251
252};
253
254
/*
 * Version 1 sync message header; union ip_vs_sync_conn entries follow.
 * The first four bytes line up with struct ip_vs_sync_mesg_v0.
 */
255struct ip_vs_sync_mesg {
	/* occupies the v0 nr_conns byte; 0 here is part of the v1 check */
256 __u8 reserved;
257 __u8 syncid;
	/* total message length including this header, network byte order */
258 __be16 size;
259 __u8 nr_conns;
	/* SYNC_PROTO_VER for messages built by this file */
260 __s8 version;
	/* written as 0 and required to be 0 on receive */
261 __u16 spare;
262
263};
264
/*
 * A sync message under construction or waiting in a master's send queue.
 */
265struct ip_vs_sync_buff {
	/* linkage in ipvs_master_sync_state.sync_queue */
266 struct list_head list;
	/* jiffies when the buffer was created */
267 unsigned long firstuse;
268
269
	/* the message itself; also holds v0 messages (accessed through a cast) */
270 struct ip_vs_sync_mesg *mesg;
	/* where the next entry will be written */
271 unsigned char *head;
	/* one past the last usable byte of the message buffer */
272 unsigned char *end;
273};
274
275
276
277
278
279static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
280{
281 ho->init_seq = get_unaligned_be32(&no->init_seq);
282 ho->delta = get_unaligned_be32(&no->delta);
283 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
284}
285
286
287
288
289
290static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
291{
292 put_unaligned_be32(ho->init_seq, &no->init_seq);
293 put_unaligned_be32(ho->delta, &no->delta);
294 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
295}
296
297static inline struct ip_vs_sync_buff *
298sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
299{
300 struct ip_vs_sync_buff *sb;
301
302 spin_lock_bh(&ipvs->sync_lock);
303 if (list_empty(&ms->sync_queue)) {
304 sb = NULL;
305 __set_current_state(TASK_INTERRUPTIBLE);
306 } else {
307 sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
308 list);
309 list_del(&sb->list);
310 ms->sync_queue_len--;
311 if (!ms->sync_queue_len)
312 ms->sync_queue_delay = 0;
313 }
314 spin_unlock_bh(&ipvs->sync_lock);
315
316 return sb;
317}
318
319
320
321
322static inline struct ip_vs_sync_buff *
323ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
324{
325 struct ip_vs_sync_buff *sb;
326
327 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
328 return NULL;
329
330 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
331 if (!sb->mesg) {
332 kfree(sb);
333 return NULL;
334 }
335 sb->mesg->reserved = 0;
336 sb->mesg->version = SYNC_PROTO_VER;
337 sb->mesg->syncid = ipvs->master_syncid;
338 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
339 sb->mesg->nr_conns = 0;
340 sb->mesg->spare = 0;
341 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
342 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
343
344 sb->firstuse = jiffies;
345 return sb;
346}
347
348static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
349{
350 kfree(sb->mesg);
351 kfree(sb);
352}
353
354static inline void sb_queue_tail(struct netns_ipvs *ipvs,
355 struct ipvs_master_sync_state *ms)
356{
357 struct ip_vs_sync_buff *sb = ms->sync_buff;
358
359 spin_lock(&ipvs->sync_lock);
360 if (ipvs->sync_state & IP_VS_STATE_MASTER &&
361 ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
362 if (!ms->sync_queue_len)
363 schedule_delayed_work(&ms->master_wakeup_work,
364 max(IPVS_SYNC_SEND_DELAY, 1));
365 ms->sync_queue_len++;
366 list_add_tail(&sb->list, &ms->sync_queue);
367 if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
368 wake_up_process(ms->master_thread);
369 } else
370 ip_vs_sync_buff_release(sb);
371 spin_unlock(&ipvs->sync_lock);
372}
373
374
375
376
377
378static inline struct ip_vs_sync_buff *
379get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
380 unsigned long time)
381{
382 struct ip_vs_sync_buff *sb;
383
384 spin_lock_bh(&ipvs->sync_buff_lock);
385 sb = ms->sync_buff;
386 if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
387 ms->sync_buff = NULL;
388 __set_current_state(TASK_RUNNING);
389 } else
390 sb = NULL;
391 spin_unlock_bh(&ipvs->sync_buff_lock);
392 return sb;
393}
394
395static inline int
396select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
397{
398 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
399}
400
401
402
403
404static inline struct ip_vs_sync_buff *
405ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
406{
407 struct ip_vs_sync_buff *sb;
408 struct ip_vs_sync_mesg_v0 *mesg;
409
410 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
411 return NULL;
412
413 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
414 if (!sb->mesg) {
415 kfree(sb);
416 return NULL;
417 }
418 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
419 mesg->nr_conns = 0;
420 mesg->syncid = ipvs->master_syncid;
421 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
422 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
423 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
424 sb->firstuse = jiffies;
425 return sb;
426}
427
428
429
430
431
432
433
434
435
436
437static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
438 struct ip_vs_conn *cp, int pkts)
439{
440 unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
441 unsigned long now = jiffies;
442 unsigned long n = (now + cp->timeout) & ~3UL;
443 unsigned int sync_refresh_period;
444 int sync_period;
445 int force;
446
447
448 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
449 force = 0;
450 else if (likely(cp->protocol == IPPROTO_TCP)) {
451 if (!((1 << cp->state) &
452 ((1 << IP_VS_TCP_S_ESTABLISHED) |
453 (1 << IP_VS_TCP_S_FIN_WAIT) |
454 (1 << IP_VS_TCP_S_CLOSE) |
455 (1 << IP_VS_TCP_S_CLOSE_WAIT) |
456 (1 << IP_VS_TCP_S_TIME_WAIT))))
457 return 0;
458 force = cp->state != cp->old_state;
459 if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
460 goto set;
461 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
462 if (!((1 << cp->state) &
463 ((1 << IP_VS_SCTP_S_ESTABLISHED) |
464 (1 << IP_VS_SCTP_S_CLOSED) |
465 (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
466 (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
467 return 0;
468 force = cp->state != cp->old_state;
469 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
470 goto set;
471 } else {
472
473 force = 0;
474 }
475
476 sync_refresh_period = sysctl_sync_refresh_period(ipvs);
477 if (sync_refresh_period > 0) {
478 long diff = n - orig;
479 long min_diff = max(cp->timeout >> 1, 10UL * HZ);
480
481
482
483
484 if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
485 int retries = orig & 3;
486
487 if (retries >= sysctl_sync_retries(ipvs))
488 return 0;
489 if (time_before(now, orig - cp->timeout +
490 (sync_refresh_period >> 3)))
491 return 0;
492 n |= retries + 1;
493 }
494 }
495 sync_period = sysctl_sync_period(ipvs);
496 if (sync_period > 0) {
497 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
498 pkts % sync_period != sysctl_sync_threshold(ipvs))
499 return 0;
500 } else if (sync_refresh_period <= 0 &&
501 pkts != sysctl_sync_threshold(ipvs))
502 return 0;
503
504set:
505 cp->old_state = cp->state;
506 n = cmpxchg(&cp->sync_endtime, orig, n);
507 return n == orig || force;
508}
509
510
511
512
513
514static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
515 int pkts)
516{
517 struct netns_ipvs *ipvs = net_ipvs(net);
518 struct ip_vs_sync_mesg_v0 *m;
519 struct ip_vs_sync_conn_v0 *s;
520 struct ip_vs_sync_buff *buff;
521 struct ipvs_master_sync_state *ms;
522 int id;
523 int len;
524
525 if (unlikely(cp->af != AF_INET))
526 return;
527
528 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
529 return;
530
531 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
532 return;
533
534 spin_lock_bh(&ipvs->sync_buff_lock);
535 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
536 spin_unlock_bh(&ipvs->sync_buff_lock);
537 return;
538 }
539
540 id = select_master_thread_id(ipvs, cp);
541 ms = &ipvs->ms[id];
542 buff = ms->sync_buff;
543 if (buff) {
544 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
545
546 if (!m->nr_conns) {
547 sb_queue_tail(ipvs, ms);
548 ms->sync_buff = NULL;
549 buff = NULL;
550 }
551 }
552 if (!buff) {
553 buff = ip_vs_sync_buff_create_v0(ipvs);
554 if (!buff) {
555 spin_unlock_bh(&ipvs->sync_buff_lock);
556 pr_err("ip_vs_sync_buff_create failed.\n");
557 return;
558 }
559 ms->sync_buff = buff;
560 }
561
562 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
563 SIMPLE_CONN_SIZE;
564 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
565 s = (struct ip_vs_sync_conn_v0 *) buff->head;
566
567
568 s->reserved = 0;
569 s->protocol = cp->protocol;
570 s->cport = cp->cport;
571 s->vport = cp->vport;
572 s->dport = cp->dport;
573 s->caddr = cp->caddr.ip;
574 s->vaddr = cp->vaddr.ip;
575 s->daddr = cp->daddr.ip;
576 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
577 s->state = htons(cp->state);
578 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
579 struct ip_vs_sync_conn_options *opt =
580 (struct ip_vs_sync_conn_options *)&s[1];
581 memcpy(opt, &cp->in_seq, sizeof(*opt));
582 }
583
584 m->nr_conns++;
585 m->size = htons(ntohs(m->size) + len);
586 buff->head += len;
587
588
589 if (buff->head + FULL_CONN_SIZE > buff->end) {
590 sb_queue_tail(ipvs, ms);
591 ms->sync_buff = NULL;
592 }
593 spin_unlock_bh(&ipvs->sync_buff_lock);
594
595
596 cp = cp->control;
597 if (cp) {
598 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
599 pkts = atomic_add_return(1, &cp->in_pkts);
600 else
601 pkts = sysctl_sync_threshold(ipvs);
602 ip_vs_sync_conn(net, cp->control, pkts);
603 }
604}
605
606
607
608
609
610
611void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
612{
613 struct netns_ipvs *ipvs = net_ipvs(net);
614 struct ip_vs_sync_mesg *m;
615 union ip_vs_sync_conn *s;
616 struct ip_vs_sync_buff *buff;
617 struct ipvs_master_sync_state *ms;
618 int id;
619 __u8 *p;
620 unsigned int len, pe_name_len, pad;
621
622
623 if (sysctl_sync_ver(ipvs) == 0) {
624 ip_vs_sync_conn_v0(net, cp, pkts);
625 return;
626 }
627
628 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
629 goto control;
630sloop:
631 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
632 goto control;
633
634
635 pe_name_len = 0;
636 if (cp->pe_data_len) {
637 if (!cp->pe_data || !cp->dest) {
638 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
639 return;
640 }
641 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
642 }
643
644 spin_lock_bh(&ipvs->sync_buff_lock);
645 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
646 spin_unlock_bh(&ipvs->sync_buff_lock);
647 return;
648 }
649
650 id = select_master_thread_id(ipvs, cp);
651 ms = &ipvs->ms[id];
652
653#ifdef CONFIG_IP_VS_IPV6
654 if (cp->af == AF_INET6)
655 len = sizeof(struct ip_vs_sync_v6);
656 else
657#endif
658 len = sizeof(struct ip_vs_sync_v4);
659
660 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
661 len += sizeof(struct ip_vs_sync_conn_options) + 2;
662
663 if (cp->pe_data_len)
664 len += cp->pe_data_len + 2;
665 if (pe_name_len)
666 len += pe_name_len + 2;
667
668
669 pad = 0;
670 buff = ms->sync_buff;
671 if (buff) {
672 m = buff->mesg;
673 pad = (4 - (size_t) buff->head) & 3;
674
675 if (buff->head + len + pad > buff->end || m->reserved) {
676 sb_queue_tail(ipvs, ms);
677 ms->sync_buff = NULL;
678 buff = NULL;
679 pad = 0;
680 }
681 }
682
683 if (!buff) {
684 buff = ip_vs_sync_buff_create(ipvs);
685 if (!buff) {
686 spin_unlock_bh(&ipvs->sync_buff_lock);
687 pr_err("ip_vs_sync_buff_create failed.\n");
688 return;
689 }
690 ms->sync_buff = buff;
691 m = buff->mesg;
692 }
693
694 p = buff->head;
695 buff->head += pad + len;
696 m->size = htons(ntohs(m->size) + pad + len);
697
698 while (pad--)
699 *(p++) = 0;
700
701 s = (union ip_vs_sync_conn *)p;
702
703
704 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
705 s->v4.ver_size = htons(len & SVER_MASK);
706 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
707 s->v4.state = htons(cp->state);
708 s->v4.protocol = cp->protocol;
709 s->v4.cport = cp->cport;
710 s->v4.vport = cp->vport;
711 s->v4.dport = cp->dport;
712 s->v4.fwmark = htonl(cp->fwmark);
713 s->v4.timeout = htonl(cp->timeout / HZ);
714 m->nr_conns++;
715
716#ifdef CONFIG_IP_VS_IPV6
717 if (cp->af == AF_INET6) {
718 p += sizeof(struct ip_vs_sync_v6);
719 s->v6.caddr = cp->caddr.in6;
720 s->v6.vaddr = cp->vaddr.in6;
721 s->v6.daddr = cp->daddr.in6;
722 } else
723#endif
724 {
725 p += sizeof(struct ip_vs_sync_v4);
726 s->v4.caddr = cp->caddr.ip;
727 s->v4.vaddr = cp->vaddr.ip;
728 s->v4.daddr = cp->daddr.ip;
729 }
730 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
731 *(p++) = IPVS_OPT_SEQ_DATA;
732 *(p++) = sizeof(struct ip_vs_sync_conn_options);
733 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
734 p += sizeof(struct ip_vs_seq);
735 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
736 p += sizeof(struct ip_vs_seq);
737 }
738
739 if (cp->pe_data_len && cp->pe_data) {
740 *(p++) = IPVS_OPT_PE_DATA;
741 *(p++) = cp->pe_data_len;
742 memcpy(p, cp->pe_data, cp->pe_data_len);
743 p += cp->pe_data_len;
744 if (pe_name_len) {
745
746 *(p++) = IPVS_OPT_PE_NAME;
747 *(p++) = pe_name_len;
748 memcpy(p, cp->pe->name, pe_name_len);
749 p += pe_name_len;
750 }
751 }
752
753 spin_unlock_bh(&ipvs->sync_buff_lock);
754
755control:
756
757 cp = cp->control;
758 if (!cp)
759 return;
760 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
761 pkts = atomic_add_return(1, &cp->in_pkts);
762 else
763 pkts = sysctl_sync_threshold(ipvs);
764 goto sloop;
765}
766
767
768
769
770static inline int
771ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
772 struct ip_vs_conn_param *p,
773 __u8 *pe_data, unsigned int pe_data_len,
774 __u8 *pe_name, unsigned int pe_name_len)
775{
776#ifdef CONFIG_IP_VS_IPV6
777 if (af == AF_INET6)
778 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
779 (const union nf_inet_addr *)&sc->v6.caddr,
780 sc->v6.cport,
781 (const union nf_inet_addr *)&sc->v6.vaddr,
782 sc->v6.vport, p);
783 else
784#endif
785 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
786 (const union nf_inet_addr *)&sc->v4.caddr,
787 sc->v4.cport,
788 (const union nf_inet_addr *)&sc->v4.vaddr,
789 sc->v4.vport, p);
790
791 if (pe_data_len) {
792 if (pe_name_len) {
793 char buff[IP_VS_PENAME_MAXLEN+1];
794
795 memcpy(buff, pe_name, pe_name_len);
796 buff[pe_name_len]=0;
797 p->pe = __ip_vs_pe_getbyname(buff);
798 if (!p->pe) {
799 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
800 buff);
801 return 1;
802 }
803 } else {
804 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
805 return 1;
806 }
807
808 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
809 if (!p->pe_data) {
810 if (p->pe->module)
811 module_put(p->pe->module);
812 return -ENOMEM;
813 }
814 p->pe_data_len = pe_data_len;
815 }
816 return 0;
817}
818
819
820
821
822
823
824
825static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
826 unsigned int flags, unsigned int state,
827 unsigned int protocol, unsigned int type,
828 const union nf_inet_addr *daddr, __be16 dport,
829 unsigned long timeout, __u32 fwmark,
830 struct ip_vs_sync_conn_options *opt)
831{
832 struct ip_vs_dest *dest;
833 struct ip_vs_conn *cp;
834 struct netns_ipvs *ipvs = net_ipvs(net);
835
836 if (!(flags & IP_VS_CONN_F_TEMPLATE))
837 cp = ip_vs_conn_in_get(param);
838 else
839 cp = ip_vs_ct_in_get(param);
840
841 if (cp) {
842
843 kfree(param->pe_data);
844
845 dest = cp->dest;
846 spin_lock_bh(&cp->lock);
847 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
848 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
849 if (flags & IP_VS_CONN_F_INACTIVE) {
850 atomic_dec(&dest->activeconns);
851 atomic_inc(&dest->inactconns);
852 } else {
853 atomic_inc(&dest->activeconns);
854 atomic_dec(&dest->inactconns);
855 }
856 }
857 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
858 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
859 cp->flags = flags;
860 spin_unlock_bh(&cp->lock);
861 if (!dest)
862 ip_vs_try_bind_dest(cp);
863 } else {
864
865
866
867
868
869 rcu_read_lock();
870 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
871 param->vport, protocol, fwmark, flags);
872
873 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
874 rcu_read_unlock();
875 if (!cp) {
876 if (param->pe_data)
877 kfree(param->pe_data);
878 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
879 return;
880 }
881 }
882
883 if (opt)
884 memcpy(&cp->in_seq, opt, sizeof(*opt));
885 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
886 cp->state = state;
887 cp->old_state = cp->state;
888
889
890
891
892
893
894
895
896
897 if (timeout) {
898 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
899 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
900 cp->timeout = timeout*HZ;
901 } else {
902 struct ip_vs_proto_data *pd;
903
904 pd = ip_vs_proto_data_get(net, protocol);
905 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
906 cp->timeout = pd->timeout_table[state];
907 else
908 cp->timeout = (3*60*HZ);
909 }
910 ip_vs_conn_put(cp);
911}
912
913
914
915
916static void ip_vs_process_message_v0(struct net *net, const char *buffer,
917 const size_t buflen)
918{
919 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
920 struct ip_vs_sync_conn_v0 *s;
921 struct ip_vs_sync_conn_options *opt;
922 struct ip_vs_protocol *pp;
923 struct ip_vs_conn_param param;
924 char *p;
925 int i;
926
927 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
928 for (i=0; i<m->nr_conns; i++) {
929 unsigned int flags, state;
930
931 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
932 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
933 return;
934 }
935 s = (struct ip_vs_sync_conn_v0 *) p;
936 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
937 flags &= ~IP_VS_CONN_F_HASHED;
938 if (flags & IP_VS_CONN_F_SEQ_MASK) {
939 opt = (struct ip_vs_sync_conn_options *)&s[1];
940 p += FULL_CONN_SIZE;
941 if (p > buffer+buflen) {
942 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
943 return;
944 }
945 } else {
946 opt = NULL;
947 p += SIMPLE_CONN_SIZE;
948 }
949
950 state = ntohs(s->state);
951 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
952 pp = ip_vs_proto_get(s->protocol);
953 if (!pp) {
954 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
955 s->protocol);
956 continue;
957 }
958 if (state >= pp->num_states) {
959 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
960 pp->name, state);
961 continue;
962 }
963 } else {
964
965 if (state > 0) {
966 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
967 state);
968 state = 0;
969 }
970 }
971
972 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
973 (const union nf_inet_addr *)&s->caddr,
974 s->cport,
975 (const union nf_inet_addr *)&s->vaddr,
976 s->vport, ¶m);
977
978
979 ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
980 (union nf_inet_addr *)&s->daddr, s->dport,
981 0, 0, opt);
982 }
983}
984
985
986
987
988static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
989 __u32 *opt_flags,
990 struct ip_vs_sync_conn_options *opt)
991{
992 struct ip_vs_sync_conn_options *topt;
993
994 topt = (struct ip_vs_sync_conn_options *)p;
995
996 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
997 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
998 return -EINVAL;
999 }
1000 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
1001 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
1002 return -EINVAL;
1003 }
1004 ntoh_seq(&topt->in_seq, &opt->in_seq);
1005 ntoh_seq(&topt->out_seq, &opt->out_seq);
1006 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
1007 return 0;
1008}
1009
1010static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1011 __u8 **data, unsigned int maxlen,
1012 __u32 *opt_flags, __u32 flag)
1013{
1014 if (plen > maxlen) {
1015 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
1016 return -EINVAL;
1017 }
1018 if (*opt_flags & flag) {
1019 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
1020 return -EINVAL;
1021 }
1022 *data_len = plen;
1023 *data = p;
1024 *opt_flags |= flag;
1025 return 0;
1026}
1027
1028
1029
1030static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
1031{
1032 struct ip_vs_sync_conn_options opt;
1033 union ip_vs_sync_conn *s;
1034 struct ip_vs_protocol *pp;
1035 struct ip_vs_conn_param param;
1036 __u32 flags;
1037 unsigned int af, state, pe_data_len=0, pe_name_len=0;
1038 __u8 *pe_data=NULL, *pe_name=NULL;
1039 __u32 opt_flags=0;
1040 int retc=0;
1041
1042 s = (union ip_vs_sync_conn *) p;
1043
1044 if (s->v6.type & STYPE_F_INET6) {
1045#ifdef CONFIG_IP_VS_IPV6
1046 af = AF_INET6;
1047 p += sizeof(struct ip_vs_sync_v6);
1048#else
1049 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1050 retc = 10;
1051 goto out;
1052#endif
1053 } else if (!s->v4.type) {
1054 af = AF_INET;
1055 p += sizeof(struct ip_vs_sync_v4);
1056 } else {
1057 return -10;
1058 }
1059 if (p > msg_end)
1060 return -20;
1061
1062
1063 while (p < msg_end) {
1064 int ptype;
1065 int plen;
1066
1067 if (p+2 > msg_end)
1068 return -30;
1069 ptype = *(p++);
1070 plen = *(p++);
1071
1072 if (!plen || ((p + plen) > msg_end))
1073 return -40;
1074
1075 switch (ptype & ~IPVS_OPT_F_PARAM) {
1076 case IPVS_OPT_SEQ_DATA:
1077 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1078 return -50;
1079 break;
1080
1081 case IPVS_OPT_PE_DATA:
1082 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1083 IP_VS_PEDATA_MAXLEN, &opt_flags,
1084 IPVS_OPT_F_PE_DATA))
1085 return -60;
1086 break;
1087
1088 case IPVS_OPT_PE_NAME:
1089 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1090 IP_VS_PENAME_MAXLEN, &opt_flags,
1091 IPVS_OPT_F_PE_NAME))
1092 return -70;
1093 break;
1094
1095 default:
1096
1097 if (!(ptype & IPVS_OPT_F_PARAM)) {
1098 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1099 ptype & ~IPVS_OPT_F_PARAM);
1100 retc = 20;
1101 goto out;
1102 }
1103 }
1104 p += plen;
1105 }
1106
1107
1108 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1109 flags |= IP_VS_CONN_F_SYNC;
1110 state = ntohs(s->v4.state);
1111
1112 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1113 pp = ip_vs_proto_get(s->v4.protocol);
1114 if (!pp) {
1115 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1116 s->v4.protocol);
1117 retc = 30;
1118 goto out;
1119 }
1120 if (state >= pp->num_states) {
1121 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1122 pp->name, state);
1123 retc = 40;
1124 goto out;
1125 }
1126 } else {
1127
1128 if (state > 0) {
1129 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1130 state);
1131 state = 0;
1132 }
1133 }
1134 if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
1135 pe_data_len, pe_name, pe_name_len)) {
1136 retc = 50;
1137 goto out;
1138 }
1139
1140 if (af == AF_INET)
1141 ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
1142 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1143 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1144 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1145 );
1146#ifdef CONFIG_IP_VS_IPV6
1147 else
1148 ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
1149 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1150 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1151 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1152 );
1153#endif
1154 return 0;
1155
1156out:
1157 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1158 return retc;
1159
1160}
1161
1162
1163
1164
1165
1166static void ip_vs_process_message(struct net *net, __u8 *buffer,
1167 const size_t buflen)
1168{
1169 struct netns_ipvs *ipvs = net_ipvs(net);
1170 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1171 __u8 *p, *msg_end;
1172 int i, nr_conns;
1173
1174 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1175 IP_VS_DBG(2, "BACKUP, message header too short\n");
1176 return;
1177 }
1178
1179 if (buflen != ntohs(m2->size)) {
1180 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1181 return;
1182 }
1183
1184 if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
1185 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1186 return;
1187 }
1188
1189 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1190 && (m2->spare == 0)) {
1191
1192 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1193 nr_conns = m2->nr_conns;
1194
1195 for (i=0; i<nr_conns; i++) {
1196 union ip_vs_sync_conn *s;
1197 unsigned int size;
1198 int retc;
1199
1200 p = msg_end;
1201 if (p + sizeof(s->v4) > buffer+buflen) {
1202 IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
1203 return;
1204 }
1205 s = (union ip_vs_sync_conn *)p;
1206 size = ntohs(s->v4.ver_size) & SVER_MASK;
1207 msg_end = p + size;
1208
1209 if (msg_end > buffer+buflen) {
1210 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
1211 return;
1212 }
1213 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
1214 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
1215 ntohs(s->v4.ver_size) >> SVER_SHIFT);
1216 return;
1217 }
1218
1219 retc = ip_vs_proc_sync_conn(net, p, msg_end);
1220 if (retc < 0) {
1221 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
1222 retc);
1223 return;
1224 }
1225
1226 msg_end = p + ((size + 3) & ~3);
1227 }
1228 } else {
1229
1230 ip_vs_process_message_v0(net, buffer, buflen);
1231 return;
1232 }
1233}
1234
1235
1236
1237
1238
1239static void set_sock_size(struct sock *sk, int mode, int val)
1240{
1241
1242
1243 lock_sock(sk);
1244 if (mode) {
1245 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
1246 sysctl_wmem_max);
1247 sk->sk_sndbuf = val * 2;
1248 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1249 } else {
1250 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
1251 sysctl_rmem_max);
1252 sk->sk_rcvbuf = val * 2;
1253 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1254 }
1255 release_sock(sk);
1256}
1257
1258
1259
1260
1261static void set_mcast_loop(struct sock *sk, u_char loop)
1262{
1263 struct inet_sock *inet = inet_sk(sk);
1264
1265
1266 lock_sock(sk);
1267 inet->mc_loop = loop ? 1 : 0;
1268 release_sock(sk);
1269}
1270
1271
1272
1273
1274static void set_mcast_ttl(struct sock *sk, u_char ttl)
1275{
1276 struct inet_sock *inet = inet_sk(sk);
1277
1278
1279 lock_sock(sk);
1280 inet->mc_ttl = ttl;
1281 release_sock(sk);
1282}
1283
1284
1285
1286
1287static int set_mcast_if(struct sock *sk, char *ifname)
1288{
1289 struct net_device *dev;
1290 struct inet_sock *inet = inet_sk(sk);
1291 struct net *net = sock_net(sk);
1292
1293 dev = __dev_get_by_name(net, ifname);
1294 if (!dev)
1295 return -ENODEV;
1296
1297 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1298 return -EINVAL;
1299
1300 lock_sock(sk);
1301 inet->mc_index = dev->ifindex;
1302
1303 release_sock(sk);
1304
1305 return 0;
1306}
1307
1308
1309
1310
1311
1312
1313static int set_sync_mesg_maxlen(struct net *net, int sync_state)
1314{
1315 struct netns_ipvs *ipvs = net_ipvs(net);
1316 struct net_device *dev;
1317 int num;
1318
1319 if (sync_state == IP_VS_STATE_MASTER) {
1320 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1321 if (!dev)
1322 return -ENODEV;
1323
1324 num = (dev->mtu - sizeof(struct iphdr) -
1325 sizeof(struct udphdr) -
1326 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
1327 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
1328 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
1329 IP_VS_DBG(7, "setting the maximum length of sync sending "
1330 "message %d.\n", ipvs->send_mesg_maxlen);
1331 } else if (sync_state == IP_VS_STATE_BACKUP) {
1332 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1333 if (!dev)
1334 return -ENODEV;
1335
1336 ipvs->recv_mesg_maxlen = dev->mtu -
1337 sizeof(struct iphdr) - sizeof(struct udphdr);
1338 IP_VS_DBG(7, "setting the maximum length of sync receiving "
1339 "message %d.\n", ipvs->recv_mesg_maxlen);
1340 }
1341
1342 return 0;
1343}
1344
1345
1346
1347
1348
1349
1350
1351static int
1352join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
1353{
1354 struct net *net = sock_net(sk);
1355 struct ip_mreqn mreq;
1356 struct net_device *dev;
1357 int ret;
1358
1359 memset(&mreq, 0, sizeof(mreq));
1360 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1361
1362 dev = __dev_get_by_name(net, ifname);
1363 if (!dev)
1364 return -ENODEV;
1365 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1366 return -EINVAL;
1367
1368 mreq.imr_ifindex = dev->ifindex;
1369
1370 lock_sock(sk);
1371 ret = ip_mc_join_group(sk, &mreq);
1372 release_sock(sk);
1373
1374 return ret;
1375}
1376
1377
1378static int bind_mcastif_addr(struct socket *sock, char *ifname)
1379{
1380 struct net *net = sock_net(sock->sk);
1381 struct net_device *dev;
1382 __be32 addr;
1383 struct sockaddr_in sin;
1384
1385 dev = __dev_get_by_name(net, ifname);
1386 if (!dev)
1387 return -ENODEV;
1388
1389 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1390 if (!addr)
1391 pr_err("You probably need to specify IP address on "
1392 "multicast interface.\n");
1393
1394 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
1395 ifname, &addr);
1396
1397
1398 sin.sin_family = AF_INET;
1399 sin.sin_addr.s_addr = addr;
1400 sin.sin_port = 0;
1401
1402 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
1403}
1404
1405
1406
1407
1408static struct socket *make_send_sock(struct net *net, int id)
1409{
1410 struct netns_ipvs *ipvs = net_ipvs(net);
1411
1412 struct sockaddr_in mcast_addr = {
1413 .sin_family = AF_INET,
1414 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1415 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1416 };
1417 struct socket *sock;
1418 int result;
1419
1420
1421 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1422 if (result < 0) {
1423 pr_err("Error during creation of socket; terminating\n");
1424 return ERR_PTR(result);
1425 }
1426
1427
1428
1429
1430
1431 sk_change_net(sock->sk, net);
1432 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
1433 if (result < 0) {
1434 pr_err("Error setting outbound mcast interface\n");
1435 goto error;
1436 }
1437
1438 set_mcast_loop(sock->sk, 0);
1439 set_mcast_ttl(sock->sk, 1);
1440 result = sysctl_sync_sock_size(ipvs);
1441 if (result > 0)
1442 set_sock_size(sock->sk, 1, result);
1443
1444 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
1445 if (result < 0) {
1446 pr_err("Error binding address of the mcast interface\n");
1447 goto error;
1448 }
1449
1450 result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
1451 sizeof(struct sockaddr), 0);
1452 if (result < 0) {
1453 pr_err("Error connecting to the multicast addr\n");
1454 goto error;
1455 }
1456
1457 return sock;
1458
1459error:
1460 sk_release_kernel(sock->sk);
1461 return ERR_PTR(result);
1462}
1463
1464
1465
1466
1467
1468static struct socket *make_receive_sock(struct net *net, int id)
1469{
1470 struct netns_ipvs *ipvs = net_ipvs(net);
1471
1472 struct sockaddr_in mcast_addr = {
1473 .sin_family = AF_INET,
1474 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1475 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1476 };
1477 struct socket *sock;
1478 int result;
1479
1480
1481 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1482 if (result < 0) {
1483 pr_err("Error during creation of socket; terminating\n");
1484 return ERR_PTR(result);
1485 }
1486
1487
1488
1489
1490
1491 sk_change_net(sock->sk, net);
1492
1493 sock->sk->sk_reuse = SK_CAN_REUSE;
1494 result = sysctl_sync_sock_size(ipvs);
1495 if (result > 0)
1496 set_sock_size(sock->sk, 0, result);
1497
1498 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
1499 sizeof(struct sockaddr));
1500 if (result < 0) {
1501 pr_err("Error binding to the multicast addr\n");
1502 goto error;
1503 }
1504
1505
1506 result = join_mcast_group(sock->sk,
1507 (struct in_addr *) &mcast_addr.sin_addr,
1508 ipvs->backup_mcast_ifn);
1509 if (result < 0) {
1510 pr_err("Error joining to the multicast group\n");
1511 goto error;
1512 }
1513
1514 return sock;
1515
1516error:
1517 sk_release_kernel(sock->sk);
1518 return ERR_PTR(result);
1519}
1520
1521
1522static int
1523ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1524{
1525 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1526 struct kvec iov;
1527 int len;
1528
1529 EnterFunction(7);
1530 iov.iov_base = (void *)buffer;
1531 iov.iov_len = length;
1532
1533 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1534
1535 LeaveFunction(7);
1536 return len;
1537}
1538
1539static int
1540ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1541{
1542 int msize;
1543 int ret;
1544
1545 msize = ntohs(msg->size);
1546
1547 ret = ip_vs_send_async(sock, (char *)msg, msize);
1548 if (ret >= 0 || ret == -EAGAIN)
1549 return ret;
1550 pr_err("ip_vs_send_async error %d\n", ret);
1551 return 0;
1552}
1553
1554static int
1555ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1556{
1557 struct msghdr msg = {NULL,};
1558 struct kvec iov;
1559 int len;
1560
1561 EnterFunction(7);
1562
1563
1564 iov.iov_base = buffer;
1565 iov.iov_len = (size_t)buflen;
1566
1567 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1568
1569 if (len < 0)
1570 return len;
1571
1572 LeaveFunction(7);
1573 return len;
1574}
1575
1576
1577static void master_wakeup_work_handler(struct work_struct *work)
1578{
1579 struct ipvs_master_sync_state *ms =
1580 container_of(work, struct ipvs_master_sync_state,
1581 master_wakeup_work.work);
1582 struct netns_ipvs *ipvs = ms->ipvs;
1583
1584 spin_lock_bh(&ipvs->sync_lock);
1585 if (ms->sync_queue_len &&
1586 ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
1587 ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
1588 wake_up_process(ms->master_thread);
1589 }
1590 spin_unlock_bh(&ipvs->sync_lock);
1591}
1592
1593
1594static inline struct ip_vs_sync_buff *
1595next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1596{
1597 struct ip_vs_sync_buff *sb;
1598
1599 sb = sb_dequeue(ipvs, ms);
1600 if (sb)
1601 return sb;
1602
1603 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1604}
1605
1606static int sync_thread_master(void *data)
1607{
1608 struct ip_vs_sync_thread_data *tinfo = data;
1609 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1610 struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
1611 struct sock *sk = tinfo->sock->sk;
1612 struct ip_vs_sync_buff *sb;
1613
1614 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
1615 "syncid = %d, id = %d\n",
1616 ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
1617
1618 for (;;) {
1619 sb = next_sync_buff(ipvs, ms);
1620 if (unlikely(kthread_should_stop()))
1621 break;
1622 if (!sb) {
1623 schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
1624 continue;
1625 }
1626 while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
1627 int ret = 0;
1628
1629 __wait_event_interruptible(*sk_sleep(sk),
1630 sock_writeable(sk) ||
1631 kthread_should_stop(),
1632 ret);
1633 if (unlikely(kthread_should_stop()))
1634 goto done;
1635 }
1636 ip_vs_sync_buff_release(sb);
1637 }
1638
1639done:
1640 __set_current_state(TASK_RUNNING);
1641 if (sb)
1642 ip_vs_sync_buff_release(sb);
1643
1644
1645 while ((sb = sb_dequeue(ipvs, ms)))
1646 ip_vs_sync_buff_release(sb);
1647 __set_current_state(TASK_RUNNING);
1648
1649
1650 sb = get_curr_sync_buff(ipvs, ms, 0);
1651 if (sb)
1652 ip_vs_sync_buff_release(sb);
1653
1654
1655 sk_release_kernel(tinfo->sock->sk);
1656 kfree(tinfo);
1657
1658 return 0;
1659}
1660
1661
1662static int sync_thread_backup(void *data)
1663{
1664 struct ip_vs_sync_thread_data *tinfo = data;
1665 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1666 int len;
1667
1668 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
1669 "syncid = %d, id = %d\n",
1670 ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
1671
1672 while (!kthread_should_stop()) {
1673 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
1674 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
1675 || kthread_should_stop());
1676
1677
1678 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
1679 len = ip_vs_receive(tinfo->sock, tinfo->buf,
1680 ipvs->recv_mesg_maxlen);
1681 if (len <= 0) {
1682 if (len != -EAGAIN)
1683 pr_err("receiving message error\n");
1684 break;
1685 }
1686
1687 ip_vs_process_message(tinfo->net, tinfo->buf, len);
1688 }
1689 }
1690
1691
1692 sk_release_kernel(tinfo->sock->sk);
1693 kfree(tinfo->buf);
1694 kfree(tinfo);
1695
1696 return 0;
1697}
1698
1699
1700int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
1701{
1702 struct ip_vs_sync_thread_data *tinfo;
1703 struct task_struct **array = NULL, *task;
1704 struct socket *sock;
1705 struct netns_ipvs *ipvs = net_ipvs(net);
1706 char *name;
1707 int (*threadfn)(void *data);
1708 int id, count;
1709 int result = -ENOMEM;
1710
1711 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1712 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
1713 sizeof(struct ip_vs_sync_conn_v0));
1714
1715 if (!ipvs->sync_state) {
1716 count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
1717 ipvs->threads_mask = count - 1;
1718 } else
1719 count = ipvs->threads_mask + 1;
1720
1721 if (state == IP_VS_STATE_MASTER) {
1722 if (ipvs->ms)
1723 return -EEXIST;
1724
1725 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
1726 sizeof(ipvs->master_mcast_ifn));
1727 ipvs->master_syncid = syncid;
1728 name = "ipvs-m:%d:%d";
1729 threadfn = sync_thread_master;
1730 } else if (state == IP_VS_STATE_BACKUP) {
1731 if (ipvs->backup_threads)
1732 return -EEXIST;
1733
1734 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
1735 sizeof(ipvs->backup_mcast_ifn));
1736 ipvs->backup_syncid = syncid;
1737 name = "ipvs-b:%d:%d";
1738 threadfn = sync_thread_backup;
1739 } else {
1740 return -EINVAL;
1741 }
1742
1743 if (state == IP_VS_STATE_MASTER) {
1744 struct ipvs_master_sync_state *ms;
1745
1746 ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
1747 if (!ipvs->ms)
1748 goto out;
1749 ms = ipvs->ms;
1750 for (id = 0; id < count; id++, ms++) {
1751 INIT_LIST_HEAD(&ms->sync_queue);
1752 ms->sync_queue_len = 0;
1753 ms->sync_queue_delay = 0;
1754 INIT_DELAYED_WORK(&ms->master_wakeup_work,
1755 master_wakeup_work_handler);
1756 ms->ipvs = ipvs;
1757 }
1758 } else {
1759 array = kzalloc(count * sizeof(struct task_struct *),
1760 GFP_KERNEL);
1761 if (!array)
1762 goto out;
1763 }
1764 set_sync_mesg_maxlen(net, state);
1765
1766 tinfo = NULL;
1767 for (id = 0; id < count; id++) {
1768 if (state == IP_VS_STATE_MASTER)
1769 sock = make_send_sock(net, id);
1770 else
1771 sock = make_receive_sock(net, id);
1772 if (IS_ERR(sock)) {
1773 result = PTR_ERR(sock);
1774 goto outtinfo;
1775 }
1776 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
1777 if (!tinfo)
1778 goto outsocket;
1779 tinfo->net = net;
1780 tinfo->sock = sock;
1781 if (state == IP_VS_STATE_BACKUP) {
1782 tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
1783 GFP_KERNEL);
1784 if (!tinfo->buf)
1785 goto outtinfo;
1786 } else {
1787 tinfo->buf = NULL;
1788 }
1789 tinfo->id = id;
1790
1791 task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
1792 if (IS_ERR(task)) {
1793 result = PTR_ERR(task);
1794 goto outtinfo;
1795 }
1796 tinfo = NULL;
1797 if (state == IP_VS_STATE_MASTER)
1798 ipvs->ms[id].master_thread = task;
1799 else
1800 array[id] = task;
1801 }
1802
1803
1804
1805 if (state == IP_VS_STATE_BACKUP)
1806 ipvs->backup_threads = array;
1807 spin_lock_bh(&ipvs->sync_buff_lock);
1808 ipvs->sync_state |= state;
1809 spin_unlock_bh(&ipvs->sync_buff_lock);
1810
1811
1812 ip_vs_use_count_inc();
1813
1814 return 0;
1815
1816outsocket:
1817 sk_release_kernel(sock->sk);
1818
1819outtinfo:
1820 if (tinfo) {
1821 sk_release_kernel(tinfo->sock->sk);
1822 kfree(tinfo->buf);
1823 kfree(tinfo);
1824 }
1825 count = id;
1826 while (count-- > 0) {
1827 if (state == IP_VS_STATE_MASTER)
1828 kthread_stop(ipvs->ms[count].master_thread);
1829 else
1830 kthread_stop(array[count]);
1831 }
1832 kfree(array);
1833
1834out:
1835 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
1836 kfree(ipvs->ms);
1837 ipvs->ms = NULL;
1838 }
1839 return result;
1840}
1841
1842
1843int stop_sync_thread(struct net *net, int state)
1844{
1845 struct netns_ipvs *ipvs = net_ipvs(net);
1846 struct task_struct **array;
1847 int id;
1848 int retc = -EINVAL;
1849
1850 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1851
1852 if (state == IP_VS_STATE_MASTER) {
1853 if (!ipvs->ms)
1854 return -ESRCH;
1855
1856
1857
1858
1859
1860
1861
1862 spin_lock_bh(&ipvs->sync_buff_lock);
1863 spin_lock(&ipvs->sync_lock);
1864 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1865 spin_unlock(&ipvs->sync_lock);
1866 spin_unlock_bh(&ipvs->sync_buff_lock);
1867
1868 retc = 0;
1869 for (id = ipvs->threads_mask; id >= 0; id--) {
1870 struct ipvs_master_sync_state *ms = &ipvs->ms[id];
1871 int ret;
1872
1873 pr_info("stopping master sync thread %d ...\n",
1874 task_pid_nr(ms->master_thread));
1875 cancel_delayed_work_sync(&ms->master_wakeup_work);
1876 ret = kthread_stop(ms->master_thread);
1877 if (retc >= 0)
1878 retc = ret;
1879 }
1880 kfree(ipvs->ms);
1881 ipvs->ms = NULL;
1882 } else if (state == IP_VS_STATE_BACKUP) {
1883 if (!ipvs->backup_threads)
1884 return -ESRCH;
1885
1886 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1887 array = ipvs->backup_threads;
1888 retc = 0;
1889 for (id = ipvs->threads_mask; id >= 0; id--) {
1890 int ret;
1891
1892 pr_info("stopping backup sync thread %d ...\n",
1893 task_pid_nr(array[id]));
1894 ret = kthread_stop(array[id]);
1895 if (retc >= 0)
1896 retc = ret;
1897 }
1898 kfree(array);
1899 ipvs->backup_threads = NULL;
1900 }
1901
1902
1903 ip_vs_use_count_dec();
1904
1905 return retc;
1906}
1907
1908
1909
1910
1911int __net_init ip_vs_sync_net_init(struct net *net)
1912{
1913 struct netns_ipvs *ipvs = net_ipvs(net);
1914
1915 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
1916 spin_lock_init(&ipvs->sync_lock);
1917 spin_lock_init(&ipvs->sync_buff_lock);
1918 return 0;
1919}
1920
1921void ip_vs_sync_net_cleanup(struct net *net)
1922{
1923 int retc;
1924 struct netns_ipvs *ipvs = net_ipvs(net);
1925
1926 mutex_lock(&ipvs->sync_mutex);
1927 retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
1928 if (retc && retc != -ESRCH)
1929 pr_err("Failed to stop Master Daemon\n");
1930
1931 retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
1932 if (retc && retc != -ESRCH)
1933 pr_err("Failed to stop Backup Daemon\n");
1934 mutex_unlock(&ipvs->sync_mutex);
1935}
1936