/*
 * ip_vs_sync.c: sync connection info from master load balancer to backups
 *               through multicast.
 *
 * Version 1,   is capable of handling both version 0 and 1 messages.
 *              Version 0 is the plain old format.
 *              Note Version 0 receivers will just drop Ver 1 messages.
 *              Version 1 is capable of handling IPv6, persistence data,
 *              time-outs and firewall marks.
 *              In ver.1 "ip_vs_sync_conn_options" is sent in network order.
 *              Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
 *
 * Definitions: Message: is a complete datagram.
 *              Sync_conn: is a part of a Message.
 *              Param Data: is an option to a Sync_conn.
 */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/inetdevice.h>
#include <linux/net.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/igmp.h>
#include <linux/udp.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/kernel.h>

#include <asm/unaligned.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ip_vs.h>

#define IP_VS_SYNC_GROUP	0xe0000051	/* multicast addr - 224.0.0.81 */
#define IP_VS_SYNC_PORT		8848		/* multicast port */

#define SYNC_PROTO_VER		1		/* Protocol version in header */

static struct lock_class_key __ipvs_sync_key;
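
/*
 *	IPVS sync connection entry
 *	Version 0, i.e. original version.
 */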
struct ip_vs_sync_conn_v0 {
	__u8			reserved;

	/* Protocol, addresses and port numbers */
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			cport;
	__be16			vport;
	__be16			dport;
	__be32			caddr;		/* client address */
	__be32			vaddr;		/* virtual address */
	__be32			daddr;		/* destination address */

	/* Flags and state transition */
	__be16			flags;		/* status flags */
	__be16			state;		/* state info */

	/* The sequence options start here */
};

struct ip_vs_sync_conn_options {
	struct ip_vs_seq	in_seq;		/* incoming seq. struct */
	struct ip_vs_seq	out_seq;	/* outgoing seq. struct */
};

/*
 * Sync Connection format (sync_conn), Version 1
 *
 *	Type		1 byte	(bit 0 set means IPv6 addresses follow)
 *	Protocol	1 byte	(which protocol, TCP/UDP/...)
 *	Ver./Size	2 bytes	(version in the 4 msb, size in the 12 lsb)
 *	Flags		4 bytes
 *	State		2 bytes
 *	cport		2 bytes
 *	vport		2 bytes
 *	dport		2 bytes
 *	fwmark		4 bytes
 *	timeout		4 bytes	(in seconds)
 *	caddr, vaddr, daddr	(IPv4 or IPv6 addresses)
 *
 * Followed by optional parameters, each encoded as Param. Type (1 byte),
 * Param. Length (1 byte) and Param. data; the data of the last parameter
 * is padded for 32 bit alignment.
 */
struct ip_vs_sync_v4 {
	__u8			type;
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			ver_size;	/* Version msb 4 bits */
	/* Flags and state transition */
	__be32			flags;		/* status flags */
	__be16			state;		/* state info */
	/* Protocol, addresses and port numbers */
	__be16			cport;
	__be16			vport;
	__be16			dport;
	__be32			fwmark;		/* Firewall mark from skb */
	__be32			timeout;	/* cp timeout */
	__be32			caddr;		/* client address */
	__be32			vaddr;		/* virtual address */
	__be32			daddr;		/* destination address */
	/* The sequence options start here */
	/* PE data padded to 32bit alignment after seq. options */
};

/* Same as ip_vs_sync_v4, but with IPv6 addresses */
struct ip_vs_sync_v6 {
	__u8			type;
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			ver_size;	/* Version msb 4 bits */
	/* Flags and state transition */
	__be32			flags;		/* status flags */
	__be16			state;		/* state info */
	/* Protocol, addresses and port numbers */
	__be16			cport;
	__be16			vport;
	__be16			dport;
	__be32			fwmark;		/* Firewall mark from skb */
	__be32			timeout;	/* cp timeout */
	struct in6_addr		caddr;		/* client address */
	struct in6_addr		vaddr;		/* virtual address */
	struct in6_addr		daddr;		/* destination address */
	/* The sequence options start here */
	/* PE data padded to 32bit alignment after seq. options */
};

union ip_vs_sync_conn {
	struct ip_vs_sync_v4	v4;
	struct ip_vs_sync_v6	v6;
};

/* Bits in Type field in above message */
#define STYPE_INET6		0
#define STYPE_F_INET6		(1 << STYPE_INET6)

#define SVER_SHIFT		12	/* Shift to get version */
#define SVER_MASK		0x0fff	/* Mask to strip version */

#define IPVS_OPT_SEQ_DATA	1
#define IPVS_OPT_PE_DATA	2
#define IPVS_OPT_PE_NAME	3
#define IPVS_OPT_PARAM		7

#define IPVS_OPT_F_SEQ_DATA	(1 << (IPVS_OPT_SEQ_DATA-1))
#define IPVS_OPT_F_PE_DATA	(1 << (IPVS_OPT_PE_DATA-1))
#define IPVS_OPT_F_PE_NAME	(1 << (IPVS_OPT_PE_NAME-1))
#define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))

struct ip_vs_sync_thread_data {
	struct net *net;
	struct socket *sock;
	char *buf;
	int id;
};

/* Version 0 definition of packet sizes */
#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE  \
(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))

/*
 * The master multicasts messages (datagrams) to the backup load balancers
 * in the following format.
 *
 * Version 1 header:
 *	The first byte is zero, so version 0 receivers will just drop the
 *	whole packet (they read it as nr_conns == 0).
 *
 *	0 (1 byte) | SyncID (1 byte) | Size (2 bytes)
 *	Count Conns (1 byte) | Version (1 byte) | Spare, set to zero (2 bytes)
 *	followed by nr_conns ip_vs_sync_conn entries.
 *
 * Version 0 header:
 *	Count Conns (1 byte) | SyncID (1 byte) | Size (2 bytes)
 *	followed by nr_conns ip_vs_sync_conn_v0 entries.
 */
#define SYNC_MESG_HEADER_LEN	4
#define MAX_CONNS_PER_SYNCBUFF	255 /* nr_conns in ip_vs_sync_mesg is 8 bit */

/* Version 0 header */
struct ip_vs_sync_mesg_v0 {
	__u8			nr_conns;
	__u8			syncid;
	__be16			size;

	/* ip_vs_sync_conn entries start here */
};

/* Version 1 header */
struct ip_vs_sync_mesg {
	__u8			reserved;	/* must be zero */
	__u8			syncid;
	__be16			size;
	__u8			nr_conns;
	__s8			version;	/* SYNC_PROTO_VER */
	__u16			spare;
	/* ip_vs_sync_conn entries start here */
};

struct ip_vs_sync_buff {
	struct list_head	list;
	unsigned long		firstuse;

	/* pointers for the message data */
	struct ip_vs_sync_mesg	*mesg;
	unsigned char		*head;
	unsigned char		*end;
};

/*
 * Copy of struct ip_vs_seq
 * From unaligned network order to aligned host order
 */
static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
{
	ho->init_seq = get_unaligned_be32(&no->init_seq);
	ho->delta = get_unaligned_be32(&no->delta);
	ho->previous_delta = get_unaligned_be32(&no->previous_delta);
}

/*
 * Copy of struct ip_vs_seq
 * From aligned host order to unaligned network order
 */
static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
{
	put_unaligned_be32(ho->init_seq, &no->init_seq);
	put_unaligned_be32(ho->delta, &no->delta);
	put_unaligned_be32(ho->previous_delta, &no->previous_delta);
}
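
/*
 * Get the first sync buffer from the master sync queue, or prepare the
 * calling thread to sleep (TASK_INTERRUPTIBLE) when the queue is empty.
 */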
static inline struct ip_vs_sync_buff *
sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb;

	spin_lock_bh(&ipvs->sync_lock);
	if (list_empty(&ms->sync_queue)) {
		sb = NULL;
		__set_current_state(TASK_INTERRUPTIBLE);
	} else {
		sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
				list);
		list_del(&sb->list);
		ms->sync_queue_len--;
		if (!ms->sync_queue_len)
			ms->sync_queue_delay = 0;
	}
	spin_unlock_bh(&ipvs->sync_lock);

	return sb;
}
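
/*
 * Create a new sync buffer for Version 1 proto.
 */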
static inline struct ip_vs_sync_buff *
ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
{
	struct ip_vs_sync_buff *sb;

	if (!(sb = kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
		return NULL;

	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
	if (!sb->mesg) {
		kfree(sb);
		return NULL;
	}
	sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zero now */
	sb->mesg->version = SYNC_PROTO_VER;
	sb->mesg->syncid = ipvs->master_syncid;
	sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
	sb->mesg->nr_conns = 0;
	sb->mesg->spare = 0;
	sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
	sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;

	sb->firstuse = jiffies;
	return sb;
}

static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
{
	kfree(sb->mesg);
	kfree(sb);
}

static inline void sb_queue_tail(struct netns_ipvs *ipvs,
				 struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb = ms->sync_buff;

	spin_lock(&ipvs->sync_lock);
	if (ipvs->sync_state & IP_VS_STATE_MASTER &&
	    ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
		if (!ms->sync_queue_len)
			schedule_delayed_work(&ms->master_wakeup_work,
					      max(IPVS_SYNC_SEND_DELAY, 1));
		ms->sync_queue_len++;
		list_add_tail(&sb->list, &ms->sync_queue);
		if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
			wake_up_process(ms->master_thread);
	} else
		ip_vs_sync_buff_release(sb);
	spin_unlock(&ipvs->sync_lock);
}

/*
 * Get the current sync buffer if it has been created for more
 * than the specified time or the specified time is 0.
 */
static inline struct ip_vs_sync_buff *
get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
		   unsigned long time)
{
	struct ip_vs_sync_buff *sb;

	spin_lock_bh(&ipvs->sync_buff_lock);
	sb = ms->sync_buff;
	if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
		ms->sync_buff = NULL;
		__set_current_state(TASK_RUNNING);
	} else
		sb = NULL;
	spin_unlock_bh(&ipvs->sync_buff_lock);
	return sb;
}
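
/*
 * Spread sync_conns over the master threads: hash on the connection
 * pointer, with the slab alignment bits shifted out.
 */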
static inline int
select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
{
	return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
}

/*
 * Create a new sync buffer for Version 0 proto.
 */
static inline struct ip_vs_sync_buff *
ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
{
	struct ip_vs_sync_buff *sb;
	struct ip_vs_sync_mesg_v0 *mesg;

	if (!(sb = kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
		return NULL;

	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
	if (!sb->mesg) {
		kfree(sb);
		return NULL;
	}
	mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
	mesg->nr_conns = 0;
	mesg->syncid = ipvs->master_syncid;
	mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
	sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
	sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
	sb->firstuse = jiffies;
	return sb;
}
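
/* Check if conn should be synced.
 * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
 * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
 *	sync_retries times with period of sync_refresh_period/8
 * - (2) if both sync_refresh_period and sync_period are 0 send sync only
 *	for state changes or only once when pkts matches sync_threshold
 * - (3) templates: rate can be reduced only with sync_refresh_period or
 *	with (2)
 */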
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
				  struct ip_vs_conn *cp, int pkts)
{
	unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
	unsigned long now = jiffies;
	unsigned long n = (now + cp->timeout) & ~3UL;
	unsigned int sync_refresh_period;
	int sync_period;
	int force;

	/* Check if we sync in current state */
	if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
		force = 0;
	else if (likely(cp->protocol == IPPROTO_TCP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_TCP_S_ESTABLISHED) |
		       (1 << IP_VS_TCP_S_FIN_WAIT) |
		       (1 << IP_VS_TCP_S_CLOSE) |
		       (1 << IP_VS_TCP_S_CLOSE_WAIT) |
		       (1 << IP_VS_TCP_S_TIME_WAIT))))
			return 0;
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
			goto set;
	} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
		       (1 << IP_VS_SCTP_S_CLOSED) |
		       (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
		       (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
			return 0;
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
			goto set;
	} else {
		/* UDP or another protocol with single state */
		force = 0;
	}

	sync_refresh_period = sysctl_sync_refresh_period(ipvs);
	if (sync_refresh_period > 0) {
		long diff = n - orig;
		long min_diff = max(cp->timeout >> 1, 10UL * HZ);

		/* Avoid sync if difference is below sync_refresh_period
		 * and below the half timeout.
		 */
		if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
			int retries = orig & 3;

			if (retries >= sysctl_sync_retries(ipvs))
				return 0;
			if (time_before(now, orig - cp->timeout +
					(sync_refresh_period >> 3)))
				return 0;
			n |= retries + 1;
		}
	}
	sync_period = sysctl_sync_period(ipvs);
	if (sync_period > 0) {
		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
		    pkts % sync_period != sysctl_sync_threshold(ipvs))
			return 0;
	} else if (sync_refresh_period <= 0 &&
		   pkts != sysctl_sync_threshold(ipvs))
		return 0;

set:
	cp->old_state = cp->state;
	n = cmpxchg(&cp->sync_endtime, orig, n);
	return n == orig || force;
}
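
/*
 * Version 0, could be switched in by sys_ctl.
 * Add an ip_vs_conn information into the current sync_buff.
 */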
static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
			       int pkts)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_sync_mesg_v0 *m;
	struct ip_vs_sync_conn_v0 *s;
	struct ip_vs_sync_buff *buff;
	struct ipvs_master_sync_state *ms;
	int id;
	int len;

	if (unlikely(cp->af != AF_INET))
		return;

	/* Do not sync ONE PACKET */
	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		return;

	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
		return;

	spin_lock_bh(&ipvs->sync_buff_lock);
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		spin_unlock_bh(&ipvs->sync_buff_lock);
		return;
	}

	id = select_master_thread_id(ipvs, cp);
	ms = &ipvs->ms[id];
	buff = ms->sync_buff;
	if (buff) {
		m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
		/* Send buffer if it is for v1 */
		if (!m->nr_conns) {
			sb_queue_tail(ipvs, ms);
			ms->sync_buff = NULL;
			buff = NULL;
		}
	}
	if (!buff) {
		buff = ip_vs_sync_buff_create_v0(ipvs);
		if (!buff) {
			spin_unlock_bh(&ipvs->sync_buff_lock);
			pr_err("ip_vs_sync_buff_create failed.\n");
			return;
		}
		ms->sync_buff = buff;
	}

	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
		SIMPLE_CONN_SIZE;
	m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
	s = (struct ip_vs_sync_conn_v0 *) buff->head;

	/* copy members */
	s->reserved = 0;
	s->protocol = cp->protocol;
	s->cport = cp->cport;
	s->vport = cp->vport;
	s->dport = cp->dport;
	s->caddr = cp->caddr.ip;
	s->vaddr = cp->vaddr.ip;
	s->daddr = cp->daddr.ip;
	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->state = htons(cp->state);
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		struct ip_vs_sync_conn_options *opt =
			(struct ip_vs_sync_conn_options *)&s[1];
		memcpy(opt, &cp->in_seq, sizeof(*opt));
	}

	m->nr_conns++;
	m->size = htons(ntohs(m->size) + len);
	buff->head += len;

	/* check if there is a space for next one */
	if (buff->head + FULL_CONN_SIZE > buff->end) {
		sb_queue_tail(ipvs, ms);
		ms->sync_buff = NULL;
	}
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* synchronize its controller if it has */
	cp = cp->control;
	if (cp) {
		if (cp->flags & IP_VS_CONN_F_TEMPLATE)
			pkts = atomic_add_return(1, &cp->in_pkts);
		else
			pkts = sysctl_sync_threshold(ipvs);
		ip_vs_sync_conn(net, cp, pkts);
	}
}
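
/*
 * Add an ip_vs_conn information into the current sync_buff.
 * Called by ip_vs_in.
 * Sending Version 1 messages.
 */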
void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_sync_mesg *m;
	union ip_vs_sync_conn *s;
	struct ip_vs_sync_buff *buff;
	struct ipvs_master_sync_state *ms;
	int id;
	__u8 *p;
	unsigned int len, pe_name_len, pad;

	/* Handle old version of the protocol */
	if (sysctl_sync_ver(ipvs) == 0) {
		ip_vs_sync_conn_v0(net, cp, pkts);
		return;
	}
	/* Do not sync ONE PACKET */
	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		goto control;
sloop:
	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
		goto control;

	/* Sanity checks */
	pe_name_len = 0;
	if (cp->pe_data_len) {
		if (!cp->pe_data || !cp->dest) {
			IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
			return;
		}
		pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
	}

	spin_lock_bh(&ipvs->sync_buff_lock);
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		spin_unlock_bh(&ipvs->sync_buff_lock);
		return;
	}

	id = select_master_thread_id(ipvs, cp);
	ms = &ipvs->ms[id];

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		len = sizeof(struct ip_vs_sync_v6);
	else
#endif
		len = sizeof(struct ip_vs_sync_v4);

	if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
		len += sizeof(struct ip_vs_sync_conn_options) + 2;

	if (cp->pe_data_len)
		len += cp->pe_data_len + 2;	/* + Param hdr field */
	if (pe_name_len)
		len += pe_name_len + 2;

	/* check if there is a space for next one */
	pad = 0;
	buff = ms->sync_buff;
	if (buff) {
		m = buff->mesg;
		pad = (4 - (size_t) buff->head) & 3;
		/* Send buffer if it is for v0 */
		if (buff->head + len + pad > buff->end || m->reserved) {
			sb_queue_tail(ipvs, ms);
			ms->sync_buff = NULL;
			buff = NULL;
			pad = 0;
		}
	}

	if (!buff) {
		buff = ip_vs_sync_buff_create(ipvs);
		if (!buff) {
			spin_unlock_bh(&ipvs->sync_buff_lock);
			pr_err("ip_vs_sync_buff_create failed.\n");
			return;
		}
		ms->sync_buff = buff;
		m = buff->mesg;
	}

	p = buff->head;
	buff->head += pad + len;
	m->size = htons(ntohs(m->size) + pad + len);
	/* Add ev. padding from prev. sync_conn */
	while (pad--)
		*(p++) = 0;

	s = (union ip_vs_sync_conn *)p;

	/* Set message type & copy members */
	s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
	s->v4.ver_size = htons(len & SVER_MASK);	/* Version 0 */
	s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->v4.state = htons(cp->state);
	s->v4.protocol = cp->protocol;
	s->v4.cport = cp->cport;
	s->v4.vport = cp->vport;
	s->v4.dport = cp->dport;
	s->v4.fwmark = htonl(cp->fwmark);
	s->v4.timeout = htonl(cp->timeout / HZ);
	m->nr_conns++;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6) {
		p += sizeof(struct ip_vs_sync_v6);
		s->v6.caddr = cp->caddr.in6;
		s->v6.vaddr = cp->vaddr.in6;
		s->v6.daddr = cp->daddr.in6;
	} else
#endif
	{
		p += sizeof(struct ip_vs_sync_v4);	/* options ptr */
		s->v4.caddr = cp->caddr.ip;
		s->v4.vaddr = cp->vaddr.ip;
		s->v4.daddr = cp->daddr.ip;
	}
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		*(p++) = IPVS_OPT_SEQ_DATA;
		*(p++) = sizeof(struct ip_vs_sync_conn_options);
		hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
		p += sizeof(struct ip_vs_seq);
		hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
		p += sizeof(struct ip_vs_seq);
	}
	/* Handle pe data */
	if (cp->pe_data_len && cp->pe_data) {
		*(p++) = IPVS_OPT_PE_DATA;
		*(p++) = cp->pe_data_len;
		memcpy(p, cp->pe_data, cp->pe_data_len);
		p += cp->pe_data_len;
		if (pe_name_len) {
			/* Add PE_NAME */
			*(p++) = IPVS_OPT_PE_NAME;
			*(p++) = pe_name_len;
			memcpy(p, cp->pe->name, pe_name_len);
			p += pe_name_len;
		}
	}

	spin_unlock_bh(&ipvs->sync_buff_lock);

control:
	/* synchronize its controller if it has */
	cp = cp->control;
	if (!cp)
		return;
	if (cp->flags & IP_VS_CONN_F_TEMPLATE)
		pkts = atomic_add_return(1, &cp->in_pkts);
	else
		pkts = sysctl_sync_threshold(ipvs);
	goto sloop;
}
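
/*
 *  fill_param used by version 1
 */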
static inline int
ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
			   struct ip_vs_conn_param *p,
			   __u8 *pe_data, unsigned int pe_data_len,
			   __u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		ip_vs_conn_fill_param(net, af, sc->v6.protocol,
				      (const union nf_inet_addr *)&sc->v6.caddr,
				      sc->v6.cport,
				      (const union nf_inet_addr *)&sc->v6.vaddr,
				      sc->v6.vport, p);
	else
#endif
		ip_vs_conn_fill_param(net, af, sc->v4.protocol,
				      (const union nf_inet_addr *)&sc->v4.caddr,
				      sc->v4.cport,
				      (const union nf_inet_addr *)&sc->v4.vaddr,
				      sc->v4.vport, p);
	/* Handle pe data */
	if (pe_data_len) {
		if (pe_name_len) {
			char buff[IP_VS_PENAME_MAXLEN+1];

			memcpy(buff, pe_name, pe_name_len);
			buff[pe_name_len] = 0;
			p->pe = __ip_vs_pe_getbyname(buff);
			if (!p->pe) {
				IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
					  buff);
				return 1;
			}
		} else {
			IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
			return 1;
		}

		p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
		if (!p->pe_data) {
			module_put(p->pe->module);
			return -ENOMEM;
		}
		p->pe_data_len = pe_data_len;
	}
	return 0;
}
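
/*
 *  Connection Add / Update.
 *  Common for version 0 and 1 reception of backup sync_conns.
 *  timeout is in seconds; zero selects the protocol default below.
 */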
static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
			    unsigned int flags, unsigned int state,
			    unsigned int protocol, unsigned int type,
			    const union nf_inet_addr *daddr, __be16 dport,
			    unsigned long timeout, __u32 fwmark,
			    struct ip_vs_sync_conn_options *opt)
{
	struct ip_vs_dest *dest;
	struct ip_vs_conn *cp;
	struct netns_ipvs *ipvs = net_ipvs(net);

	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
		cp = ip_vs_conn_in_get(param);
		if (cp && ((cp->dport != dport) ||
			   !ip_vs_addr_equal(cp->af, &cp->daddr, daddr))) {
			if (!(flags & IP_VS_CONN_F_INACTIVE)) {
				ip_vs_conn_expire_now(cp);
				__ip_vs_conn_put(cp);
				cp = NULL;
			} else {
				/* This is the expiration message for the
				 * connection that was already replaced, so
				 * just ignore it.
				 */
				__ip_vs_conn_put(cp);
				kfree(param->pe_data);
				return;
			}
		}
	} else {
		cp = ip_vs_ct_in_get(param);
	}

	if (cp) {
		/* Free pe_data */
		kfree(param->pe_data);

		dest = cp->dest;
		spin_lock_bh(&cp->lock);
		if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
		    !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
			if (flags & IP_VS_CONN_F_INACTIVE) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
			} else {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
			}
		}
		flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
		flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
		cp->flags = flags;
		spin_unlock_bh(&cp->lock);
		if (!dest)
			ip_vs_try_bind_dest(cp);
	} else {
		/*
		 * Find the appropriate destination for the connection.
		 * If it is not found the connection will remain unbound
		 * but still handled.
		 */
		rcu_read_lock();
		dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
				       param->vport, protocol, fwmark, flags);

		cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
		rcu_read_unlock();
		if (!cp) {
			kfree(param->pe_data);
			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
			return;
		}
	}

	if (opt)
		memcpy(&cp->in_seq, opt, sizeof(*opt));
	atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
	cp->state = state;
	cp->old_state = cp->state;
	/*
	 * For Ver 0 messages style:
	 *  - the timeout can not be recovered, so the protocol default
	 *    (or 3 minutes) is used below
	 * For Ver 1 messages style:
	 *  - the timeout (in seconds) is taken from the message
	 */
	if (timeout) {
		if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
			timeout = MAX_SCHEDULE_TIMEOUT / HZ;
		cp->timeout = timeout*HZ;
	} else {
		struct ip_vs_proto_data *pd;

		pd = ip_vs_proto_data_get(net, protocol);
		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
			cp->timeout = pd->timeout_table[state];
		else
			cp->timeout = (3*60*HZ);
	}
	ip_vs_conn_put(cp);
}
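
/*
 *  Process received multicast message for Version 0
 */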
static void ip_vs_process_message_v0(struct net *net, const char *buffer,
				     const size_t buflen)
{
	struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
	struct ip_vs_sync_conn_v0 *s;
	struct ip_vs_sync_conn_options *opt;
	struct ip_vs_protocol *pp;
	struct ip_vs_conn_param param;
	char *p;
	int i;

	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
	for (i = 0; i < m->nr_conns; i++) {
		unsigned int flags, state;

		if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
			IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
			return;
		}
		s = (struct ip_vs_sync_conn_v0 *) p;
		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
		flags &= ~IP_VS_CONN_F_HASHED;
		if (flags & IP_VS_CONN_F_SEQ_MASK) {
			opt = (struct ip_vs_sync_conn_options *)&s[1];
			p += FULL_CONN_SIZE;
			if (p > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
				return;
			}
		} else {
			opt = NULL;
			p += SIMPLE_CONN_SIZE;
		}

		state = ntohs(s->state);
		if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
			pp = ip_vs_proto_get(s->protocol);
			if (!pp) {
				IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
					  s->protocol);
				continue;
			}
			if (state >= pp->num_states) {
				IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
					  pp->name, state);
				continue;
			}
		} else {
			/* protocol in templates is not used for state/timeout */
			if (state > 0) {
				IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
					  state);
				state = 0;
			}
		}

		ip_vs_conn_fill_param(net, AF_INET, s->protocol,
				      (const union nf_inet_addr *)&s->caddr,
				      s->cport,
				      (const union nf_inet_addr *)&s->vaddr,
				      s->vport, &param);

		/* Send timeout as Zero */
		ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
				(union nf_inet_addr *)&s->daddr, s->dport,
				0, 0, opt);
	}
}
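
/*
 * Handle options
 */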
static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
				    __u32 *opt_flags,
				    struct ip_vs_sync_conn_options *opt)
{
	struct ip_vs_sync_conn_options *topt;

	topt = (struct ip_vs_sync_conn_options *)p;

	if (plen != sizeof(struct ip_vs_sync_conn_options)) {
		IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
		return -EINVAL;
	}
	if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
		IP_VS_DBG(2, "BACKUP, conn options found twice\n");
		return -EINVAL;
	}
	ntoh_seq(&topt->in_seq, &opt->in_seq);
	ntoh_seq(&topt->out_seq, &opt->out_seq);
	*opt_flags |= IPVS_OPT_F_SEQ_DATA;
	return 0;
}

static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
			  __u8 **data, unsigned int maxlen,
			  __u32 *opt_flags, __u32 flag)
{
	if (plen > maxlen) {
		IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
		return -EINVAL;
	}
	if (*opt_flags & flag) {
		IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
		return -EINVAL;
	}
	*data_len = plen;
	*data = p;
	*opt_flags |= flag;
	return 0;
}
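
/*
 *   Process a Version 1 sync. connection
 */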
static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
{
	struct ip_vs_sync_conn_options opt;
	union ip_vs_sync_conn *s;
	struct ip_vs_protocol *pp;
	struct ip_vs_conn_param param;
	__u32 flags;
	unsigned int af, state, pe_data_len = 0, pe_name_len = 0;
	__u8 *pe_data = NULL, *pe_name = NULL;
	__u32 opt_flags = 0;
	int retc = 0;

	s = (union ip_vs_sync_conn *) p;

	if (s->v6.type & STYPE_F_INET6) {
#ifdef CONFIG_IP_VS_IPV6
		af = AF_INET6;
		p += sizeof(struct ip_vs_sync_v6);
#else
		IP_VS_DBG(3, "BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
		retc = 10;
		goto out;
#endif
	} else if (!s->v4.type) {
		af = AF_INET;
		p += sizeof(struct ip_vs_sync_v4);
	} else {
		return -10;
	}
	if (p > msg_end)
		return -20;

	/* Process optional params check Type & Len. */
	while (p < msg_end) {
		int ptype;
		int plen;

		if (p+2 > msg_end)
			return -30;
		ptype = *(p++);
		plen = *(p++);

		if (!plen || ((p + plen) > msg_end))
			return -40;
		/* Handle seq option; p = param data */
		switch (ptype & ~IPVS_OPT_F_PARAM) {
		case IPVS_OPT_SEQ_DATA:
			if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
				return -50;
			break;

		case IPVS_OPT_PE_DATA:
			if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
					   IP_VS_PEDATA_MAXLEN, &opt_flags,
					   IPVS_OPT_F_PE_DATA))
				return -60;
			break;

		case IPVS_OPT_PE_NAME:
			if (ip_vs_proc_str(p, plen, &pe_name_len, &pe_name,
					   IP_VS_PENAME_MAXLEN, &opt_flags,
					   IPVS_OPT_F_PE_NAME))
				return -70;
			break;

		default:
			/* Param data mandatory ? */
			if (!(ptype & IPVS_OPT_F_PARAM)) {
				IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
					  ptype & ~IPVS_OPT_F_PARAM);
				retc = 20;
				goto out;
			}
		}
		p += plen;  /* Next option */
	}

	/* Get flags and Mask off unsupported */
	flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
	flags |= IP_VS_CONN_F_SYNC;
	state = ntohs(s->v4.state);

	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
		pp = ip_vs_proto_get(s->v4.protocol);
		if (!pp) {
			IP_VS_DBG(3, "BACKUP, Unsupported protocol %u\n",
				  s->v4.protocol);
			retc = 30;
			goto out;
		}
		if (state >= pp->num_states) {
			IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
				  pp->name, state);
			retc = 40;
			goto out;
		}
	} else {
		/* protocol in templates is not used for state/timeout */
		if (state > 0) {
			IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
				  state);
			state = 0;
		}
	}
	if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
				       pe_data_len, pe_name, pe_name_len)) {
		retc = 50;
		goto out;
	}

	if (af == AF_INET)
		ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
				(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
				ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
				);
#ifdef CONFIG_IP_VS_IPV6
	else
		ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
				(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
				ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
				);
#endif
	return 0;
	/* Error exit */
out:
	IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
	return retc;

}
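
/*
 *      Process received multicast message and create the corresponding
 *      ip_vs_conn entries.
 *      Handles Version 0 & 1
 */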
static void ip_vs_process_message(struct net *net, __u8 *buffer,
				  const size_t buflen)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
	__u8 *p, *msg_end;
	int i, nr_conns;

	if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
		IP_VS_DBG(2, "BACKUP, message header too short\n");
		return;
	}

	if (buflen != ntohs(m2->size)) {
		IP_VS_DBG(2, "BACKUP, bogus message size\n");
		return;
	}
	/* SyncID sanity check */
	if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
		IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
		return;
	}
	/* Handle version 1 message */
	if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
	    && (m2->spare == 0)) {

		msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
		nr_conns = m2->nr_conns;

		for (i = 0; i < nr_conns; i++) {
			union ip_vs_sync_conn *s;
			unsigned int size;
			int retc;

			p = msg_end;
			if (p + sizeof(s->v4) > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, too small\n");
				return;
			}
			s = (union ip_vs_sync_conn *)p;
			size = ntohs(s->v4.ver_size) & SVER_MASK;
			msg_end = p + size;
			/* Basic sanity checks */
			if (msg_end > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
				return;
			}
			if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
					     ntohs(s->v4.ver_size) >> SVER_SHIFT);
				return;
			}
			/* Process a single sync_conn */
			retc = ip_vs_proc_sync_conn(net, p, msg_end);
			if (retc < 0) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
					     retc);
				return;
			}
			/* Make sure we have 32 bit alignment */
			msg_end = p + ((size + 3) & ~3);
		}
	} else {
		/* Old type of message */
		ip_vs_process_message_v0(net, buffer, buflen);
		return;
	}
}
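
/*
 *      Setup sndbuf (mode=1) or rcvbuf (mode=0)
 */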
static void set_sock_size(struct sock *sk, int mode, int val)
{
	/* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
	/* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
	lock_sock(sk);
	if (mode) {
		val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
			      sysctl_wmem_max);
		sk->sk_sndbuf = val * 2;
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	} else {
		val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
			      sysctl_rmem_max);
		sk->sk_rcvbuf = val * 2;
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
	release_sock(sk);
}

/*
 *      Setup loopback of outgoing multicasts on a sending socket
 */
static void set_mcast_loop(struct sock *sk, u_char loop)
{
	struct inet_sock *inet = inet_sk(sk);

	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
	lock_sock(sk);
	inet->mc_loop = loop ? 1 : 0;
	release_sock(sk);
}

/*
 *      Specify TTL for outgoing multicasts on a sending socket
 */
static void set_mcast_ttl(struct sock *sk, u_char ttl)
{
	struct inet_sock *inet = inet_sk(sk);

	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
	lock_sock(sk);
	inet->mc_ttl = ttl;
	release_sock(sk);
}

/*
 *      Specify default interface for outgoing multicasts
 */
static int set_mcast_if(struct sock *sk, char *ifname)
{
	struct net_device *dev;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;

	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	lock_sock(sk);
	inet->mc_index = dev->ifindex;
	/*  inet->mc_addr  = 0; */
	release_sock(sk);

	return 0;
}

/*
 *	Set the maximum length of sync message according to the
 *	specified interface's MTU.
 */
static int set_sync_mesg_maxlen(struct net *net, int sync_state)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct net_device *dev;
	int num;

	if (sync_state == IP_VS_STATE_MASTER) {
		dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
		if (!dev)
			return -ENODEV;

		num = (dev->mtu - sizeof(struct iphdr) -
		       sizeof(struct udphdr) -
		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
		ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
		IP_VS_DBG(7, "setting the maximum length of sync sending "
			  "message %d.\n", ipvs->send_mesg_maxlen);
	} else if (sync_state == IP_VS_STATE_BACKUP) {
		dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
		if (!dev)
			return -ENODEV;

		ipvs->recv_mesg_maxlen = dev->mtu -
			sizeof(struct iphdr) - sizeof(struct udphdr);
		IP_VS_DBG(7, "setting the maximum length of sync receiving "
			  "message %d.\n", ipvs->recv_mesg_maxlen);
	}

	return 0;
}

/*
 *      Join a multicast group.
 *      The multicast address is specified by a struct in_addr object,
 *      the interface by name.
 */
static int
join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
{
	struct net *net = sock_net(sk);
	struct ip_mreqn mreq;
	struct net_device *dev;
	int ret;

	memset(&mreq, 0, sizeof(mreq));
	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;
	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	mreq.imr_ifindex = dev->ifindex;

	rtnl_lock();
	lock_sock(sk);
	ret = ip_mc_join_group(sk, &mreq);
	release_sock(sk);
	rtnl_unlock();

	return ret;
}

static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
	struct net *net = sock_net(sock->sk);
	struct net_device *dev;
	__be32 addr;
	struct sockaddr_in sin;

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;

	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	if (!addr)
		pr_err("You probably need to specify IP address on "
		       "multicast interface.\n");

	IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
		  ifname, &addr);

	/* Now bind the socket with the address of multicast interface */
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = addr;
	sin.sin_port = 0;

	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
}

/*
 *      Set up sending multicast socket over UDP
 */
static struct socket *make_send_sock(struct net *net, int id)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	/* multicast addr */
	struct sockaddr_in mcast_addr = {
		.sin_family		= AF_INET,
		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
	};
	struct socket *sock;
	int result;

	/* First create a socket, move it to the right name space later */
	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		return ERR_PTR(result);
	}
	/*
	 * Kernel sockets that are a part of a namespace should not
	 * hold a reference to the namespace in order to allow it to stop.
	 * After sk_change_net the socket must be released with
	 * sk_release_kernel.
	 */
	sk_change_net(sock->sk, net);
	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
	if (result < 0) {
		pr_err("Error setting outbound mcast interface\n");
		goto error;
	}

	set_mcast_loop(sock->sk, 0);
	set_mcast_ttl(sock->sk, 1);
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 1, result);

	result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
	if (result < 0) {
		pr_err("Error binding address of the mcast interface\n");
		goto error;
	}

	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
				    sizeof(struct sockaddr), 0);
	if (result < 0) {
		pr_err("Error connecting to the multicast addr\n");
		goto error;
	}

	return sock;

error:
	sk_release_kernel(sock->sk);
	return ERR_PTR(result);
}

/*
 *      Set up receiving multicast socket over UDP
 */
static struct socket *make_receive_sock(struct net *net, int id)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	/* multicast addr */
	struct sockaddr_in mcast_addr = {
		.sin_family		= AF_INET,
		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
	};
	struct socket *sock;
	int result;

	/* First create a socket, move it to the right name space later */
	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		return ERR_PTR(result);
	}
	/*
	 * Kernel sockets that are a part of a namespace should not
	 * hold a reference to the namespace in order to allow it to stop.
	 * After sk_change_net the socket must be released with
	 * sk_release_kernel.
	 */
	sk_change_net(sock->sk, net);
	/* it is equivalent to the REUSEADDR option in user-space */
	sock->sk->sk_reuse = SK_CAN_REUSE;
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 0, result);

	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
				 sizeof(struct sockaddr));
	if (result < 0) {
		pr_err("Error binding to the multicast addr\n");
		goto error;
	}

	/* join the multicast group */
	result = join_mcast_group(sock->sk,
				  (struct in_addr *) &mcast_addr.sin_addr,
				  ipvs->backup_mcast_ifn);
	if (result < 0) {
		pr_err("Error joining to the multicast group\n");
		goto error;
	}

	return sock;

error:
	sk_release_kernel(sock->sk);
	return ERR_PTR(result);
}

static int
ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
{
	struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
	struct kvec iov;
	int len;

	EnterFunction(7);
	iov.iov_base = (void *)buffer;
	iov.iov_len = length;

	len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));

	LeaveFunction(7);
	return len;
}

static int
ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
{
	int msize;
	int ret;

	msize = ntohs(msg->size);

	ret = ip_vs_send_async(sock, (char *)msg, msize);
	if (ret >= 0 || ret == -EAGAIN)
		return ret;
	pr_err("ip_vs_send_async error %d\n", ret);
	return 0;
}

static int
ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
{
	struct msghdr msg = {NULL,};
	struct kvec iov;
	int len;

	EnterFunction(7);

	/* Receive a packet */
	iov.iov_base = buffer;
	iov.iov_len = (size_t)buflen;

	len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);

	if (len < 0)
		return len;

	LeaveFunction(7);
	return len;
}
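
/* Wakeup the master thread for sending */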
static void master_wakeup_work_handler(struct work_struct *work)
{
	struct ipvs_master_sync_state *ms =
		container_of(work, struct ipvs_master_sync_state,
			     master_wakeup_work.work);
	struct netns_ipvs *ipvs = ms->ipvs;

	spin_lock_bh(&ipvs->sync_lock);
	if (ms->sync_queue_len &&
	    ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
		ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
		wake_up_process(ms->master_thread);
	}
	spin_unlock_bh(&ipvs->sync_lock);
}

/* Get next buffer to send */
static inline struct ip_vs_sync_buff *
next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb;

	sb = sb_dequeue(ipvs, ms);
	if (sb)
		return sb;
	/* Do not delay entries in buffer for more than 2 seconds */
	return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
}

static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		if (unlikely(kthread_should_stop()))
			break;
		if (!sb) {
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			int ret = 0;

			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop(),
						   ret);
			if (unlikely(kthread_should_stop()))
				goto done;
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* clean up the sync_buff queue */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	__set_current_state(TASK_RUNNING);

	/* clean up the current sync_buff */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* release the sending multicast socket */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo);

	return 0;
}

static int sync_thread_backup(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	int len;

	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);

	while (!kthread_should_stop()) {
		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
			 || kthread_should_stop());

		/* do we have data now? */
		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
			len = ip_vs_receive(tinfo->sock, tinfo->buf,
					    ipvs->recv_mesg_maxlen);
			if (len <= 0) {
				if (len != -EAGAIN)
					pr_err("receiving message error\n");
				break;
			}

			ip_vs_process_message(tinfo->net, tinfo->buf, len);
		}
	}

	/* release the receiving multicast socket */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo->buf);
	kfree(tinfo);

	return 0;
}

int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
	struct ip_vs_sync_thread_data *tinfo;
	struct task_struct **array = NULL, *task;
	struct socket *sock;
	struct netns_ipvs *ipvs = net_ipvs(net);
	char *name;
	int (*threadfn)(void *data);
	int id, count;
	int result = -ENOMEM;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
		  sizeof(struct ip_vs_sync_conn_v0));

	if (!ipvs->sync_state) {
		count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
		ipvs->threads_mask = count - 1;
	} else
		count = ipvs->threads_mask + 1;

	if (state == IP_VS_STATE_MASTER) {
		if (ipvs->ms)
			return -EEXIST;

		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
			sizeof(ipvs->master_mcast_ifn));
		ipvs->master_syncid = syncid;
		name = "ipvs-m:%d:%d";
		threadfn = sync_thread_master;
	} else if (state == IP_VS_STATE_BACKUP) {
		if (ipvs->backup_threads)
			return -EEXIST;

		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
			sizeof(ipvs->backup_mcast_ifn));
		ipvs->backup_syncid = syncid;
		name = "ipvs-b:%d:%d";
		threadfn = sync_thread_backup;
	} else {
		return -EINVAL;
	}

	if (state == IP_VS_STATE_MASTER) {
		struct ipvs_master_sync_state *ms;

		ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
		if (!ipvs->ms)
			goto out;
		ms = ipvs->ms;
		for (id = 0; id < count; id++, ms++) {
			INIT_LIST_HEAD(&ms->sync_queue);
			ms->sync_queue_len = 0;
			ms->sync_queue_delay = 0;
			INIT_DELAYED_WORK(&ms->master_wakeup_work,
					  master_wakeup_work_handler);
			ms->ipvs = ipvs;
		}
	} else {
		array = kzalloc(count * sizeof(struct task_struct *),
				GFP_KERNEL);
		if (!array)
			goto out;
	}
	set_sync_mesg_maxlen(net, state);

	tinfo = NULL;
	for (id = 0; id < count; id++) {
		if (state == IP_VS_STATE_MASTER)
			sock = make_send_sock(net, id);
		else
			sock = make_receive_sock(net, id);
		if (IS_ERR(sock)) {
			result = PTR_ERR(sock);
			goto outtinfo;
		}
		tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
		if (!tinfo)
			goto outsocket;
		tinfo->net = net;
		tinfo->sock = sock;
		if (state == IP_VS_STATE_BACKUP) {
			tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
					     GFP_KERNEL);
			if (!tinfo->buf)
				goto outtinfo;
		} else {
			tinfo->buf = NULL;
		}
		tinfo->id = id;

		task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
		if (IS_ERR(task)) {
			result = PTR_ERR(task);
			goto outtinfo;
		}
		tinfo = NULL;
		if (state == IP_VS_STATE_MASTER)
			ipvs->ms[id].master_thread = task;
		else
			array[id] = task;
	}

	/* mark as active */

	if (state == IP_VS_STATE_BACKUP)
		ipvs->backup_threads = array;
	spin_lock_bh(&ipvs->sync_buff_lock);
	ipvs->sync_state |= state;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* increase the module use count */
	ip_vs_use_count_inc();

	return 0;

outsocket:
	sk_release_kernel(sock->sk);

outtinfo:
	if (tinfo) {
		sk_release_kernel(tinfo->sock->sk);
		kfree(tinfo->buf);
		kfree(tinfo);
	}
	count = id;
	while (count-- > 0) {
		if (state == IP_VS_STATE_MASTER)
			kthread_stop(ipvs->ms[count].master_thread);
		else
			kthread_stop(array[count]);
	}
	kfree(array);

out:
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	}
	return result;
}

int stop_sync_thread(struct net *net, int state)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct task_struct **array;
	int id;
	int retc = -EINVAL;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));

	if (state == IP_VS_STATE_MASTER) {
		if (!ipvs->ms)
			return -ESRCH;

		/*
		 * The lock synchronizes with sb_queue_tail(), so that we don't
		 * add sync buffers to the queue when we are already in
		 * progress of stopping the master sync daemon.
		 */

		spin_lock_bh(&ipvs->sync_buff_lock);
		spin_lock(&ipvs->sync_lock);
		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
		spin_unlock(&ipvs->sync_lock);
		spin_unlock_bh(&ipvs->sync_buff_lock);

		retc = 0;
		for (id = ipvs->threads_mask; id >= 0; id--) {
			struct ipvs_master_sync_state *ms = &ipvs->ms[id];
			int ret;

			pr_info("stopping master sync thread %d ...\n",
				task_pid_nr(ms->master_thread));
			cancel_delayed_work_sync(&ms->master_wakeup_work);
			ret = kthread_stop(ms->master_thread);
			if (retc >= 0)
				retc = ret;
		}
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	} else if (state == IP_VS_STATE_BACKUP) {
		if (!ipvs->backup_threads)
			return -ESRCH;

		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
		array = ipvs->backup_threads;
		retc = 0;
		for (id = ipvs->threads_mask; id >= 0; id--) {
			int ret;

			pr_info("stopping backup sync thread %d ...\n",
				task_pid_nr(array[id]));
			ret = kthread_stop(array[id]);
			if (retc >= 0)
				retc = ret;
		}
		kfree(array);
		ipvs->backup_threads = NULL;
	}

	/* decrease the module use count */
	ip_vs_use_count_dec();

	return retc;
}
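
/*
 * Initialize data struct for each netns
 */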
int __net_init ip_vs_sync_net_init(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	__mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
	spin_lock_init(&ipvs->sync_lock);
	spin_lock_init(&ipvs->sync_buff_lock);
	return 0;
}

void ip_vs_sync_net_cleanup(struct net *net)
{
	int retc;
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&ipvs->sync_mutex);
	retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
	if (retc && retc != -ESRCH)
		pr_err("Failed to stop Master Daemon\n");

	retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
	if (retc && retc != -ESRCH)
		pr_err("Failed to stop Backup Daemon\n");
	mutex_unlock(&ipvs->sync_mutex);
}