1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34#define KMSG_COMPONENT "IPVS"
35#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
36
37#include <linux/module.h>
38#include <linux/slab.h>
39#include <linux/inetdevice.h>
40#include <linux/net.h>
41#include <linux/completion.h>
42#include <linux/delay.h>
43#include <linux/skbuff.h>
44#include <linux/in.h>
45#include <linux/igmp.h>
46#include <linux/udp.h>
47#include <linux/err.h>
48#include <linux/kthread.h>
49#include <linux/wait.h>
50#include <linux/kernel.h>
51
52#include <asm/unaligned.h>
53
54#include <net/ip.h>
55#include <net/sock.h>
56
57#include <net/ip_vs.h>
58
59#define IP_VS_SYNC_GROUP 0xe0000051
60#define IP_VS_SYNC_PORT 8848
61
62#define SYNC_PROTO_VER 1
63
64static struct lock_class_key __ipvs_sync_key;
65
66
67
68
/*
 * Version 0 on-wire representation of one synced connection (IPv4 only).
 * All multi-byte fields are in network byte order.
 */
struct ip_vs_sync_conn_v0 {
	__u8 reserved;

	/* Protocol, ports and addresses */
	__u8 protocol;
	__be16 cport;		/* client port */
	__be16 vport;		/* virtual service port */
	__be16 dport;		/* real-server (destination) port */
	__be32 caddr;		/* client address */
	__be32 vaddr;		/* virtual address */
	__be32 daddr;		/* destination address */

	/* Flags and state transition */
	__be16 flags;
	__be16 state;

	/* When IP_VS_CONN_F_SEQ_MASK is set in flags, a
	 * struct ip_vs_sync_conn_options follows immediately. */
};
87
/* Optional TCP sequence-number adjustment data carried after a sync
 * connection record (both directions). */
struct ip_vs_sync_conn_options {
	struct ip_vs_seq in_seq;	/* incoming seq. struct */
	struct ip_vs_seq out_seq;	/* outgoing seq. struct */
};
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
 * Version 1 on-wire record for an IPv4 connection.
 * ver_size packs the version (upper 4 bits, see SVER_SHIFT) and the
 * record size including options (lower 12 bits, see SVER_MASK).
 */
struct ip_vs_sync_v4 {
	__u8 type;		/* STYPE_F_INET6 clear => IPv4 */
	__u8 protocol;		/* Which protocol (TCP/UDP) */
	__be16 ver_size;	/* Version msb 4 bits, size lower 12 bits */
	/* Flags and state transition */
	__be32 flags;		/* status flags */
	__be16 state;		/* state info */
	/* Protocol, addresses and ports */
	__be16 cport;
	__be16 vport;
	__be16 dport;
	__be32 fwmark;		/* Firewall mark from skb */
	__be32 timeout;		/* cp timeout (seconds) */
	__be32 caddr;		/* client address */
	__be32 vaddr;		/* virtual address */
	__be32 daddr;		/* destination address */
	/* The sequence options start here */
	/* PE data padded to 32bit alignment after seq. options */
};
150
151
152
/*
 * Version 1 on-wire record for an IPv6 connection.  Identical layout
 * to ip_vs_sync_v4 except for the address fields; type has
 * STYPE_F_INET6 set.
 */
struct ip_vs_sync_v6 {
	__u8 type;		/* STYPE_F_INET6 set => IPv6 */
	__u8 protocol;		/* Which protocol (TCP/UDP) */
	__be16 ver_size;	/* Version msb 4 bits, size lower 12 bits */
	/* Flags and state transition */
	__be32 flags;		/* status flags */
	__be16 state;		/* state info */
	/* Protocol, addresses and ports */
	__be16 cport;
	__be16 vport;
	__be16 dport;
	__be32 fwmark;		/* Firewall mark from skb */
	__be32 timeout;		/* cp timeout (seconds) */
	struct in6_addr caddr;	/* client address */
	struct in6_addr vaddr;	/* virtual address */
	struct in6_addr daddr;	/* destination address */
	/* The sequence options start here */
	/* PE data padded to 32bit alignment after seq. options */
};
172
/* A version 1 record viewed as either address family; the leading
 * fields up to and including timeout are common to both. */
union ip_vs_sync_conn {
	struct ip_vs_sync_v4 v4;
	struct ip_vs_sync_v6 v6;
};
177
178
179#define STYPE_INET6 0
180#define STYPE_F_INET6 (1 << STYPE_INET6)
181
182#define SVER_SHIFT 12
183#define SVER_MASK 0x0fff
184
185#define IPVS_OPT_SEQ_DATA 1
186#define IPVS_OPT_PE_DATA 2
187#define IPVS_OPT_PE_NAME 3
188#define IPVS_OPT_PARAM 7
189
190#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
191#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
192#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
193#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
194
/* Per-thread context handed to the master/backup sync kthreads. */
struct ip_vs_sync_thread_data {
	struct net *net;	/* netns the thread serves */
	struct socket *sock;	/* mcast send or receive socket */
	char *buf;		/* receive buffer (backup threads) */
	int id;			/* thread index (selects port/queue) */
};
201
202
203#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
204#define FULL_CONN_SIZE \
205(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242#define SYNC_MESG_HEADER_LEN 4
243#define MAX_CONNS_PER_SYNCBUFF 255
244
245
/* Version 0 header: distinguished from v1 by nr_conns being the first
 * byte (v1 keeps that byte 0 in its 'reserved' field). */
struct ip_vs_sync_mesg_v0 {
	__u8 nr_conns;
	__u8 syncid;
	__be16 size;

	/* ip_vs_sync_conn_v0 entries start here */
};
253
254
/* Version 1 header.  'reserved' overlays v0's nr_conns and must be 0
 * so receivers can tell the two formats apart (see
 * ip_vs_process_message()). */
struct ip_vs_sync_mesg {
	__u8 reserved;		/* must be zero */
	__u8 syncid;
	__be16 size;
	__u8 nr_conns;
	__s8 version;		/* SYNC_PROTO_VER */
	__u16 spare;		/* must be zero */
	/* ip_vs_sync_conn entries start here */
};
264
/* One in-flight sync message being filled by the master. */
struct ip_vs_sync_buff {
	struct list_head list;		/* linked on ms->sync_queue */
	unsigned long firstuse;		/* jiffies when first conn was added;
					 * used to flush aged buffers */

	/* pointers for the message data */
	struct ip_vs_sync_mesg *mesg;	/* message header + payload */
	unsigned char *head;		/* next write position */
	unsigned char *end;		/* end of allocated space */
};
274
275
276
277
278
279static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
280{
281 ho->init_seq = get_unaligned_be32(&no->init_seq);
282 ho->delta = get_unaligned_be32(&no->delta);
283 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
284}
285
286
287
288
289
290static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
291{
292 put_unaligned_be32(ho->init_seq, &no->init_seq);
293 put_unaligned_be32(ho->delta, &no->delta);
294 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
295}
296
/*
 * Pop the oldest buffer from the master sync queue, or return NULL if
 * the queue is empty.  Runs in the master kthread: when empty, the
 * task is marked TASK_INTERRUPTIBLE *inside* sync_lock so a wakeup
 * racing with the caller's later schedule() is not lost.
 */
static inline struct ip_vs_sync_buff *
sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb;

	spin_lock_bh(&ipvs->sync_lock);
	if (list_empty(&ms->sync_queue)) {
		sb = NULL;
		__set_current_state(TASK_INTERRUPTIBLE);
	} else {
		sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
				list);
		list_del(&sb->list);
		ms->sync_queue_len--;
		/* queue drained: reset the wakeup-rate counter */
		if (!ms->sync_queue_len)
			ms->sync_queue_delay = 0;
	}
	spin_unlock_bh(&ipvs->sync_lock);

	return sb;
}
318
319
320
321
/*
 * Allocate a sync buffer with an initialized Version 1 message header.
 * GFP_ATOMIC because callers hold sync_buff_lock with BHs disabled.
 * Returns NULL on allocation failure.
 */
static inline struct ip_vs_sync_buff *
ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
{
	struct ip_vs_sync_buff *sb;

	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
		return NULL;

	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
	if (!sb->mesg) {
		kfree(sb);
		return NULL;
	}
	sb->mesg->reserved = 0;	/* old nr_conns i.e. must be zero now */
	sb->mesg->version = SYNC_PROTO_VER;
	sb->mesg->syncid = ipvs->master_syncid;
	sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
	sb->mesg->nr_conns = 0;
	sb->mesg->spare = 0;
	sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
	sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;

	sb->firstuse = jiffies;
	return sb;
}
347
348static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
349{
350 kfree(sb->mesg);
351 kfree(sb);
352}
353
/*
 * Move ms->sync_buff onto the send queue, or drop it when syncing has
 * stopped or the queue exceeds sync_qlen_max.  Called with
 * sync_buff_lock held and BHs already off (see callers), hence the
 * plain spin_lock on sync_lock here.  Schedules the delayed wakeup
 * work on the first queued buffer and wakes the master thread
 * directly every IPVS_SYNC_WAKEUP_RATE buffers.
 */
static inline void sb_queue_tail(struct netns_ipvs *ipvs,
				 struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb = ms->sync_buff;

	spin_lock(&ipvs->sync_lock);
	if (ipvs->sync_state & IP_VS_STATE_MASTER &&
	    ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
		if (!ms->sync_queue_len)
			schedule_delayed_work(&ms->master_wakeup_work,
					      max(IPVS_SYNC_SEND_DELAY, 1));
		ms->sync_queue_len++;
		list_add_tail(&sb->list, &ms->sync_queue);
		if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
			wake_up_process(ms->master_thread);
	} else
		ip_vs_sync_buff_release(sb);
	spin_unlock(&ipvs->sync_lock);
}
373
374
375
376
377
/*
 * Detach and return the current sync buffer if it has been in use for
 * at least 'time' jiffies, otherwise return NULL.  On success the
 * caller (master thread) is switched back to TASK_RUNNING, undoing
 * the TASK_INTERRUPTIBLE set by sb_dequeue().
 */
static inline struct ip_vs_sync_buff *
get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
		   unsigned long time)
{
	struct ip_vs_sync_buff *sb;

	spin_lock_bh(&ipvs->sync_buff_lock);
	sb = ms->sync_buff;
	if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
		ms->sync_buff = NULL;
		__set_current_state(TASK_RUNNING);
	} else
		sb = NULL;
	spin_unlock_bh(&ipvs->sync_buff_lock);
	return sb;
}
394
/*
 * Hash a connection pointer to a master thread id.  The shift drops
 * the low bits that are constant due to allocator alignment of
 * struct ip_vs_conn, so consecutive conns spread across threads.
 */
static inline int
select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
{
	return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
}
400
401
402
403
/*
 * Allocate a sync buffer with an initialized Version 0 message header
 * (used when sysctl sync_ver == 0).  GFP_ATOMIC: callers hold
 * sync_buff_lock with BHs disabled.  Returns NULL on failure.
 */
static inline struct ip_vs_sync_buff *
ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
{
	struct ip_vs_sync_buff *sb;
	struct ip_vs_sync_mesg_v0 *mesg;

	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
		return NULL;

	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
	if (!sb->mesg) {
		kfree(sb);
		return NULL;
	}
	mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
	mesg->nr_conns = 0;
	mesg->syncid = ipvs->master_syncid;
	mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
	sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
	sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
	sb->firstuse = jiffies;
	return sb;
}
427
428
429static inline bool in_persistence(struct ip_vs_conn *cp)
430{
431 for (cp = cp->control; cp; cp = cp->control) {
432 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
433 return true;
434 }
435 return false;
436}
437
438
439
440
441
442
443
444
445
446
/*
 * Decide whether connection cp needs to be synced to the backup now.
 * Returns non-zero when a sync message should be sent.
 *
 * Rate limiting: cp->sync_endtime stores the expiration time that was
 * last reported to the backup, with a retry counter packed into its
 * two low bits.  A lockless cmpxchg updates it; losing the race means
 * another CPU already scheduled the sync.  Significant TCP/SCTP state
 * transitions force a sync regardless of the packet counters.
 */
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
				  struct ip_vs_conn *cp, int pkts)
{
	unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
	unsigned long now = jiffies;
	unsigned long n = (now + cp->timeout) & ~3UL;	/* new endtime, retries cleared */
	unsigned int sync_refresh_period;
	int sync_period;
	int force;

	/* Check if we sync in current state */
	if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
		force = 0;
	else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
		return 0;
	else if (likely(cp->protocol == IPPROTO_TCP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_TCP_S_ESTABLISHED) |
		       (1 << IP_VS_TCP_S_FIN_WAIT) |
		       (1 << IP_VS_TCP_S_CLOSE) |
		       (1 << IP_VS_TCP_S_CLOSE_WAIT) |
		       (1 << IP_VS_TCP_S_TIME_WAIT))))
			return 0;
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
			goto set;
	} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
		       (1 << IP_VS_SCTP_S_CLOSED))))
			return 0;
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
			goto set;
	} else {
		/* UDP or another protocol with single state */
		force = 0;
	}

	sync_refresh_period = sysctl_sync_refresh_period(ipvs);
	if (sync_refresh_period > 0) {
		long diff = n - orig;
		long min_diff = max(cp->timeout >> 1, 10UL * HZ);

		/* Avoid sync if difference is below sync_refresh_period
		 * and below the half timeout.
		 */
		if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
			int retries = orig & 3;

			if (retries >= sysctl_sync_retries(ipvs))
				return 0;
			if (time_before(now, orig - cp->timeout +
					(sync_refresh_period >> 3)))
				return 0;
			n |= retries + 1;	/* bump packed retry counter */
		}
	}
	sync_period = sysctl_sync_period(ipvs);
	if (sync_period > 0) {
		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
		    pkts % sync_period != sysctl_sync_threshold(ipvs))
			return 0;
	} else if (sync_refresh_period <= 0 &&
		   pkts != sysctl_sync_threshold(ipvs))
		return 0;

set:
	cp->old_state = cp->state;
	/* lockless endtime update; lost race => sync already scheduled */
	n = cmpxchg(&cp->sync_endtime, orig, n);
	return n == orig || force;
}
522
523
524
525
526
527static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
528 int pkts)
529{
530 struct netns_ipvs *ipvs = net_ipvs(net);
531 struct ip_vs_sync_mesg_v0 *m;
532 struct ip_vs_sync_conn_v0 *s;
533 struct ip_vs_sync_buff *buff;
534 struct ipvs_master_sync_state *ms;
535 int id;
536 int len;
537
538 if (unlikely(cp->af != AF_INET))
539 return;
540
541 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
542 return;
543
544 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
545 return;
546
547 spin_lock_bh(&ipvs->sync_buff_lock);
548 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
549 spin_unlock_bh(&ipvs->sync_buff_lock);
550 return;
551 }
552
553 id = select_master_thread_id(ipvs, cp);
554 ms = &ipvs->ms[id];
555 buff = ms->sync_buff;
556 if (buff) {
557 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
558
559 if (!m->nr_conns) {
560 sb_queue_tail(ipvs, ms);
561 ms->sync_buff = NULL;
562 buff = NULL;
563 }
564 }
565 if (!buff) {
566 buff = ip_vs_sync_buff_create_v0(ipvs);
567 if (!buff) {
568 spin_unlock_bh(&ipvs->sync_buff_lock);
569 pr_err("ip_vs_sync_buff_create failed.\n");
570 return;
571 }
572 ms->sync_buff = buff;
573 }
574
575 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
576 SIMPLE_CONN_SIZE;
577 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
578 s = (struct ip_vs_sync_conn_v0 *) buff->head;
579
580
581 s->reserved = 0;
582 s->protocol = cp->protocol;
583 s->cport = cp->cport;
584 s->vport = cp->vport;
585 s->dport = cp->dport;
586 s->caddr = cp->caddr.ip;
587 s->vaddr = cp->vaddr.ip;
588 s->daddr = cp->daddr.ip;
589 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
590 s->state = htons(cp->state);
591 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
592 struct ip_vs_sync_conn_options *opt =
593 (struct ip_vs_sync_conn_options *)&s[1];
594 memcpy(opt, &cp->in_seq, sizeof(*opt));
595 }
596
597 m->nr_conns++;
598 m->size = htons(ntohs(m->size) + len);
599 buff->head += len;
600
601
602 if (buff->head + FULL_CONN_SIZE > buff->end) {
603 sb_queue_tail(ipvs, ms);
604 ms->sync_buff = NULL;
605 }
606 spin_unlock_bh(&ipvs->sync_buff_lock);
607
608
609 cp = cp->control;
610 if (cp) {
611 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
612 pkts = atomic_add_return(1, &cp->in_pkts);
613 else
614 pkts = sysctl_sync_threshold(ipvs);
615 ip_vs_sync_conn(net, cp->control, pkts);
616 }
617}
618
619
620
621
622
623
/*
 * Add an ip_vs_conn information into the current sync_buff using the
 * Version 1 record layout (falls back to ip_vs_sync_conn_v0() when
 * sysctl sync_ver is 0).  Loops via 'sloop' to also sync the whole
 * controller chain of cp.
 * Called by ip_vs_in.
 */
void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_sync_mesg *m;
	union ip_vs_sync_conn *s;
	struct ip_vs_sync_buff *buff;
	struct ipvs_master_sync_state *ms;
	int id;
	__u8 *p;
	unsigned int len, pe_name_len, pad;

	/* Handle old version of the protocol */
	if (sysctl_sync_ver(ipvs) == 0) {
		ip_vs_sync_conn_v0(net, cp, pkts);
		return;
	}
	/* Do not sync ONE PACKET */
	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		goto control;
sloop:
	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
		goto control;

	/* Sanity checks */
	pe_name_len = 0;
	if (cp->pe_data_len) {
		if (!cp->pe_data || !cp->dest) {
			IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
			return;
		}
		pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
	}

	spin_lock_bh(&ipvs->sync_buff_lock);
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		spin_unlock_bh(&ipvs->sync_buff_lock);
		return;
	}

	id = select_master_thread_id(ipvs, cp);
	ms = &ipvs->ms[id];

	/* Compute the record size: fixed part + optional TLVs */
#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		len = sizeof(struct ip_vs_sync_v6);
	else
#endif
		len = sizeof(struct ip_vs_sync_v4);

	if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
		len += sizeof(struct ip_vs_sync_conn_options) + 2;

	if (cp->pe_data_len)
		len += cp->pe_data_len + 2;	/* + Param hdr field */
	if (pe_name_len)
		len += pe_name_len + 2;

	/* check if there is a space for this one  */
	pad = 0;
	buff = ms->sync_buff;
	if (buff) {
		m = buff->mesg;
		/* records are 32-bit aligned within the message */
		pad = (4 - (size_t) buff->head) & 3;
		/* Send buffer if it is for v0 (v0 header has reserved != 0) */
		if (buff->head + len + pad > buff->end || m->reserved) {
			sb_queue_tail(ipvs, ms);
			ms->sync_buff = NULL;
			buff = NULL;
			pad = 0;
		}
	}

	if (!buff) {
		buff = ip_vs_sync_buff_create(ipvs);
		if (!buff) {
			spin_unlock_bh(&ipvs->sync_buff_lock);
			pr_err("ip_vs_sync_buff_create failed.\n");
			return;
		}
		ms->sync_buff = buff;
		m = buff->mesg;
	}

	p = buff->head;
	buff->head += pad + len;
	m->size = htons(ntohs(m->size) + pad + len);
	/* Add ev. padding from prev. sync_conn */
	while (pad--)
		*(p++) = 0;

	s = (union ip_vs_sync_conn *)p;

	/* Set message type  & copy members */
	s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
	s->v4.ver_size = htons(len & SVER_MASK);	/* Version 0 */
	s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->v4.state = htons(cp->state);
	s->v4.protocol = cp->protocol;
	s->v4.cport = cp->cport;
	s->v4.vport = cp->vport;
	s->v4.dport = cp->dport;
	s->v4.fwmark = htonl(cp->fwmark);
	s->v4.timeout = htonl(cp->timeout / HZ);
	m->nr_conns++;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6) {
		p += sizeof(struct ip_vs_sync_v6);
		s->v6.caddr = cp->caddr.in6;
		s->v6.vaddr = cp->vaddr.in6;
		s->v6.daddr = cp->daddr.in6;
	} else
#endif
	{
		p += sizeof(struct ip_vs_sync_v4);
		s->v4.caddr = cp->caddr.ip;
		s->v4.vaddr = cp->vaddr.ip;
		s->v4.daddr = cp->daddr.ip;
	}
	/* Optional TLVs: sequence data, PE data, PE name */
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		*(p++) = IPVS_OPT_SEQ_DATA;
		*(p++) = sizeof(struct ip_vs_sync_conn_options);
		hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
		p += sizeof(struct ip_vs_seq);
		hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
		p += sizeof(struct ip_vs_seq);
	}
	/* Handle pe data */
	if (cp->pe_data_len && cp->pe_data) {
		*(p++) = IPVS_OPT_PE_DATA;
		*(p++) = cp->pe_data_len;
		memcpy(p, cp->pe_data, cp->pe_data_len);
		p += cp->pe_data_len;
		if (pe_name_len) {
			/* Add PE_NAME */
			*(p++) = IPVS_OPT_PE_NAME;
			*(p++) = pe_name_len;
			memcpy(p, cp->pe->name, pe_name_len);
			p += pe_name_len;
		}
	}

	spin_unlock_bh(&ipvs->sync_buff_lock);

control:
	/* synchronize its controller if it has */
	cp = cp->control;
	if (!cp)
		return;
	if (cp->flags & IP_VS_CONN_F_TEMPLATE)
		pkts = atomic_add_return(1, &cp->in_pkts);
	else
		pkts = sysctl_sync_threshold(ipvs);
	goto sloop;
}
779
780
781
782
/*
 *  fill_param used by version 1: fill a conn_param from a received
 *  v1 record, resolving and taking a reference on the persistence
 *  engine (and duplicating its data) when PE options were present.
 *  Returns 0 on success, 1 on invalid PE parameters, -ENOMEM on
 *  allocation failure.  On success with PE data the caller owns
 *  p->pe_data (kmemdup'd) and the module reference in p->pe.
 */
static inline int
ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
			   struct ip_vs_conn_param *p,
			   __u8 *pe_data, unsigned int pe_data_len,
			   __u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		ip_vs_conn_fill_param(net, af, sc->v6.protocol,
				      (const union nf_inet_addr *)&sc->v6.caddr,
				      sc->v6.cport,
				      (const union nf_inet_addr *)&sc->v6.vaddr,
				      sc->v6.vport, p);
	else
#endif
		ip_vs_conn_fill_param(net, af, sc->v4.protocol,
				      (const union nf_inet_addr *)&sc->v4.caddr,
				      sc->v4.cport,
				      (const union nf_inet_addr *)&sc->v4.vaddr,
				      sc->v4.vport, p);
	/* Handle pe data */
	if (pe_data_len) {
		if (pe_name_len) {
			char buff[IP_VS_PENAME_MAXLEN+1];

			memcpy(buff, pe_name, pe_name_len);
			buff[pe_name_len]=0;
			p->pe = __ip_vs_pe_getbyname(buff);
			if (!p->pe) {
				IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
					  buff);
				return 1;
			}
		} else {
			/* PE data without a PE name is malformed */
			IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
			return 1;
		}

		p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
		if (!p->pe_data) {
			module_put(p->pe->module);
			return -ENOMEM;
		}
		p->pe_data_len = pe_data_len;
	}
	return 0;
}
830
831
832
833
834
835
836
/*
 *  Connection Add / Update on the backup:
 *  Common for version 0 and 1.  Looks up an existing connection (or
 *  template) matching *param and updates its flags/state/timeout, or
 *  creates a new one bound to the destination found by
 *  ip_vs_find_dest().  Takes ownership of param->pe_data.
 *  timeout==0 means "use the protocol default for this state".
 *
 *  Param: ...
 *         timeout is in sec.
 */
static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
			    unsigned int flags, unsigned int state,
			    unsigned int protocol, unsigned int type,
			    const union nf_inet_addr *daddr, __be16 dport,
			    unsigned long timeout, __u32 fwmark,
			    struct ip_vs_sync_conn_options *opt)
{
	struct ip_vs_dest *dest;
	struct ip_vs_conn *cp;
	struct netns_ipvs *ipvs = net_ipvs(net);

	if (!(flags & IP_VS_CONN_F_TEMPLATE))
		cp = ip_vs_conn_in_get(param);
	else
		cp = ip_vs_ct_in_get(param);

	if (cp) {
		/* Free pe_data, existing conn already holds its own copy */
		kfree(param->pe_data);

		dest = cp->dest;
		spin_lock_bh(&cp->lock);
		/* migrate active/inactive counters when INACTIVE flipped */
		if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
		    !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
			if (flags & IP_VS_CONN_F_INACTIVE) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
			} else {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
			}
		}
		/* only the BACKUP_UPD_MASK bits may be overwritten here */
		flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
		flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
		cp->flags = flags;
		spin_unlock_bh(&cp->lock);
		if (!dest)
			ip_vs_try_bind_dest(cp);
	} else {
		/*
		 * Find the appropriate destination for the connection.
		 * If it is not found the connection will remain unbound
		 * but still handled.
		 */
		rcu_read_lock();
		/* This function is only invoked by the synchronization
		 * code. We do not currently support heterogeneous pools
		 * with synchronization, so we can make the assumption that
		 * the svc_af is the same as the dest_af
		 */
		dest = ip_vs_find_dest(net, type, type, daddr, dport,
				       param->vaddr, param->vport, protocol,
				       fwmark, flags);

		cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
				    fwmark);
		rcu_read_unlock();
		if (!cp) {
			kfree(param->pe_data);
			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
			return;
		}
	}

	if (opt)
		memcpy(&cp->in_seq, opt, sizeof(*opt));
	atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
	cp->state = state;
	cp->old_state = cp->state;
	/*
	 * For Ver 0 messages style
	 *  - Not possible to recover the right timeout for templates
	 *  - can not find the right fwmark
	 *    virtual service. If needed, we can do it for
	 *    non-fwmark persistent services.
	 * Ver 1 messages style.
	 *  - No problem.
	 */
	if (timeout) {
		if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
			timeout = MAX_SCHEDULE_TIMEOUT / HZ;
		cp->timeout = timeout*HZ;
	} else {
		struct ip_vs_proto_data *pd;

		pd = ip_vs_proto_data_get(net, protocol);
		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
			cp->timeout = pd->timeout_table[state];
		else
			cp->timeout = (3*60*HZ);
	}
	ip_vs_conn_put(cp);
}
930
931
932
933
934static void ip_vs_process_message_v0(struct net *net, const char *buffer,
935 const size_t buflen)
936{
937 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
938 struct ip_vs_sync_conn_v0 *s;
939 struct ip_vs_sync_conn_options *opt;
940 struct ip_vs_protocol *pp;
941 struct ip_vs_conn_param param;
942 char *p;
943 int i;
944
945 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
946 for (i=0; i<m->nr_conns; i++) {
947 unsigned int flags, state;
948
949 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
950 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
951 return;
952 }
953 s = (struct ip_vs_sync_conn_v0 *) p;
954 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
955 flags &= ~IP_VS_CONN_F_HASHED;
956 if (flags & IP_VS_CONN_F_SEQ_MASK) {
957 opt = (struct ip_vs_sync_conn_options *)&s[1];
958 p += FULL_CONN_SIZE;
959 if (p > buffer+buflen) {
960 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
961 return;
962 }
963 } else {
964 opt = NULL;
965 p += SIMPLE_CONN_SIZE;
966 }
967
968 state = ntohs(s->state);
969 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
970 pp = ip_vs_proto_get(s->protocol);
971 if (!pp) {
972 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
973 s->protocol);
974 continue;
975 }
976 if (state >= pp->num_states) {
977 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
978 pp->name, state);
979 continue;
980 }
981 } else {
982
983 if (state > 0) {
984 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
985 state);
986 state = 0;
987 }
988 }
989
990 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
991 (const union nf_inet_addr *)&s->caddr,
992 s->cport,
993 (const union nf_inet_addr *)&s->vaddr,
994 s->vport, ¶m);
995
996
997 ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
998 (union nf_inet_addr *)&s->daddr, s->dport,
999 0, 0, opt);
1000 }
1001}
1002
1003
1004
1005
/*
 * Handle a received IPVS_OPT_SEQ_DATA TLV: validate its length,
 * reject duplicates, convert both sequence structs to host order
 * into *opt and mark IPVS_OPT_F_SEQ_DATA in *opt_flags.
 * Returns 0 on success, -EINVAL on malformed input.
 */
static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
				    __u32 *opt_flags,
				    struct ip_vs_sync_conn_options *opt)
{
	struct ip_vs_sync_conn_options *topt;

	topt = (struct ip_vs_sync_conn_options *)p;

	if (plen != sizeof(struct ip_vs_sync_conn_options)) {
		IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
		return -EINVAL;
	}
	if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
		IP_VS_DBG(2, "BACKUP, conn options found twice\n");
		return -EINVAL;
	}
	ntoh_seq(&topt->in_seq, &opt->in_seq);
	ntoh_seq(&topt->out_seq, &opt->out_seq);
	*opt_flags |= IPVS_OPT_F_SEQ_DATA;
	return 0;
}
1027
/*
 * Handle a received variable-length TLV (PE data or PE name):
 * bound-check plen against maxlen, reject duplicates via 'flag' in
 * *opt_flags, then record a pointer into the receive buffer (no copy)
 * and its length.  Returns 0 on success, -EINVAL on malformed input.
 */
static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
			  __u8 **data, unsigned int maxlen,
			  __u32 *opt_flags, __u32 flag)
{
	if (plen > maxlen) {
		IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
		return -EINVAL;
	}
	if (*opt_flags & flag) {
		IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
		return -EINVAL;
	}
	*data_len = plen;
	*data = p;
	*opt_flags |= flag;
	return 0;
}
1045
1046
1047
1048static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
1049{
1050 struct ip_vs_sync_conn_options opt;
1051 union ip_vs_sync_conn *s;
1052 struct ip_vs_protocol *pp;
1053 struct ip_vs_conn_param param;
1054 __u32 flags;
1055 unsigned int af, state, pe_data_len=0, pe_name_len=0;
1056 __u8 *pe_data=NULL, *pe_name=NULL;
1057 __u32 opt_flags=0;
1058 int retc=0;
1059
1060 s = (union ip_vs_sync_conn *) p;
1061
1062 if (s->v6.type & STYPE_F_INET6) {
1063#ifdef CONFIG_IP_VS_IPV6
1064 af = AF_INET6;
1065 p += sizeof(struct ip_vs_sync_v6);
1066#else
1067 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1068 retc = 10;
1069 goto out;
1070#endif
1071 } else if (!s->v4.type) {
1072 af = AF_INET;
1073 p += sizeof(struct ip_vs_sync_v4);
1074 } else {
1075 return -10;
1076 }
1077 if (p > msg_end)
1078 return -20;
1079
1080
1081 while (p < msg_end) {
1082 int ptype;
1083 int plen;
1084
1085 if (p+2 > msg_end)
1086 return -30;
1087 ptype = *(p++);
1088 plen = *(p++);
1089
1090 if (!plen || ((p + plen) > msg_end))
1091 return -40;
1092
1093 switch (ptype & ~IPVS_OPT_F_PARAM) {
1094 case IPVS_OPT_SEQ_DATA:
1095 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1096 return -50;
1097 break;
1098
1099 case IPVS_OPT_PE_DATA:
1100 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1101 IP_VS_PEDATA_MAXLEN, &opt_flags,
1102 IPVS_OPT_F_PE_DATA))
1103 return -60;
1104 break;
1105
1106 case IPVS_OPT_PE_NAME:
1107 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1108 IP_VS_PENAME_MAXLEN, &opt_flags,
1109 IPVS_OPT_F_PE_NAME))
1110 return -70;
1111 break;
1112
1113 default:
1114
1115 if (!(ptype & IPVS_OPT_F_PARAM)) {
1116 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1117 ptype & ~IPVS_OPT_F_PARAM);
1118 retc = 20;
1119 goto out;
1120 }
1121 }
1122 p += plen;
1123 }
1124
1125
1126 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1127 flags |= IP_VS_CONN_F_SYNC;
1128 state = ntohs(s->v4.state);
1129
1130 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1131 pp = ip_vs_proto_get(s->v4.protocol);
1132 if (!pp) {
1133 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1134 s->v4.protocol);
1135 retc = 30;
1136 goto out;
1137 }
1138 if (state >= pp->num_states) {
1139 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1140 pp->name, state);
1141 retc = 40;
1142 goto out;
1143 }
1144 } else {
1145
1146 if (state > 0) {
1147 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1148 state);
1149 state = 0;
1150 }
1151 }
1152 if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
1153 pe_data_len, pe_name, pe_name_len)) {
1154 retc = 50;
1155 goto out;
1156 }
1157
1158 if (af == AF_INET)
1159 ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
1160 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1161 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1162 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1163 );
1164#ifdef CONFIG_IP_VS_IPV6
1165 else
1166 ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
1167 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1168 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1169 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1170 );
1171#endif
1172 return 0;
1173
1174out:
1175 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1176 return retc;
1177
1178}
1179
1180
1181
1182
1183
/*
 *      Process received multicast message on the backup:
 *      validate the common header, dispatch Version 1 records (32-bit
 *      aligned, self-sized via ver_size) or fall back to the Version 0
 *      parser when the v1 header fields don't match.
 */
static void ip_vs_process_message(struct net *net, __u8 *buffer,
				  const size_t buflen)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
	__u8 *p, *msg_end;
	int i, nr_conns;

	if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
		IP_VS_DBG(2, "BACKUP, message header too short\n");
		return;
	}

	if (buflen != ntohs(m2->size)) {
		IP_VS_DBG(2, "BACKUP, bogus message size\n");
		return;
	}
	/* SyncID sanity check */
	if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
		IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
		return;
	}
	/* Profile of Version 1: reserved==0 distinguishes from v0 nr_conns */
	if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
	    && (m2->spare == 0)) {
		/* Handle version 1 message */
		msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
		nr_conns = m2->nr_conns;

		for (i=0; i<nr_conns; i++) {
			union ip_vs_sync_conn *s;
			unsigned int size;
			int retc;

			p = msg_end;
			if (p + sizeof(s->v4) > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
				return;
			}
			s = (union ip_vs_sync_conn *)p;
			size = ntohs(s->v4.ver_size) & SVER_MASK;
			msg_end = p + size;
			/* Basic sanity checks */
			if (msg_end  > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
				return;
			}
			if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
					      ntohs(s->v4.ver_size) >> SVER_SHIFT);
				return;
			}
			/* Process a single sync_conn */
			retc = ip_vs_proc_sync_conn(net, p, msg_end);
			if (retc < 0) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
					     retc);
				return;
			}
			/* Make sure we have 32 bit alignment */
			msg_end = p + ((size + 3) & ~3);
		}
	} else {
		/* Old type of message */
		ip_vs_process_message_v0(net, buffer, buflen);
		return;
	}
}
1252
1253
1254
1255
1256
/*
 *      Setup sndbuf (mode=1) or rcvbuf (mode=0) to 'val' (doubled like
 *      setsockopt does) and lock the size against later auto-tuning.
 */
static void set_sock_size(struct sock *sk, int mode, int val)
{
	/* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
	/* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
	lock_sock(sk);
	if (mode) {
		val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
			      sysctl_wmem_max);
		sk->sk_sndbuf = val * 2;
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	} else {
		val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
			      sysctl_rmem_max);
		sk->sk_rcvbuf = val * 2;
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
	release_sock(sk);
}
1275
1276
1277
1278
1279static void set_mcast_loop(struct sock *sk, u_char loop)
1280{
1281 struct inet_sock *inet = inet_sk(sk);
1282
1283
1284 lock_sock(sk);
1285 inet->mc_loop = loop ? 1 : 0;
1286 release_sock(sk);
1287}
1288
1289
1290
1291
/*
 *      Specify TTL for outgoing multicasts on a sync socket.
 */
static void set_mcast_ttl(struct sock *sk, u_char ttl)
{
	struct inet_sock *inet = inet_sk(sk);

	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
	lock_sock(sk);
	inet->mc_ttl = ttl;
	release_sock(sk);
}
1301
1302
1303
1304
/*
 *      Specify default interface for outgoing multicasts.
 *      Fails with -ENODEV if ifname does not exist in this netns, or
 *      -EINVAL if the socket is already bound to another device.
 */
static int set_mcast_if(struct sock *sk, char *ifname)
{
	struct net_device *dev;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;

	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	lock_sock(sk);
	inet->mc_index = dev->ifindex;
	/*  inet->mc_addr  = 0; */
	release_sock(sk);

	return 0;
}
1325
1326
1327
1328
1329
1330
/*
 *      Set the maximum length of sync message according to the mtu of
 *      syncing interface: send side is rounded down to a whole number
 *      of v0 records (capped at MAX_CONNS_PER_SYNCBUFF), receive side
 *      is simply mtu minus IP+UDP headers.
 *      Returns -ENODEV if the configured interface does not exist.
 */
static int set_sync_mesg_maxlen(struct net *net, int sync_state)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct net_device *dev;
	int num;

	if (sync_state == IP_VS_STATE_MASTER) {
		dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
		if (!dev)
			return -ENODEV;

		/* the extra 20 bytes leave room for IP options */
		num = (dev->mtu - sizeof(struct iphdr) -
		       sizeof(struct udphdr) -
		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
		ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
		IP_VS_DBG(7, "setting the maximum length of sync sending "
			  "message %d.\n", ipvs->send_mesg_maxlen);
	} else if (sync_state == IP_VS_STATE_BACKUP) {
		dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
		if (!dev)
			return -ENODEV;

		ipvs->recv_mesg_maxlen = dev->mtu -
			sizeof(struct iphdr) - sizeof(struct udphdr);
		IP_VS_DBG(7, "setting the maximum length of sync receiving "
			  "message %d.\n", ipvs->recv_mesg_maxlen);
	}

	return 0;
}
1362
1363
1364
1365
1366
1367
1368
/*
 *      Join a multicast group.
 *      the group is specified by a class D multicast address 224.0.0.0/8
 *      in the in_addr structure passed in as a parameter.
 *      Returns -ENODEV/-EINVAL on interface problems, otherwise the
 *      result of ip_mc_join_group().
 */
static int
join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
{
	struct net *net = sock_net(sk);
	struct ip_mreqn mreq;
	struct net_device *dev;
	int ret;

	memset(&mreq, 0, sizeof(mreq));
	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;
	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	mreq.imr_ifindex = dev->ifindex;

	lock_sock(sk);
	ret = ip_mc_join_group(sk, &mreq);
	release_sock(sk);

	return ret;
}
1394
1395
/* Bind the socket to the first universal-scope address of the given
 * interface (port 0).  Returns -ENODEV if the interface is missing;
 * a missing address only logs an error and binds to 0.0.0.0.
 * NOTE(review): 'sin' is not zeroed before bind, so sin_zero padding
 * is uninitialized — harmless for in-kernel bind, but worth confirming.
 */
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
	struct net *net = sock_net(sock->sk);
	struct net_device *dev;
	__be32 addr;
	struct sockaddr_in sin;

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;

	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	if (!addr)
		pr_err("You probably need to specify IP address on "
		       "multicast interface.\n");

	IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
		  ifname, &addr);

	/* Now bind the socket with the address of multicast interface */
	sin.sin_family	     = AF_INET;
	sin.sin_addr.s_addr  = addr;
	sin.sin_port         = 0;

	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
}
1422
1423
1424
1425
/*
 *      Set up sending multicast socket over UDP for one master thread:
 *      create a kernel UDP socket, move it into the right netns,
 *      select the outgoing mcast interface, disable loopback, set
 *      TTL 1, apply the optional sndbuf size, bind to the interface
 *      address and connect to the per-thread sync group/port.
 *      Returns the socket or an ERR_PTR.
 */
static struct socket *make_send_sock(struct net *net, int id)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	/* multicast addr; each master thread uses its own port */
	struct sockaddr_in mcast_addr = {
		.sin_family		= AF_INET,
		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
	};
	struct socket *sock;
	int result;

	/* First create a socket move it to right name space later */
	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		return ERR_PTR(result);
	}
	/*
	 * Kernel sockets that are a part of a namespace, should not
	 * hold a reference to a namespace in order to allow to stop it.
	 * After sk_change_net should be released using sk_release_kernel.
	 */
	sk_change_net(sock->sk, net);
	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
	if (result < 0) {
		pr_err("Error setting outbound mcast interface\n");
		goto error;
	}

	set_mcast_loop(sock->sk, 0);
	set_mcast_ttl(sock->sk, 1);
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 1, result);

	result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
	if (result < 0) {
		pr_err("Error binding address of the mcast interface\n");
		goto error;
	}

	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
				    sizeof(struct sockaddr), 0);
	if (result < 0) {
		pr_err("Error connecting to the multicast addr\n");
		goto error;
	}

	return sock;

error:
	sk_release_kernel(sock->sk);
	return ERR_PTR(result);
}
1481
1482
1483
1484
1485
1486static struct socket *make_receive_sock(struct net *net, int id)
1487{
1488 struct netns_ipvs *ipvs = net_ipvs(net);
1489
1490 struct sockaddr_in mcast_addr = {
1491 .sin_family = AF_INET,
1492 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1493 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1494 };
1495 struct socket *sock;
1496 int result;
1497
1498
1499 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1500 if (result < 0) {
1501 pr_err("Error during creation of socket; terminating\n");
1502 return ERR_PTR(result);
1503 }
1504
1505
1506
1507
1508
1509 sk_change_net(sock->sk, net);
1510
1511 sock->sk->sk_reuse = SK_CAN_REUSE;
1512 result = sysctl_sync_sock_size(ipvs);
1513 if (result > 0)
1514 set_sock_size(sock->sk, 0, result);
1515
1516 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
1517 sizeof(struct sockaddr));
1518 if (result < 0) {
1519 pr_err("Error binding to the multicast addr\n");
1520 goto error;
1521 }
1522
1523
1524 result = join_mcast_group(sock->sk,
1525 (struct in_addr *) &mcast_addr.sin_addr,
1526 ipvs->backup_mcast_ifn);
1527 if (result < 0) {
1528 pr_err("Error joining to the multicast group\n");
1529 goto error;
1530 }
1531
1532 return sock;
1533
1534error:
1535 sk_release_kernel(sock->sk);
1536 return ERR_PTR(result);
1537}
1538
1539
1540static int
1541ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1542{
1543 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1544 struct kvec iov;
1545 int len;
1546
1547 EnterFunction(7);
1548 iov.iov_base = (void *)buffer;
1549 iov.iov_len = length;
1550
1551 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1552
1553 LeaveFunction(7);
1554 return len;
1555}
1556
1557static int
1558ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1559{
1560 int msize;
1561 int ret;
1562
1563 msize = ntohs(msg->size);
1564
1565 ret = ip_vs_send_async(sock, (char *)msg, msize);
1566 if (ret >= 0 || ret == -EAGAIN)
1567 return ret;
1568 pr_err("ip_vs_send_async error %d\n", ret);
1569 return 0;
1570}
1571
1572static int
1573ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1574{
1575 struct msghdr msg = {NULL,};
1576 struct kvec iov;
1577 int len;
1578
1579 EnterFunction(7);
1580
1581
1582 iov.iov_base = buffer;
1583 iov.iov_len = (size_t)buflen;
1584
1585 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1586
1587 if (len < 0)
1588 return len;
1589
1590 LeaveFunction(7);
1591 return len;
1592}
1593
1594
1595static void master_wakeup_work_handler(struct work_struct *work)
1596{
1597 struct ipvs_master_sync_state *ms =
1598 container_of(work, struct ipvs_master_sync_state,
1599 master_wakeup_work.work);
1600 struct netns_ipvs *ipvs = ms->ipvs;
1601
1602 spin_lock_bh(&ipvs->sync_lock);
1603 if (ms->sync_queue_len &&
1604 ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
1605 ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
1606 wake_up_process(ms->master_thread);
1607 }
1608 spin_unlock_bh(&ipvs->sync_lock);
1609}
1610
1611
1612static inline struct ip_vs_sync_buff *
1613next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1614{
1615 struct ip_vs_sync_buff *sb;
1616
1617 sb = sb_dequeue(ipvs, ms);
1618 if (sb)
1619 return sb;
1620
1621 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1622}
1623
/*
 *	Master sync daemon thread: repeatedly pulls sync buffers for its
 *	thread id and multicasts them, retrying each buffer until the
 *	socket is writeable or the thread is asked to stop.  On exit it
 *	drains and frees all remaining buffers, the socket and tinfo.
 */
static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		if (unlikely(kthread_should_stop()))
			break;
		if (!sb) {
			/* nothing to send yet; poll again later */
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			/* -EAGAIN: wait until the socket becomes writeable
			 * (or we are stopped), then retry the same buffer.
			 */
			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop());
			if (unlikely(kthread_should_stop()))
				goto done;
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	/* sb may still hold an unsent buffer when stopping mid-retry */
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* clean up the sync_buff queue */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	__set_current_state(TASK_RUNNING);

	/* clean up the current (partially filled) sync_buff */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* release the sending multicast socket */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo);

	return 0;
}
1678
1679
/*
 *	Backup sync daemon thread: sleeps until the multicast socket's
 *	receive queue is non-empty (or the thread is stopped), then
 *	drains the queue, feeding each datagram to ip_vs_process_message().
 */
static int sync_thread_backup(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	int len;

	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);

	while (!kthread_should_stop()) {
		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
			 || kthread_should_stop());

		/* do we have data now? */
		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
			len = ip_vs_receive(tinfo->sock, tinfo->buf,
					ipvs->recv_mesg_maxlen);
			if (len <= 0) {
				/* -EAGAIN just means the queue drained */
				if (len != -EAGAIN)
					pr_err("receiving message error\n");
				break;
			}

			ip_vs_process_message(tinfo->net, tinfo->buf, len);
		}
	}

	/* release the receiving multicast socket */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo->buf);
	kfree(tinfo);

	return 0;
}
1716
1717
/*
 *	Start the MASTER or BACKUP sync daemon(s) for this netns.
 *
 *	One thread and one multicast socket are created per sync "port";
 *	the port count is read from the sync_ports sysctl when the first
 *	daemon starts and stays fixed (threads_mask) until both stop.
 *
 *	Returns 0 on success, -EEXIST if the requested daemon already
 *	runs, -EINVAL for an unknown state, -ENOMEM or a socket/thread
 *	creation error otherwise.  On failure all partially created
 *	sockets, tinfo structures and threads are torn down again.
 */
int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
	struct ip_vs_sync_thread_data *tinfo;
	struct task_struct **array = NULL, *task;
	struct socket *sock;
	struct netns_ipvs *ipvs = net_ipvs(net);
	char *name;
	int (*threadfn)(void *data);
	int id, count;
	int result = -ENOMEM;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
		  sizeof(struct ip_vs_sync_conn_v0));

	/* Thread count is negotiated once; while either daemon is active
	 * the other must use the same number of threads.
	 */
	if (!ipvs->sync_state) {
		count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
		ipvs->threads_mask = count - 1;
	} else
		count = ipvs->threads_mask + 1;

	if (state == IP_VS_STATE_MASTER) {
		if (ipvs->ms)
			return -EEXIST;

		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
			sizeof(ipvs->master_mcast_ifn));
		ipvs->master_syncid = syncid;
		name = "ipvs-m:%d:%d";
		threadfn = sync_thread_master;
	} else if (state == IP_VS_STATE_BACKUP) {
		if (ipvs->backup_threads)
			return -EEXIST;

		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
			sizeof(ipvs->backup_mcast_ifn));
		ipvs->backup_syncid = syncid;
		name = "ipvs-b:%d:%d";
		threadfn = sync_thread_backup;
	} else {
		return -EINVAL;
	}

	if (state == IP_VS_STATE_MASTER) {
		struct ipvs_master_sync_state *ms;

		/* one ipvs_master_sync_state per master thread */
		ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
		if (!ipvs->ms)
			goto out;
		ms = ipvs->ms;
		for (id = 0; id < count; id++, ms++) {
			INIT_LIST_HEAD(&ms->sync_queue);
			ms->sync_queue_len = 0;
			ms->sync_queue_delay = 0;
			INIT_DELAYED_WORK(&ms->master_wakeup_work,
					  master_wakeup_work_handler);
			ms->ipvs = ipvs;
		}
	} else {
		/* backup threads are tracked in a plain task array */
		array = kzalloc(count * sizeof(struct task_struct *),
				GFP_KERNEL);
		if (!array)
			goto out;
	}
	set_sync_mesg_maxlen(net, state);

	tinfo = NULL;
	for (id = 0; id < count; id++) {
		if (state == IP_VS_STATE_MASTER)
			sock = make_send_sock(net, id);
		else
			sock = make_receive_sock(net, id);
		if (IS_ERR(sock)) {
			result = PTR_ERR(sock);
			goto outtinfo;
		}
		tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
		if (!tinfo)
			goto outsocket;
		tinfo->net = net;
		tinfo->sock = sock;
		if (state == IP_VS_STATE_BACKUP) {
			/* receive buffer, sized from the mcast device MTU */
			tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
					     GFP_KERNEL);
			if (!tinfo->buf)
				goto outtinfo;
		} else {
			tinfo->buf = NULL;
		}
		tinfo->id = id;

		/* on success the thread owns tinfo and frees it on exit */
		task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
		if (IS_ERR(task)) {
			result = PTR_ERR(task);
			goto outtinfo;
		}
		tinfo = NULL;
		if (state == IP_VS_STATE_MASTER)
			ipvs->ms[id].master_thread = task;
		else
			array[id] = task;
	}

	/* mark as active */

	if (state == IP_VS_STATE_BACKUP)
		ipvs->backup_threads = array;
	spin_lock_bh(&ipvs->sync_buff_lock);
	ipvs->sync_state |= state;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* increase the module use count */
	ip_vs_use_count_inc();

	return 0;

outsocket:
	/* socket created but no tinfo to carry it yet */
	sk_release_kernel(sock->sk);

outtinfo:
	if (tinfo) {
		sk_release_kernel(tinfo->sock->sk);
		kfree(tinfo->buf);
		kfree(tinfo);
	}
	/* stop every thread already started (ids below the failing one) */
	count = id;
	while (count-- > 0) {
		if (state == IP_VS_STATE_MASTER)
			kthread_stop(ipvs->ms[count].master_thread);
		else
			kthread_stop(array[count]);
	}
	kfree(array);

out:
	/* free ms unless a master daemon is (still) running */
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	}
	return result;
}
1859
1860
1861int stop_sync_thread(struct net *net, int state)
1862{
1863 struct netns_ipvs *ipvs = net_ipvs(net);
1864 struct task_struct **array;
1865 int id;
1866 int retc = -EINVAL;
1867
1868 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1869
1870 if (state == IP_VS_STATE_MASTER) {
1871 if (!ipvs->ms)
1872 return -ESRCH;
1873
1874
1875
1876
1877
1878
1879
1880 spin_lock_bh(&ipvs->sync_buff_lock);
1881 spin_lock(&ipvs->sync_lock);
1882 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1883 spin_unlock(&ipvs->sync_lock);
1884 spin_unlock_bh(&ipvs->sync_buff_lock);
1885
1886 retc = 0;
1887 for (id = ipvs->threads_mask; id >= 0; id--) {
1888 struct ipvs_master_sync_state *ms = &ipvs->ms[id];
1889 int ret;
1890
1891 pr_info("stopping master sync thread %d ...\n",
1892 task_pid_nr(ms->master_thread));
1893 cancel_delayed_work_sync(&ms->master_wakeup_work);
1894 ret = kthread_stop(ms->master_thread);
1895 if (retc >= 0)
1896 retc = ret;
1897 }
1898 kfree(ipvs->ms);
1899 ipvs->ms = NULL;
1900 } else if (state == IP_VS_STATE_BACKUP) {
1901 if (!ipvs->backup_threads)
1902 return -ESRCH;
1903
1904 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1905 array = ipvs->backup_threads;
1906 retc = 0;
1907 for (id = ipvs->threads_mask; id >= 0; id--) {
1908 int ret;
1909
1910 pr_info("stopping backup sync thread %d ...\n",
1911 task_pid_nr(array[id]));
1912 ret = kthread_stop(array[id]);
1913 if (retc >= 0)
1914 retc = ret;
1915 }
1916 kfree(array);
1917 ipvs->backup_threads = NULL;
1918 }
1919
1920
1921 ip_vs_use_count_dec();
1922
1923 return retc;
1924}
1925
1926
1927
1928
1929int __net_init ip_vs_sync_net_init(struct net *net)
1930{
1931 struct netns_ipvs *ipvs = net_ipvs(net);
1932
1933 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
1934 spin_lock_init(&ipvs->sync_lock);
1935 spin_lock_init(&ipvs->sync_buff_lock);
1936 return 0;
1937}
1938
1939void ip_vs_sync_net_cleanup(struct net *net)
1940{
1941 int retc;
1942 struct netns_ipvs *ipvs = net_ipvs(net);
1943
1944 mutex_lock(&ipvs->sync_mutex);
1945 retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
1946 if (retc && retc != -ESRCH)
1947 pr_err("Failed to stop Master Daemon\n");
1948
1949 retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
1950 if (retc && retc != -ESRCH)
1951 pr_err("Failed to stop Backup Daemon\n");
1952 mutex_unlock(&ipvs->sync_mutex);
1953}
1954