1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/* Log prefix: pr_fmt() prepends "IPVS: " to the format string it is given. */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
36
37#include <linux/module.h>
38#include <linux/slab.h>
39#include <linux/inetdevice.h>
40#include <linux/net.h>
41#include <linux/completion.h>
42#include <linux/delay.h>
43#include <linux/skbuff.h>
44#include <linux/in.h>
45#include <linux/igmp.h>
46#include <linux/udp.h>
47#include <linux/err.h>
48#include <linux/kthread.h>
49#include <linux/wait.h>
50#include <linux/kernel.h>
51
52#include <asm/unaligned.h>
53
54#include <net/ip.h>
55#include <net/sock.h>
56
57#include <net/ip_vs.h>
58
59#define IP_VS_SYNC_GROUP 0xe0000051
60#define IP_VS_SYNC_PORT 8848
61
62#define SYNC_PROTO_VER 1
63
64static struct lock_class_key __ipvs_sync_key;
65
66
67
68
69struct ip_vs_sync_conn_v0 {
70 __u8 reserved;
71
72
73 __u8 protocol;
74 __be16 cport;
75 __be16 vport;
76 __be16 dport;
77 __be32 caddr;
78 __be32 vaddr;
79 __be32 daddr;
80
81
82 __be16 flags;
83 __be16 state;
84
85
86};
87
88struct ip_vs_sync_conn_options {
89 struct ip_vs_seq in_seq;
90 struct ip_vs_seq out_seq;
91};
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131struct ip_vs_sync_v4 {
132 __u8 type;
133 __u8 protocol;
134 __be16 ver_size;
135
136 __be32 flags;
137 __be16 state;
138
139 __be16 cport;
140 __be16 vport;
141 __be16 dport;
142 __be32 fwmark;
143 __be32 timeout;
144 __be32 caddr;
145 __be32 vaddr;
146 __be32 daddr;
147
148
149};
150
151
152
153struct ip_vs_sync_v6 {
154 __u8 type;
155 __u8 protocol;
156 __be16 ver_size;
157
158 __be32 flags;
159 __be16 state;
160
161 __be16 cport;
162 __be16 vport;
163 __be16 dport;
164 __be32 fwmark;
165 __be32 timeout;
166 struct in6_addr caddr;
167 struct in6_addr vaddr;
168 struct in6_addr daddr;
169
170
171};
172
173union ip_vs_sync_conn {
174 struct ip_vs_sync_v4 v4;
175 struct ip_vs_sync_v6 v6;
176};
177
178
/* Bits of the record "type" byte */
#define STYPE_INET6 0
#define STYPE_F_INET6 (1 << STYPE_INET6)

/* Layout of ver_size: version = value >> SVER_SHIFT, size = value & SVER_MASK */
#define SVER_SHIFT 12
#define SVER_MASK 0x0fff

/* Type codes of the optional parameters that may follow a record */
#define IPVS_OPT_SEQ_DATA 1
#define IPVS_OPT_PE_DATA 2
#define IPVS_OPT_PE_NAME 3
#define IPVS_OPT_PARAM 7

/*
 * Flag form of the codes above.  IPVS_OPT_F_PARAM is also tested directly on
 * the received type byte: an unknown type without this bit is treated as
 * mandatory and the record is dropped.
 */
#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
194
/*
 * Context of one sync thread.  NOTE(review): the users of this struct are
 * outside this part of the file; field roles are inferred from their names.
 */
struct ip_vs_sync_thread_data {
	struct net		*net;
	struct socket		*sock;
	char			*buf;
	int			id;
};
201
202
/* Size of a version 0 record without and with the sequence options. */
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE \
(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/* Header length used when sizing the send buffer in set_sync_mesg_maxlen(). */
#define SYNC_MESG_HEADER_LEN 4
/* Upper bound on records per buffer; the nr_conns counters are __u8. */
#define MAX_CONNS_PER_SYNCBUFF 255
244
245
246struct ip_vs_sync_mesg_v0 {
247 __u8 nr_conns;
248 __u8 syncid;
249 __be16 size;
250
251
252};
253
254
255struct ip_vs_sync_mesg {
256 __u8 reserved;
257 __u8 syncid;
258 __be16 size;
259 __u8 nr_conns;
260 __s8 version;
261 __u16 spare;
262
263};
264
265struct ip_vs_sync_buff {
266 struct list_head list;
267 unsigned long firstuse;
268
269
270 struct ip_vs_sync_mesg *mesg;
271 unsigned char *head;
272 unsigned char *end;
273};
274
275
276
277
278
279static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
280{
281 ho->init_seq = get_unaligned_be32(&no->init_seq);
282 ho->delta = get_unaligned_be32(&no->delta);
283 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
284}
285
286
287
288
289
290static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
291{
292 put_unaligned_be32(ho->init_seq, &no->init_seq);
293 put_unaligned_be32(ho->delta, &no->delta);
294 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
295}
296
297static inline struct ip_vs_sync_buff *
298sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
299{
300 struct ip_vs_sync_buff *sb;
301
302 spin_lock_bh(&ipvs->sync_lock);
303 if (list_empty(&ms->sync_queue)) {
304 sb = NULL;
305 __set_current_state(TASK_INTERRUPTIBLE);
306 } else {
307 sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
308 list);
309 list_del(&sb->list);
310 ms->sync_queue_len--;
311 if (!ms->sync_queue_len)
312 ms->sync_queue_delay = 0;
313 }
314 spin_unlock_bh(&ipvs->sync_lock);
315
316 return sb;
317}
318
319
320
321
322static inline struct ip_vs_sync_buff *
323ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
324{
325 struct ip_vs_sync_buff *sb;
326
327 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
328 return NULL;
329
330 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
331 if (!sb->mesg) {
332 kfree(sb);
333 return NULL;
334 }
335 sb->mesg->reserved = 0;
336 sb->mesg->version = SYNC_PROTO_VER;
337 sb->mesg->syncid = ipvs->master_syncid;
338 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
339 sb->mesg->nr_conns = 0;
340 sb->mesg->spare = 0;
341 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
342 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
343
344 sb->firstuse = jiffies;
345 return sb;
346}
347
348static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
349{
350 kfree(sb->mesg);
351 kfree(sb);
352}
353
354static inline void sb_queue_tail(struct netns_ipvs *ipvs,
355 struct ipvs_master_sync_state *ms)
356{
357 struct ip_vs_sync_buff *sb = ms->sync_buff;
358
359 spin_lock(&ipvs->sync_lock);
360 if (ipvs->sync_state & IP_VS_STATE_MASTER &&
361 ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
362 if (!ms->sync_queue_len)
363 schedule_delayed_work(&ms->master_wakeup_work,
364 max(IPVS_SYNC_SEND_DELAY, 1));
365 ms->sync_queue_len++;
366 list_add_tail(&sb->list, &ms->sync_queue);
367 if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
368 wake_up_process(ms->master_thread);
369 } else
370 ip_vs_sync_buff_release(sb);
371 spin_unlock(&ipvs->sync_lock);
372}
373
374
375
376
377
378static inline struct ip_vs_sync_buff *
379get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
380 unsigned long time)
381{
382 struct ip_vs_sync_buff *sb;
383
384 spin_lock_bh(&ipvs->sync_buff_lock);
385 sb = ms->sync_buff;
386 if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
387 ms->sync_buff = NULL;
388 __set_current_state(TASK_RUNNING);
389 } else
390 sb = NULL;
391 spin_unlock_bh(&ipvs->sync_buff_lock);
392 return sb;
393}
394
395static inline int
396select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
397{
398 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
399}
400
401
402
403
404static inline struct ip_vs_sync_buff *
405ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
406{
407 struct ip_vs_sync_buff *sb;
408 struct ip_vs_sync_mesg_v0 *mesg;
409
410 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
411 return NULL;
412
413 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
414 if (!sb->mesg) {
415 kfree(sb);
416 return NULL;
417 }
418 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
419 mesg->nr_conns = 0;
420 mesg->syncid = ipvs->master_syncid;
421 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
422 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
423 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
424 sb->firstuse = jiffies;
425 return sb;
426}
427
428
429static inline bool in_persistence(struct ip_vs_conn *cp)
430{
431 for (cp = cp->control; cp; cp = cp->control) {
432 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
433 return true;
434 }
435 return false;
436}
437
438
439
440
441
442
443
444
445
446
447static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
448 struct ip_vs_conn *cp, int pkts)
449{
450 unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
451 unsigned long now = jiffies;
452 unsigned long n = (now + cp->timeout) & ~3UL;
453 unsigned int sync_refresh_period;
454 int sync_period;
455 int force;
456
457
458 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
459 force = 0;
460 else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
461 return 0;
462 else if (likely(cp->protocol == IPPROTO_TCP)) {
463 if (!((1 << cp->state) &
464 ((1 << IP_VS_TCP_S_ESTABLISHED) |
465 (1 << IP_VS_TCP_S_FIN_WAIT) |
466 (1 << IP_VS_TCP_S_CLOSE) |
467 (1 << IP_VS_TCP_S_CLOSE_WAIT) |
468 (1 << IP_VS_TCP_S_TIME_WAIT))))
469 return 0;
470 force = cp->state != cp->old_state;
471 if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
472 goto set;
473 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
474 if (!((1 << cp->state) &
475 ((1 << IP_VS_SCTP_S_ESTABLISHED) |
476 (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
477 (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
478 (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
479 (1 << IP_VS_SCTP_S_CLOSED))))
480 return 0;
481 force = cp->state != cp->old_state;
482 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
483 goto set;
484 } else {
485
486 force = 0;
487 }
488
489 sync_refresh_period = sysctl_sync_refresh_period(ipvs);
490 if (sync_refresh_period > 0) {
491 long diff = n - orig;
492 long min_diff = max(cp->timeout >> 1, 10UL * HZ);
493
494
495
496
497 if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
498 int retries = orig & 3;
499
500 if (retries >= sysctl_sync_retries(ipvs))
501 return 0;
502 if (time_before(now, orig - cp->timeout +
503 (sync_refresh_period >> 3)))
504 return 0;
505 n |= retries + 1;
506 }
507 }
508 sync_period = sysctl_sync_period(ipvs);
509 if (sync_period > 0) {
510 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
511 pkts % sync_period != sysctl_sync_threshold(ipvs))
512 return 0;
513 } else if (sync_refresh_period <= 0 &&
514 pkts != sysctl_sync_threshold(ipvs))
515 return 0;
516
517set:
518 cp->old_state = cp->state;
519 n = cmpxchg(&cp->sync_endtime, orig, n);
520 return n == orig || force;
521}
522
523
524
525
526
527static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
528 int pkts)
529{
530 struct netns_ipvs *ipvs = net_ipvs(net);
531 struct ip_vs_sync_mesg_v0 *m;
532 struct ip_vs_sync_conn_v0 *s;
533 struct ip_vs_sync_buff *buff;
534 struct ipvs_master_sync_state *ms;
535 int id;
536 int len;
537
538 if (unlikely(cp->af != AF_INET))
539 return;
540
541 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
542 return;
543
544 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
545 return;
546
547 spin_lock_bh(&ipvs->sync_buff_lock);
548 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
549 spin_unlock_bh(&ipvs->sync_buff_lock);
550 return;
551 }
552
553 id = select_master_thread_id(ipvs, cp);
554 ms = &ipvs->ms[id];
555 buff = ms->sync_buff;
556 if (buff) {
557 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
558
559 if (!m->nr_conns) {
560 sb_queue_tail(ipvs, ms);
561 ms->sync_buff = NULL;
562 buff = NULL;
563 }
564 }
565 if (!buff) {
566 buff = ip_vs_sync_buff_create_v0(ipvs);
567 if (!buff) {
568 spin_unlock_bh(&ipvs->sync_buff_lock);
569 pr_err("ip_vs_sync_buff_create failed.\n");
570 return;
571 }
572 ms->sync_buff = buff;
573 }
574
575 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
576 SIMPLE_CONN_SIZE;
577 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
578 s = (struct ip_vs_sync_conn_v0 *) buff->head;
579
580
581 s->reserved = 0;
582 s->protocol = cp->protocol;
583 s->cport = cp->cport;
584 s->vport = cp->vport;
585 s->dport = cp->dport;
586 s->caddr = cp->caddr.ip;
587 s->vaddr = cp->vaddr.ip;
588 s->daddr = cp->daddr.ip;
589 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
590 s->state = htons(cp->state);
591 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
592 struct ip_vs_sync_conn_options *opt =
593 (struct ip_vs_sync_conn_options *)&s[1];
594 memcpy(opt, &cp->in_seq, sizeof(*opt));
595 }
596
597 m->nr_conns++;
598 m->size = htons(ntohs(m->size) + len);
599 buff->head += len;
600
601
602 if (buff->head + FULL_CONN_SIZE > buff->end) {
603 sb_queue_tail(ipvs, ms);
604 ms->sync_buff = NULL;
605 }
606 spin_unlock_bh(&ipvs->sync_buff_lock);
607
608
609 cp = cp->control;
610 if (cp) {
611 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
612 pkts = atomic_add_return(1, &cp->in_pkts);
613 else
614 pkts = sysctl_sync_threshold(ipvs);
615 ip_vs_sync_conn(net, cp->control, pkts);
616 }
617}
618
619
620
621
622
623
624void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
625{
626 struct netns_ipvs *ipvs = net_ipvs(net);
627 struct ip_vs_sync_mesg *m;
628 union ip_vs_sync_conn *s;
629 struct ip_vs_sync_buff *buff;
630 struct ipvs_master_sync_state *ms;
631 int id;
632 __u8 *p;
633 unsigned int len, pe_name_len, pad;
634
635
636 if (sysctl_sync_ver(ipvs) == 0) {
637 ip_vs_sync_conn_v0(net, cp, pkts);
638 return;
639 }
640
641 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
642 goto control;
643sloop:
644 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
645 goto control;
646
647
648 pe_name_len = 0;
649 if (cp->pe_data_len) {
650 if (!cp->pe_data || !cp->dest) {
651 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
652 return;
653 }
654 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
655 }
656
657 spin_lock_bh(&ipvs->sync_buff_lock);
658 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
659 spin_unlock_bh(&ipvs->sync_buff_lock);
660 return;
661 }
662
663 id = select_master_thread_id(ipvs, cp);
664 ms = &ipvs->ms[id];
665
666#ifdef CONFIG_IP_VS_IPV6
667 if (cp->af == AF_INET6)
668 len = sizeof(struct ip_vs_sync_v6);
669 else
670#endif
671 len = sizeof(struct ip_vs_sync_v4);
672
673 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
674 len += sizeof(struct ip_vs_sync_conn_options) + 2;
675
676 if (cp->pe_data_len)
677 len += cp->pe_data_len + 2;
678 if (pe_name_len)
679 len += pe_name_len + 2;
680
681
682 pad = 0;
683 buff = ms->sync_buff;
684 if (buff) {
685 m = buff->mesg;
686 pad = (4 - (size_t) buff->head) & 3;
687
688 if (buff->head + len + pad > buff->end || m->reserved) {
689 sb_queue_tail(ipvs, ms);
690 ms->sync_buff = NULL;
691 buff = NULL;
692 pad = 0;
693 }
694 }
695
696 if (!buff) {
697 buff = ip_vs_sync_buff_create(ipvs);
698 if (!buff) {
699 spin_unlock_bh(&ipvs->sync_buff_lock);
700 pr_err("ip_vs_sync_buff_create failed.\n");
701 return;
702 }
703 ms->sync_buff = buff;
704 m = buff->mesg;
705 }
706
707 p = buff->head;
708 buff->head += pad + len;
709 m->size = htons(ntohs(m->size) + pad + len);
710
711 while (pad--)
712 *(p++) = 0;
713
714 s = (union ip_vs_sync_conn *)p;
715
716
717 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
718 s->v4.ver_size = htons(len & SVER_MASK);
719 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
720 s->v4.state = htons(cp->state);
721 s->v4.protocol = cp->protocol;
722 s->v4.cport = cp->cport;
723 s->v4.vport = cp->vport;
724 s->v4.dport = cp->dport;
725 s->v4.fwmark = htonl(cp->fwmark);
726 s->v4.timeout = htonl(cp->timeout / HZ);
727 m->nr_conns++;
728
729#ifdef CONFIG_IP_VS_IPV6
730 if (cp->af == AF_INET6) {
731 p += sizeof(struct ip_vs_sync_v6);
732 s->v6.caddr = cp->caddr.in6;
733 s->v6.vaddr = cp->vaddr.in6;
734 s->v6.daddr = cp->daddr.in6;
735 } else
736#endif
737 {
738 p += sizeof(struct ip_vs_sync_v4);
739 s->v4.caddr = cp->caddr.ip;
740 s->v4.vaddr = cp->vaddr.ip;
741 s->v4.daddr = cp->daddr.ip;
742 }
743 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
744 *(p++) = IPVS_OPT_SEQ_DATA;
745 *(p++) = sizeof(struct ip_vs_sync_conn_options);
746 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
747 p += sizeof(struct ip_vs_seq);
748 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
749 p += sizeof(struct ip_vs_seq);
750 }
751
752 if (cp->pe_data_len && cp->pe_data) {
753 *(p++) = IPVS_OPT_PE_DATA;
754 *(p++) = cp->pe_data_len;
755 memcpy(p, cp->pe_data, cp->pe_data_len);
756 p += cp->pe_data_len;
757 if (pe_name_len) {
758
759 *(p++) = IPVS_OPT_PE_NAME;
760 *(p++) = pe_name_len;
761 memcpy(p, cp->pe->name, pe_name_len);
762 p += pe_name_len;
763 }
764 }
765
766 spin_unlock_bh(&ipvs->sync_buff_lock);
767
768control:
769
770 cp = cp->control;
771 if (!cp)
772 return;
773 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
774 pkts = atomic_add_return(1, &cp->in_pkts);
775 else
776 pkts = sysctl_sync_threshold(ipvs);
777 goto sloop;
778}
779
780
781
782
783static inline int
784ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
785 struct ip_vs_conn_param *p,
786 __u8 *pe_data, unsigned int pe_data_len,
787 __u8 *pe_name, unsigned int pe_name_len)
788{
789#ifdef CONFIG_IP_VS_IPV6
790 if (af == AF_INET6)
791 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
792 (const union nf_inet_addr *)&sc->v6.caddr,
793 sc->v6.cport,
794 (const union nf_inet_addr *)&sc->v6.vaddr,
795 sc->v6.vport, p);
796 else
797#endif
798 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
799 (const union nf_inet_addr *)&sc->v4.caddr,
800 sc->v4.cport,
801 (const union nf_inet_addr *)&sc->v4.vaddr,
802 sc->v4.vport, p);
803
804 if (pe_data_len) {
805 if (pe_name_len) {
806 char buff[IP_VS_PENAME_MAXLEN+1];
807
808 memcpy(buff, pe_name, pe_name_len);
809 buff[pe_name_len]=0;
810 p->pe = __ip_vs_pe_getbyname(buff);
811 if (!p->pe) {
812 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
813 buff);
814 return 1;
815 }
816 } else {
817 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
818 return 1;
819 }
820
821 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
822 if (!p->pe_data) {
823 if (p->pe->module)
824 module_put(p->pe->module);
825 return -ENOMEM;
826 }
827 p->pe_data_len = pe_data_len;
828 }
829 return 0;
830}
831
832
833
834
835
836
837
838static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
839 unsigned int flags, unsigned int state,
840 unsigned int protocol, unsigned int type,
841 const union nf_inet_addr *daddr, __be16 dport,
842 unsigned long timeout, __u32 fwmark,
843 struct ip_vs_sync_conn_options *opt)
844{
845 struct ip_vs_dest *dest;
846 struct ip_vs_conn *cp;
847 struct netns_ipvs *ipvs = net_ipvs(net);
848
849 if (!(flags & IP_VS_CONN_F_TEMPLATE))
850 cp = ip_vs_conn_in_get(param);
851 else
852 cp = ip_vs_ct_in_get(param);
853
854 if (cp) {
855
856 kfree(param->pe_data);
857
858 dest = cp->dest;
859 spin_lock_bh(&cp->lock);
860 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
861 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
862 if (flags & IP_VS_CONN_F_INACTIVE) {
863 atomic_dec(&dest->activeconns);
864 atomic_inc(&dest->inactconns);
865 } else {
866 atomic_inc(&dest->activeconns);
867 atomic_dec(&dest->inactconns);
868 }
869 }
870 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
871 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
872 cp->flags = flags;
873 spin_unlock_bh(&cp->lock);
874 if (!dest)
875 ip_vs_try_bind_dest(cp);
876 } else {
877
878
879
880
881
882 rcu_read_lock();
883 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
884 param->vport, protocol, fwmark, flags);
885
886 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
887 rcu_read_unlock();
888 if (!cp) {
889 if (param->pe_data)
890 kfree(param->pe_data);
891 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
892 return;
893 }
894 }
895
896 if (opt)
897 memcpy(&cp->in_seq, opt, sizeof(*opt));
898 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
899 cp->state = state;
900 cp->old_state = cp->state;
901
902
903
904
905
906
907
908
909
910 if (timeout) {
911 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
912 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
913 cp->timeout = timeout*HZ;
914 } else {
915 struct ip_vs_proto_data *pd;
916
917 pd = ip_vs_proto_data_get(net, protocol);
918 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
919 cp->timeout = pd->timeout_table[state];
920 else
921 cp->timeout = (3*60*HZ);
922 }
923 ip_vs_conn_put(cp);
924}
925
926
927
928
929static void ip_vs_process_message_v0(struct net *net, const char *buffer,
930 const size_t buflen)
931{
932 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
933 struct ip_vs_sync_conn_v0 *s;
934 struct ip_vs_sync_conn_options *opt;
935 struct ip_vs_protocol *pp;
936 struct ip_vs_conn_param param;
937 char *p;
938 int i;
939
940 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
941 for (i=0; i<m->nr_conns; i++) {
942 unsigned int flags, state;
943
944 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
945 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
946 return;
947 }
948 s = (struct ip_vs_sync_conn_v0 *) p;
949 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
950 flags &= ~IP_VS_CONN_F_HASHED;
951 if (flags & IP_VS_CONN_F_SEQ_MASK) {
952 opt = (struct ip_vs_sync_conn_options *)&s[1];
953 p += FULL_CONN_SIZE;
954 if (p > buffer+buflen) {
955 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
956 return;
957 }
958 } else {
959 opt = NULL;
960 p += SIMPLE_CONN_SIZE;
961 }
962
963 state = ntohs(s->state);
964 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
965 pp = ip_vs_proto_get(s->protocol);
966 if (!pp) {
967 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
968 s->protocol);
969 continue;
970 }
971 if (state >= pp->num_states) {
972 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
973 pp->name, state);
974 continue;
975 }
976 } else {
977
978 if (state > 0) {
979 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
980 state);
981 state = 0;
982 }
983 }
984
985 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
986 (const union nf_inet_addr *)&s->caddr,
987 s->cport,
988 (const union nf_inet_addr *)&s->vaddr,
989 s->vport, ¶m);
990
991
992 ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
993 (union nf_inet_addr *)&s->daddr, s->dport,
994 0, 0, opt);
995 }
996}
997
998
999
1000
1001static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
1002 __u32 *opt_flags,
1003 struct ip_vs_sync_conn_options *opt)
1004{
1005 struct ip_vs_sync_conn_options *topt;
1006
1007 topt = (struct ip_vs_sync_conn_options *)p;
1008
1009 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
1010 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
1011 return -EINVAL;
1012 }
1013 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
1014 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
1015 return -EINVAL;
1016 }
1017 ntoh_seq(&topt->in_seq, &opt->in_seq);
1018 ntoh_seq(&topt->out_seq, &opt->out_seq);
1019 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
1020 return 0;
1021}
1022
1023static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1024 __u8 **data, unsigned int maxlen,
1025 __u32 *opt_flags, __u32 flag)
1026{
1027 if (plen > maxlen) {
1028 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
1029 return -EINVAL;
1030 }
1031 if (*opt_flags & flag) {
1032 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
1033 return -EINVAL;
1034 }
1035 *data_len = plen;
1036 *data = p;
1037 *opt_flags |= flag;
1038 return 0;
1039}
1040
1041
1042
1043static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
1044{
1045 struct ip_vs_sync_conn_options opt;
1046 union ip_vs_sync_conn *s;
1047 struct ip_vs_protocol *pp;
1048 struct ip_vs_conn_param param;
1049 __u32 flags;
1050 unsigned int af, state, pe_data_len=0, pe_name_len=0;
1051 __u8 *pe_data=NULL, *pe_name=NULL;
1052 __u32 opt_flags=0;
1053 int retc=0;
1054
1055 s = (union ip_vs_sync_conn *) p;
1056
1057 if (s->v6.type & STYPE_F_INET6) {
1058#ifdef CONFIG_IP_VS_IPV6
1059 af = AF_INET6;
1060 p += sizeof(struct ip_vs_sync_v6);
1061#else
1062 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1063 retc = 10;
1064 goto out;
1065#endif
1066 } else if (!s->v4.type) {
1067 af = AF_INET;
1068 p += sizeof(struct ip_vs_sync_v4);
1069 } else {
1070 return -10;
1071 }
1072 if (p > msg_end)
1073 return -20;
1074
1075
1076 while (p < msg_end) {
1077 int ptype;
1078 int plen;
1079
1080 if (p+2 > msg_end)
1081 return -30;
1082 ptype = *(p++);
1083 plen = *(p++);
1084
1085 if (!plen || ((p + plen) > msg_end))
1086 return -40;
1087
1088 switch (ptype & ~IPVS_OPT_F_PARAM) {
1089 case IPVS_OPT_SEQ_DATA:
1090 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1091 return -50;
1092 break;
1093
1094 case IPVS_OPT_PE_DATA:
1095 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1096 IP_VS_PEDATA_MAXLEN, &opt_flags,
1097 IPVS_OPT_F_PE_DATA))
1098 return -60;
1099 break;
1100
1101 case IPVS_OPT_PE_NAME:
1102 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1103 IP_VS_PENAME_MAXLEN, &opt_flags,
1104 IPVS_OPT_F_PE_NAME))
1105 return -70;
1106 break;
1107
1108 default:
1109
1110 if (!(ptype & IPVS_OPT_F_PARAM)) {
1111 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1112 ptype & ~IPVS_OPT_F_PARAM);
1113 retc = 20;
1114 goto out;
1115 }
1116 }
1117 p += plen;
1118 }
1119
1120
1121 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1122 flags |= IP_VS_CONN_F_SYNC;
1123 state = ntohs(s->v4.state);
1124
1125 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1126 pp = ip_vs_proto_get(s->v4.protocol);
1127 if (!pp) {
1128 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1129 s->v4.protocol);
1130 retc = 30;
1131 goto out;
1132 }
1133 if (state >= pp->num_states) {
1134 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1135 pp->name, state);
1136 retc = 40;
1137 goto out;
1138 }
1139 } else {
1140
1141 if (state > 0) {
1142 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1143 state);
1144 state = 0;
1145 }
1146 }
1147 if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
1148 pe_data_len, pe_name, pe_name_len)) {
1149 retc = 50;
1150 goto out;
1151 }
1152
1153 if (af == AF_INET)
1154 ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
1155 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1156 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1157 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1158 );
1159#ifdef CONFIG_IP_VS_IPV6
1160 else
1161 ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
1162 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1163 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1164 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1165 );
1166#endif
1167 return 0;
1168
1169out:
1170 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1171 return retc;
1172
1173}
1174
1175
1176
1177
1178
1179static void ip_vs_process_message(struct net *net, __u8 *buffer,
1180 const size_t buflen)
1181{
1182 struct netns_ipvs *ipvs = net_ipvs(net);
1183 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1184 __u8 *p, *msg_end;
1185 int i, nr_conns;
1186
1187 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1188 IP_VS_DBG(2, "BACKUP, message header too short\n");
1189 return;
1190 }
1191
1192 if (buflen != ntohs(m2->size)) {
1193 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1194 return;
1195 }
1196
1197 if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
1198 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1199 return;
1200 }
1201
1202 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1203 && (m2->spare == 0)) {
1204
1205 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1206 nr_conns = m2->nr_conns;
1207
1208 for (i=0; i<nr_conns; i++) {
1209 union ip_vs_sync_conn *s;
1210 unsigned int size;
1211 int retc;
1212
1213 p = msg_end;
1214 if (p + sizeof(s->v4) > buffer+buflen) {
1215 IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
1216 return;
1217 }
1218 s = (union ip_vs_sync_conn *)p;
1219 size = ntohs(s->v4.ver_size) & SVER_MASK;
1220 msg_end = p + size;
1221
1222 if (msg_end > buffer+buflen) {
1223 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
1224 return;
1225 }
1226 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
1227 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
1228 ntohs(s->v4.ver_size) >> SVER_SHIFT);
1229 return;
1230 }
1231
1232 retc = ip_vs_proc_sync_conn(net, p, msg_end);
1233 if (retc < 0) {
1234 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
1235 retc);
1236 return;
1237 }
1238
1239 msg_end = p + ((size + 3) & ~3);
1240 }
1241 } else {
1242
1243 ip_vs_process_message_v0(net, buffer, buflen);
1244 return;
1245 }
1246}
1247
1248
1249
1250
1251
1252static void set_sock_size(struct sock *sk, int mode, int val)
1253{
1254
1255
1256 lock_sock(sk);
1257 if (mode) {
1258 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
1259 sysctl_wmem_max);
1260 sk->sk_sndbuf = val * 2;
1261 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1262 } else {
1263 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
1264 sysctl_rmem_max);
1265 sk->sk_rcvbuf = val * 2;
1266 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1267 }
1268 release_sock(sk);
1269}
1270
1271
1272
1273
1274static void set_mcast_loop(struct sock *sk, u_char loop)
1275{
1276 struct inet_sock *inet = inet_sk(sk);
1277
1278
1279 lock_sock(sk);
1280 inet->mc_loop = loop ? 1 : 0;
1281 release_sock(sk);
1282}
1283
1284
1285
1286
1287static void set_mcast_ttl(struct sock *sk, u_char ttl)
1288{
1289 struct inet_sock *inet = inet_sk(sk);
1290
1291
1292 lock_sock(sk);
1293 inet->mc_ttl = ttl;
1294 release_sock(sk);
1295}
1296
1297
1298
1299
1300static int set_mcast_if(struct sock *sk, char *ifname)
1301{
1302 struct net_device *dev;
1303 struct inet_sock *inet = inet_sk(sk);
1304 struct net *net = sock_net(sk);
1305
1306 dev = __dev_get_by_name(net, ifname);
1307 if (!dev)
1308 return -ENODEV;
1309
1310 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1311 return -EINVAL;
1312
1313 lock_sock(sk);
1314 inet->mc_index = dev->ifindex;
1315
1316 release_sock(sk);
1317
1318 return 0;
1319}
1320
1321
1322
1323
1324
1325
1326static int set_sync_mesg_maxlen(struct net *net, int sync_state)
1327{
1328 struct netns_ipvs *ipvs = net_ipvs(net);
1329 struct net_device *dev;
1330 int num;
1331
1332 if (sync_state == IP_VS_STATE_MASTER) {
1333 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1334 if (!dev)
1335 return -ENODEV;
1336
1337 num = (dev->mtu - sizeof(struct iphdr) -
1338 sizeof(struct udphdr) -
1339 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
1340 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
1341 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
1342 IP_VS_DBG(7, "setting the maximum length of sync sending "
1343 "message %d.\n", ipvs->send_mesg_maxlen);
1344 } else if (sync_state == IP_VS_STATE_BACKUP) {
1345 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1346 if (!dev)
1347 return -ENODEV;
1348
1349 ipvs->recv_mesg_maxlen = dev->mtu -
1350 sizeof(struct iphdr) - sizeof(struct udphdr);
1351 IP_VS_DBG(7, "setting the maximum length of sync receiving "
1352 "message %d.\n", ipvs->recv_mesg_maxlen);
1353 }
1354
1355 return 0;
1356}
1357
1358
1359
1360
1361
1362
1363
1364static int
1365join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
1366{
1367 struct net *net = sock_net(sk);
1368 struct ip_mreqn mreq;
1369 struct net_device *dev;
1370 int ret;
1371
1372 memset(&mreq, 0, sizeof(mreq));
1373 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1374
1375 dev = __dev_get_by_name(net, ifname);
1376 if (!dev)
1377 return -ENODEV;
1378 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1379 return -EINVAL;
1380
1381 mreq.imr_ifindex = dev->ifindex;
1382
1383 lock_sock(sk);
1384 ret = ip_mc_join_group(sk, &mreq);
1385 release_sock(sk);
1386
1387 return ret;
1388}
1389
1390
1391static int bind_mcastif_addr(struct socket *sock, char *ifname)
1392{
1393 struct net *net = sock_net(sock->sk);
1394 struct net_device *dev;
1395 __be32 addr;
1396 struct sockaddr_in sin;
1397
1398 dev = __dev_get_by_name(net, ifname);
1399 if (!dev)
1400 return -ENODEV;
1401
1402 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1403 if (!addr)
1404 pr_err("You probably need to specify IP address on "
1405 "multicast interface.\n");
1406
1407 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
1408 ifname, &addr);
1409
1410
1411 sin.sin_family = AF_INET;
1412 sin.sin_addr.s_addr = addr;
1413 sin.sin_port = 0;
1414
1415 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
1416}
1417
1418
1419
1420
1421static struct socket *make_send_sock(struct net *net, int id)
1422{
1423 struct netns_ipvs *ipvs = net_ipvs(net);
1424
1425 struct sockaddr_in mcast_addr = {
1426 .sin_family = AF_INET,
1427 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1428 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1429 };
1430 struct socket *sock;
1431 int result;
1432
1433
1434 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1435 if (result < 0) {
1436 pr_err("Error during creation of socket; terminating\n");
1437 return ERR_PTR(result);
1438 }
1439
1440
1441
1442
1443
1444 sk_change_net(sock->sk, net);
1445 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
1446 if (result < 0) {
1447 pr_err("Error setting outbound mcast interface\n");
1448 goto error;
1449 }
1450
1451 set_mcast_loop(sock->sk, 0);
1452 set_mcast_ttl(sock->sk, 1);
1453 result = sysctl_sync_sock_size(ipvs);
1454 if (result > 0)
1455 set_sock_size(sock->sk, 1, result);
1456
1457 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
1458 if (result < 0) {
1459 pr_err("Error binding address of the mcast interface\n");
1460 goto error;
1461 }
1462
1463 result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
1464 sizeof(struct sockaddr), 0);
1465 if (result < 0) {
1466 pr_err("Error connecting to the multicast addr\n");
1467 goto error;
1468 }
1469
1470 return sock;
1471
1472error:
1473 sk_release_kernel(sock->sk);
1474 return ERR_PTR(result);
1475}
1476
1477
1478
1479
1480
1481static struct socket *make_receive_sock(struct net *net, int id)
1482{
1483 struct netns_ipvs *ipvs = net_ipvs(net);
1484
1485 struct sockaddr_in mcast_addr = {
1486 .sin_family = AF_INET,
1487 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1488 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1489 };
1490 struct socket *sock;
1491 int result;
1492
1493
1494 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1495 if (result < 0) {
1496 pr_err("Error during creation of socket; terminating\n");
1497 return ERR_PTR(result);
1498 }
1499
1500
1501
1502
1503
1504 sk_change_net(sock->sk, net);
1505
1506 sock->sk->sk_reuse = SK_CAN_REUSE;
1507 result = sysctl_sync_sock_size(ipvs);
1508 if (result > 0)
1509 set_sock_size(sock->sk, 0, result);
1510
1511 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
1512 sizeof(struct sockaddr));
1513 if (result < 0) {
1514 pr_err("Error binding to the multicast addr\n");
1515 goto error;
1516 }
1517
1518
1519 result = join_mcast_group(sock->sk,
1520 (struct in_addr *) &mcast_addr.sin_addr,
1521 ipvs->backup_mcast_ifn);
1522 if (result < 0) {
1523 pr_err("Error joining to the multicast group\n");
1524 goto error;
1525 }
1526
1527 return sock;
1528
1529error:
1530 sk_release_kernel(sock->sk);
1531 return ERR_PTR(result);
1532}
1533
1534
1535static int
1536ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1537{
1538 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1539 struct kvec iov;
1540 int len;
1541
1542 EnterFunction(7);
1543 iov.iov_base = (void *)buffer;
1544 iov.iov_len = length;
1545
1546 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1547
1548 LeaveFunction(7);
1549 return len;
1550}
1551
1552static int
1553ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1554{
1555 int msize;
1556 int ret;
1557
1558 msize = ntohs(msg->size);
1559
1560 ret = ip_vs_send_async(sock, (char *)msg, msize);
1561 if (ret >= 0 || ret == -EAGAIN)
1562 return ret;
1563 pr_err("ip_vs_send_async error %d\n", ret);
1564 return 0;
1565}
1566
1567static int
1568ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1569{
1570 struct msghdr msg = {NULL,};
1571 struct kvec iov;
1572 int len;
1573
1574 EnterFunction(7);
1575
1576
1577 iov.iov_base = buffer;
1578 iov.iov_len = (size_t)buflen;
1579
1580 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1581
1582 if (len < 0)
1583 return len;
1584
1585 LeaveFunction(7);
1586 return len;
1587}
1588
1589
1590static void master_wakeup_work_handler(struct work_struct *work)
1591{
1592 struct ipvs_master_sync_state *ms =
1593 container_of(work, struct ipvs_master_sync_state,
1594 master_wakeup_work.work);
1595 struct netns_ipvs *ipvs = ms->ipvs;
1596
1597 spin_lock_bh(&ipvs->sync_lock);
1598 if (ms->sync_queue_len &&
1599 ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
1600 ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
1601 wake_up_process(ms->master_thread);
1602 }
1603 spin_unlock_bh(&ipvs->sync_lock);
1604}
1605
1606
1607static inline struct ip_vs_sync_buff *
1608next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1609{
1610 struct ip_vs_sync_buff *sb;
1611
1612 sb = sb_dequeue(ipvs, ms);
1613 if (sb)
1614 return sb;
1615
1616 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1617}
1618
/*
 * Body of a master sync kthread.
 *
 * @data is the struct ip_vs_sync_thread_data prepared by the starter:
 * it carries the netns, the sending socket and the thread id, which
 * selects this thread's entry in ipvs->ms[].
 *
 * The thread repeatedly fetches a sync buffer and sends it on
 * tinfo->sock until kthread_should_stop() becomes true. On exit it
 * releases the buffer in hand, everything left in the queue, the
 * current buffer, the socket and @data itself. Always returns 0.
 */
static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		/* Checked after the fetch, so sb may be non-NULL on break. */
		if (unlikely(kthread_should_stop()))
			break;
		if (!sb) {
			/*
			 * NOTE(review): no task-state change is visible here
			 * before schedule_timeout(); presumably the fetch
			 * helpers leave the task in a sleeping state when
			 * nothing is queued - confirm against sb_dequeue().
			 */
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		/*
		 * ip_vs_send_sync_msg() only returns a negative value for
		 * -EAGAIN; wait until the socket is writeable (or a stop is
		 * requested) and retry the same buffer.
		 */
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			int ret = 0;

			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop(),
						   ret);
			if (unlikely(kthread_should_stop()))
				goto done;
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	/* Buffer fetched (or still unsent) when the stop was noticed. */
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* Drop every buffer still sitting in the queue. */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	/*
	 * NOTE(review): state is reset again after draining, presumably
	 * because sb_dequeue() may change it - confirm.
	 */
	__set_current_state(TASK_RUNNING);

	/* Drop the current buffer as well (time argument 0). */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* The thread owns its socket and tinfo; release both on exit. */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo);

	return 0;
}
1673
1674
1675static int sync_thread_backup(void *data)
1676{
1677 struct ip_vs_sync_thread_data *tinfo = data;
1678 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1679 int len;
1680
1681 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
1682 "syncid = %d, id = %d\n",
1683 ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
1684
1685 while (!kthread_should_stop()) {
1686 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
1687 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
1688 || kthread_should_stop());
1689
1690
1691 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
1692 len = ip_vs_receive(tinfo->sock, tinfo->buf,
1693 ipvs->recv_mesg_maxlen);
1694 if (len <= 0) {
1695 if (len != -EAGAIN)
1696 pr_err("receiving message error\n");
1697 break;
1698 }
1699
1700 ip_vs_process_message(tinfo->net, tinfo->buf, len);
1701 }
1702 }
1703
1704
1705 sk_release_kernel(tinfo->sock->sk);
1706 kfree(tinfo->buf);
1707 kfree(tinfo);
1708
1709 return 0;
1710}
1711
1712
1713int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
1714{
1715 struct ip_vs_sync_thread_data *tinfo;
1716 struct task_struct **array = NULL, *task;
1717 struct socket *sock;
1718 struct netns_ipvs *ipvs = net_ipvs(net);
1719 char *name;
1720 int (*threadfn)(void *data);
1721 int id, count;
1722 int result = -ENOMEM;
1723
1724 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1725 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
1726 sizeof(struct ip_vs_sync_conn_v0));
1727
1728 if (!ipvs->sync_state) {
1729 count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
1730 ipvs->threads_mask = count - 1;
1731 } else
1732 count = ipvs->threads_mask + 1;
1733
1734 if (state == IP_VS_STATE_MASTER) {
1735 if (ipvs->ms)
1736 return -EEXIST;
1737
1738 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
1739 sizeof(ipvs->master_mcast_ifn));
1740 ipvs->master_syncid = syncid;
1741 name = "ipvs-m:%d:%d";
1742 threadfn = sync_thread_master;
1743 } else if (state == IP_VS_STATE_BACKUP) {
1744 if (ipvs->backup_threads)
1745 return -EEXIST;
1746
1747 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
1748 sizeof(ipvs->backup_mcast_ifn));
1749 ipvs->backup_syncid = syncid;
1750 name = "ipvs-b:%d:%d";
1751 threadfn = sync_thread_backup;
1752 } else {
1753 return -EINVAL;
1754 }
1755
1756 if (state == IP_VS_STATE_MASTER) {
1757 struct ipvs_master_sync_state *ms;
1758
1759 ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
1760 if (!ipvs->ms)
1761 goto out;
1762 ms = ipvs->ms;
1763 for (id = 0; id < count; id++, ms++) {
1764 INIT_LIST_HEAD(&ms->sync_queue);
1765 ms->sync_queue_len = 0;
1766 ms->sync_queue_delay = 0;
1767 INIT_DELAYED_WORK(&ms->master_wakeup_work,
1768 master_wakeup_work_handler);
1769 ms->ipvs = ipvs;
1770 }
1771 } else {
1772 array = kzalloc(count * sizeof(struct task_struct *),
1773 GFP_KERNEL);
1774 if (!array)
1775 goto out;
1776 }
1777 set_sync_mesg_maxlen(net, state);
1778
1779 tinfo = NULL;
1780 for (id = 0; id < count; id++) {
1781 if (state == IP_VS_STATE_MASTER)
1782 sock = make_send_sock(net, id);
1783 else
1784 sock = make_receive_sock(net, id);
1785 if (IS_ERR(sock)) {
1786 result = PTR_ERR(sock);
1787 goto outtinfo;
1788 }
1789 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
1790 if (!tinfo)
1791 goto outsocket;
1792 tinfo->net = net;
1793 tinfo->sock = sock;
1794 if (state == IP_VS_STATE_BACKUP) {
1795 tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
1796 GFP_KERNEL);
1797 if (!tinfo->buf)
1798 goto outtinfo;
1799 } else {
1800 tinfo->buf = NULL;
1801 }
1802 tinfo->id = id;
1803
1804 task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
1805 if (IS_ERR(task)) {
1806 result = PTR_ERR(task);
1807 goto outtinfo;
1808 }
1809 tinfo = NULL;
1810 if (state == IP_VS_STATE_MASTER)
1811 ipvs->ms[id].master_thread = task;
1812 else
1813 array[id] = task;
1814 }
1815
1816
1817
1818 if (state == IP_VS_STATE_BACKUP)
1819 ipvs->backup_threads = array;
1820 spin_lock_bh(&ipvs->sync_buff_lock);
1821 ipvs->sync_state |= state;
1822 spin_unlock_bh(&ipvs->sync_buff_lock);
1823
1824
1825 ip_vs_use_count_inc();
1826
1827 return 0;
1828
1829outsocket:
1830 sk_release_kernel(sock->sk);
1831
1832outtinfo:
1833 if (tinfo) {
1834 sk_release_kernel(tinfo->sock->sk);
1835 kfree(tinfo->buf);
1836 kfree(tinfo);
1837 }
1838 count = id;
1839 while (count-- > 0) {
1840 if (state == IP_VS_STATE_MASTER)
1841 kthread_stop(ipvs->ms[count].master_thread);
1842 else
1843 kthread_stop(array[count]);
1844 }
1845 kfree(array);
1846
1847out:
1848 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
1849 kfree(ipvs->ms);
1850 ipvs->ms = NULL;
1851 }
1852 return result;
1853}
1854
1855
1856int stop_sync_thread(struct net *net, int state)
1857{
1858 struct netns_ipvs *ipvs = net_ipvs(net);
1859 struct task_struct **array;
1860 int id;
1861 int retc = -EINVAL;
1862
1863 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1864
1865 if (state == IP_VS_STATE_MASTER) {
1866 if (!ipvs->ms)
1867 return -ESRCH;
1868
1869
1870
1871
1872
1873
1874
1875 spin_lock_bh(&ipvs->sync_buff_lock);
1876 spin_lock(&ipvs->sync_lock);
1877 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1878 spin_unlock(&ipvs->sync_lock);
1879 spin_unlock_bh(&ipvs->sync_buff_lock);
1880
1881 retc = 0;
1882 for (id = ipvs->threads_mask; id >= 0; id--) {
1883 struct ipvs_master_sync_state *ms = &ipvs->ms[id];
1884 int ret;
1885
1886 pr_info("stopping master sync thread %d ...\n",
1887 task_pid_nr(ms->master_thread));
1888 cancel_delayed_work_sync(&ms->master_wakeup_work);
1889 ret = kthread_stop(ms->master_thread);
1890 if (retc >= 0)
1891 retc = ret;
1892 }
1893 kfree(ipvs->ms);
1894 ipvs->ms = NULL;
1895 } else if (state == IP_VS_STATE_BACKUP) {
1896 if (!ipvs->backup_threads)
1897 return -ESRCH;
1898
1899 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1900 array = ipvs->backup_threads;
1901 retc = 0;
1902 for (id = ipvs->threads_mask; id >= 0; id--) {
1903 int ret;
1904
1905 pr_info("stopping backup sync thread %d ...\n",
1906 task_pid_nr(array[id]));
1907 ret = kthread_stop(array[id]);
1908 if (retc >= 0)
1909 retc = ret;
1910 }
1911 kfree(array);
1912 ipvs->backup_threads = NULL;
1913 }
1914
1915
1916 ip_vs_use_count_dec();
1917
1918 return retc;
1919}
1920
1921
1922
1923
1924int __net_init ip_vs_sync_net_init(struct net *net)
1925{
1926 struct netns_ipvs *ipvs = net_ipvs(net);
1927
1928 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
1929 spin_lock_init(&ipvs->sync_lock);
1930 spin_lock_init(&ipvs->sync_buff_lock);
1931 return 0;
1932}
1933
1934void ip_vs_sync_net_cleanup(struct net *net)
1935{
1936 int retc;
1937 struct netns_ipvs *ipvs = net_ipvs(net);
1938
1939 mutex_lock(&ipvs->sync_mutex);
1940 retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
1941 if (retc && retc != -ESRCH)
1942 pr_err("Failed to stop Master Daemon\n");
1943
1944 retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
1945 if (retc && retc != -ESRCH)
1946 pr_err("Failed to stop Backup Daemon\n");
1947 mutex_unlock(&ipvs->sync_mutex);
1948}
1949