1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Message prefix: pr_fmt() prepends KMSG_COMPONENT ": " (i.e. "IPVS: ")
 * to every format string it is applied to.  Defined ahead of the
 * #include lines that follow.
 */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
36
37#include <linux/module.h>
38#include <linux/slab.h>
39#include <linux/inetdevice.h>
40#include <linux/net.h>
41#include <linux/completion.h>
42#include <linux/delay.h>
43#include <linux/skbuff.h>
44#include <linux/in.h>
45#include <linux/igmp.h>
46#include <linux/udp.h>
47#include <linux/err.h>
48#include <linux/kthread.h>
49#include <linux/wait.h>
50#include <linux/kernel.h>
51
52#include <asm/unaligned.h>
53
54#include <net/ip.h>
55#include <net/sock.h>
56
57#include <net/ip_vs.h>
58
/* IPv4 address (host order) the send socket connects to: 224.0.0.81 */
#define IP_VS_SYNC_GROUP 0xe0000051
/* Base UDP port; make_send_sock() uses IP_VS_SYNC_PORT + id */
#define IP_VS_SYNC_PORT 8848

/* Value stored in, and expected from, ip_vs_sync_mesg.version */
#define SYNC_PROTO_VER 1

/* Lock class key; not referenced in the visible part of this file */
static struct lock_class_key __ipvs_sync_key;
65
66
67
68
/*
 * One connection entry in a version 0 sync message (IPv4 only).
 * The field order is the on-wire layout and must not be changed.
 */
struct ip_vs_sync_conn_v0 {
	__u8 reserved;	/* written as 0 by the sender */

	/* Protocol, ports and addresses, copied as-is from the connection */
	__u8 protocol;
	__be16 cport;
	__be16 vport;
	__be16 dport;
	__be32 caddr;	/* cp->caddr.ip */
	__be32 vaddr;	/* cp->vaddr.ip */
	__be32 daddr;	/* cp->daddr.ip */

	/* Connection flags and state, converted with htons() */
	__be16 flags;
	__be16 state;

	/* An optional struct ip_vs_sync_conn_options follows directly */
};
87
/*
 * Sequence data that accompanies a connection entry when the connection
 * has any IP_VS_CONN_F_SEQ_MASK flag set.
 */
struct ip_vs_sync_conn_options {
	struct ip_vs_seq in_seq;	/* mirrors cp->in_seq */
	struct ip_vs_seq out_seq;	/* mirrors cp->out_seq */
};
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
 * IPv4 connection entry of a version 1 sync message.
 * The field order is the on-wire layout and must not be changed.
 */
struct ip_vs_sync_v4 {
	__u8 type;		/* 0 for IPv4, STYPE_F_INET6 for IPv6 */
	__u8 protocol;
	__be16 ver_size;	/* size in SVER_MASK bits, version above SVER_SHIFT */

	__be32 flags;		/* connection flags, htonl() */
	__be16 state;		/* connection state, htons() */

	__be16 cport;
	__be16 vport;
	__be16 dport;
	__be32 fwmark;		/* cp->fwmark, htonl() */
	__be32 timeout;		/* cp->timeout / HZ, htonl() */
	__be32 caddr;
	__be32 vaddr;
	__be32 daddr;

	/* Type/length encoded options (IPVS_OPT_*) follow */
};
150
151
152
/*
 * IPv6 connection entry of a version 1 sync message.  Identical to
 * struct ip_vs_sync_v4 up to and including 'timeout'; only the address
 * fields differ.  The field order is the on-wire layout.
 */
struct ip_vs_sync_v6 {
	__u8 type;		/* has STYPE_F_INET6 set */
	__u8 protocol;
	__be16 ver_size;	/* size in SVER_MASK bits, version above SVER_SHIFT */

	__be32 flags;		/* connection flags, htonl() */
	__be16 state;		/* connection state, htons() */

	__be16 cport;
	__be16 vport;
	__be16 dport;
	__be32 fwmark;		/* cp->fwmark, htonl() */
	__be32 timeout;		/* cp->timeout / HZ, htonl() */
	struct in6_addr caddr;
	struct in6_addr vaddr;
	struct in6_addr daddr;

	/* Type/length encoded options (IPVS_OPT_*) follow */
};
172
/*
 * Overlay of the two version 1 entry layouts.  The members before the
 * addresses sit at the same offsets in both, so code in this file reads
 * and writes them through .v4 regardless of the address family.
 */
union ip_vs_sync_conn {
	struct ip_vs_sync_v4 v4;
	struct ip_vs_sync_v6 v6;
};
177
178
/* Bits of the 'type' byte in a version 1 entry */
#define STYPE_INET6 0
#define STYPE_F_INET6 (1 << STYPE_INET6)

/* Split of ver_size: the low 12 bits are the size, the rest is the version */
#define SVER_SHIFT 12
#define SVER_MASK 0x0fff

/* Option type codes used in the type/length option list of an entry */
#define IPVS_OPT_SEQ_DATA 1
#define IPVS_OPT_PE_DATA 2
#define IPVS_OPT_PE_NAME 3
#define IPVS_OPT_PARAM 7

/*
 * Bit flags derived from the option codes.  The first three record which
 * options were already seen while parsing; IPVS_OPT_F_PARAM is tested on
 * the option type byte itself to decide whether an unknown option may be
 * skipped.
 */
#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
194
/*
 * Context bundle for a sync thread.  Not referenced in the visible part
 * of this file; the notes below are inferred from the field names only.
 * TODO confirm against the thread start/stop code.
 */
struct ip_vs_sync_thread_data {
	struct net *net;	/* presumably the owning network namespace */
	struct socket *sock;	/* presumably the thread's UDP socket */
	char *buf;		/* presumably a receive buffer */
	int id;			/* presumably the thread index */
};
201
202
/* Size of a version 0 entry without sequence options */
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
/* Size of a version 0 entry followed by its sequence options */
#define FULL_CONN_SIZE \
(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/* Header length used by set_sync_mesg_maxlen() when sizing send buffers */
#define SYNC_MESG_HEADER_LEN 4
/* Upper bound on entries per buffer; nr_conns is a single byte */
#define MAX_CONNS_PER_SYNCBUFF 255
244
245
/* Header of a version 0 sync message (on-wire layout) */
struct ip_vs_sync_mesg_v0 {
	__u8 nr_conns;	/* number of entries that follow */
	__u8 syncid;	/* sender's ipvs->master_syncid */
	__be16 size;	/* total message length including header, htons() */

	/* struct ip_vs_sync_conn_v0 entries follow */
};
253
254
/*
 * Header of a version 1 sync message (on-wire layout).  The first four
 * bytes line up with struct ip_vs_sync_mesg_v0, with 'reserved' in the
 * position of the v0 'nr_conns'.
 */
struct ip_vs_sync_mesg {
	__u8 reserved;	/* written as 0; receiver requires 0 for v1 */
	__u8 syncid;	/* sender's ipvs->master_syncid */
	__be16 size;	/* total message length including header, htons() */
	__u8 nr_conns;	/* number of entries that follow */
	__s8 version;	/* SYNC_PROTO_VER */
	__u16 spare;	/* written as 0; receiver requires 0 for v1 */

	/* union ip_vs_sync_conn entries follow */
};
264
/* A sync message under construction or waiting in a master's send queue */
struct ip_vs_sync_buff {
	struct list_head list;		/* link in ms->sync_queue */
	unsigned long firstuse;		/* jiffies when the buffer was created */

	/* Message storage and the fill window inside it */
	struct ip_vs_sync_mesg *mesg;	/* start of the message (header first) */
	unsigned char *head;		/* where the next entry is written */
	unsigned char *end;		/* mesg + ipvs->send_mesg_maxlen */
};
274
275
276
277
278
279static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
280{
281 ho->init_seq = get_unaligned_be32(&no->init_seq);
282 ho->delta = get_unaligned_be32(&no->delta);
283 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
284}
285
286
287
288
289
290static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
291{
292 put_unaligned_be32(ho->init_seq, &no->init_seq);
293 put_unaligned_be32(ho->delta, &no->delta);
294 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
295}
296
297static inline struct ip_vs_sync_buff *
298sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
299{
300 struct ip_vs_sync_buff *sb;
301
302 spin_lock_bh(&ipvs->sync_lock);
303 if (list_empty(&ms->sync_queue)) {
304 sb = NULL;
305 __set_current_state(TASK_INTERRUPTIBLE);
306 } else {
307 sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
308 list);
309 list_del(&sb->list);
310 ms->sync_queue_len--;
311 if (!ms->sync_queue_len)
312 ms->sync_queue_delay = 0;
313 }
314 spin_unlock_bh(&ipvs->sync_lock);
315
316 return sb;
317}
318
319
320
321
322static inline struct ip_vs_sync_buff *
323ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
324{
325 struct ip_vs_sync_buff *sb;
326
327 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
328 return NULL;
329
330 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
331 if (!sb->mesg) {
332 kfree(sb);
333 return NULL;
334 }
335 sb->mesg->reserved = 0;
336 sb->mesg->version = SYNC_PROTO_VER;
337 sb->mesg->syncid = ipvs->master_syncid;
338 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
339 sb->mesg->nr_conns = 0;
340 sb->mesg->spare = 0;
341 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
342 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
343
344 sb->firstuse = jiffies;
345 return sb;
346}
347
348static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
349{
350 kfree(sb->mesg);
351 kfree(sb);
352}
353
354static inline void sb_queue_tail(struct netns_ipvs *ipvs,
355 struct ipvs_master_sync_state *ms)
356{
357 struct ip_vs_sync_buff *sb = ms->sync_buff;
358
359 spin_lock(&ipvs->sync_lock);
360 if (ipvs->sync_state & IP_VS_STATE_MASTER &&
361 ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
362 if (!ms->sync_queue_len)
363 schedule_delayed_work(&ms->master_wakeup_work,
364 max(IPVS_SYNC_SEND_DELAY, 1));
365 ms->sync_queue_len++;
366 list_add_tail(&sb->list, &ms->sync_queue);
367 if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
368 wake_up_process(ms->master_thread);
369 } else
370 ip_vs_sync_buff_release(sb);
371 spin_unlock(&ipvs->sync_lock);
372}
373
374
375
376
377
378static inline struct ip_vs_sync_buff *
379get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
380 unsigned long time)
381{
382 struct ip_vs_sync_buff *sb;
383
384 spin_lock_bh(&ipvs->sync_buff_lock);
385 sb = ms->sync_buff;
386 if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
387 ms->sync_buff = NULL;
388 __set_current_state(TASK_RUNNING);
389 } else
390 sb = NULL;
391 spin_unlock_bh(&ipvs->sync_buff_lock);
392 return sb;
393}
394
395static inline int
396select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
397{
398 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
399}
400
401
402
403
404static inline struct ip_vs_sync_buff *
405ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
406{
407 struct ip_vs_sync_buff *sb;
408 struct ip_vs_sync_mesg_v0 *mesg;
409
410 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
411 return NULL;
412
413 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
414 if (!sb->mesg) {
415 kfree(sb);
416 return NULL;
417 }
418 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
419 mesg->nr_conns = 0;
420 mesg->syncid = ipvs->master_syncid;
421 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
422 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
423 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
424 sb->firstuse = jiffies;
425 return sb;
426}
427
428
429static inline bool in_persistence(struct ip_vs_conn *cp)
430{
431 for (cp = cp->control; cp; cp = cp->control) {
432 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
433 return true;
434 }
435 return false;
436}
437
438
439
440
441
442
443
444
445
446
/*
 * Decide whether a connection should be synced now.
 *
 * @ipvs: per-netns IPVS state, source of the sync sysctls
 * @cp:   connection being considered
 * @pkts: packet counter value supplied by the caller
 *
 * cp->sync_endtime holds an end time with its two low bits used as a
 * retry counter.  On the "sync" path the function records the current
 * state in cp->old_state and tries to install the new end time with
 * cmpxchg().
 *
 * Returns non-zero when the caller should sync (the cmpxchg succeeded
 * or the state change forces a sync), 0 otherwise.
 */
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
				  struct ip_vs_conn *cp, int pkts)
{
	/* Snapshot used as the expected value for the cmpxchg below */
	unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
	unsigned long now = jiffies;
	/* New end time with the two retry bits cleared */
	unsigned long n = (now + cp->timeout) & ~3UL;
	unsigned int sync_refresh_period;
	int sync_period;
	int force;

	/* Check whether we sync in the current state */
	if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
		force = 0;
	else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
		return 0;
	else if (likely(cp->protocol == IPPROTO_TCP)) {
		/* Only the listed TCP states are synced */
		if (!((1 << cp->state) &
		      ((1 << IP_VS_TCP_S_ESTABLISHED) |
		       (1 << IP_VS_TCP_S_FIN_WAIT) |
		       (1 << IP_VS_TCP_S_CLOSE) |
		       (1 << IP_VS_TCP_S_CLOSE_WAIT) |
		       (1 << IP_VS_TCP_S_TIME_WAIT))))
			return 0;
		force = cp->state != cp->old_state;
		/* A change to any state but ESTABLISHED skips the rate checks */
		if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
			goto set;
	} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
		/* Only the listed SCTP states are synced */
		if (!((1 << cp->state) &
		      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
		       (1 << IP_VS_SCTP_S_CLOSED))))
			return 0;
		force = cp->state != cp->old_state;
		/* A change to any state but ESTABLISHED skips the rate checks */
		if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
			goto set;
	} else {
		/* Any other protocol: never forced */
		force = 0;
	}

	sync_refresh_period = sysctl_sync_refresh_period(ipvs);
	if (sync_refresh_period > 0) {
		long diff = n - orig;
		long min_diff = max(cp->timeout >> 1, 10UL * HZ);

		/*
		 * The new end time is close to the recorded one (closer than
		 * both the refresh period and min_diff): only sync again as
		 * a bounded retry.
		 */
		if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
			int retries = orig & 3;

			if (retries >= sysctl_sync_retries(ipvs))
				return 0;
			if (time_before(now, orig - cp->timeout +
					(sync_refresh_period >> 3)))
				return 0;
			/* Carry the incremented retry count in the low bits */
			n |= retries + 1;
		}
	}
	sync_period = sysctl_sync_period(ipvs);
	if (sync_period > 0) {
		/* Templates are exempt from the packet-count check */
		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
		    pkts % sync_period != sysctl_sync_threshold(ipvs))
			return 0;
	} else if (sync_refresh_period <= 0 &&
		   pkts != sysctl_sync_threshold(ipvs))
		return 0;

set:
	cp->old_state = cp->state;
	/* n becomes the previous value; equal to orig means we won the update */
	n = cmpxchg(&cp->sync_endtime, orig, n);
	return n == orig || force;
}
522
523
524
525
526
527static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
528 int pkts)
529{
530 struct netns_ipvs *ipvs = net_ipvs(net);
531 struct ip_vs_sync_mesg_v0 *m;
532 struct ip_vs_sync_conn_v0 *s;
533 struct ip_vs_sync_buff *buff;
534 struct ipvs_master_sync_state *ms;
535 int id;
536 int len;
537
538 if (unlikely(cp->af != AF_INET))
539 return;
540
541 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
542 return;
543
544 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
545 return;
546
547 spin_lock_bh(&ipvs->sync_buff_lock);
548 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
549 spin_unlock_bh(&ipvs->sync_buff_lock);
550 return;
551 }
552
553 id = select_master_thread_id(ipvs, cp);
554 ms = &ipvs->ms[id];
555 buff = ms->sync_buff;
556 if (buff) {
557 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
558
559 if (!m->nr_conns) {
560 sb_queue_tail(ipvs, ms);
561 ms->sync_buff = NULL;
562 buff = NULL;
563 }
564 }
565 if (!buff) {
566 buff = ip_vs_sync_buff_create_v0(ipvs);
567 if (!buff) {
568 spin_unlock_bh(&ipvs->sync_buff_lock);
569 pr_err("ip_vs_sync_buff_create failed.\n");
570 return;
571 }
572 ms->sync_buff = buff;
573 }
574
575 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
576 SIMPLE_CONN_SIZE;
577 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
578 s = (struct ip_vs_sync_conn_v0 *) buff->head;
579
580
581 s->reserved = 0;
582 s->protocol = cp->protocol;
583 s->cport = cp->cport;
584 s->vport = cp->vport;
585 s->dport = cp->dport;
586 s->caddr = cp->caddr.ip;
587 s->vaddr = cp->vaddr.ip;
588 s->daddr = cp->daddr.ip;
589 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
590 s->state = htons(cp->state);
591 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
592 struct ip_vs_sync_conn_options *opt =
593 (struct ip_vs_sync_conn_options *)&s[1];
594 memcpy(opt, &cp->in_seq, sizeof(*opt));
595 }
596
597 m->nr_conns++;
598 m->size = htons(ntohs(m->size) + len);
599 buff->head += len;
600
601
602 if (buff->head + FULL_CONN_SIZE > buff->end) {
603 sb_queue_tail(ipvs, ms);
604 ms->sync_buff = NULL;
605 }
606 spin_unlock_bh(&ipvs->sync_buff_lock);
607
608
609 cp = cp->control;
610 if (cp) {
611 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
612 pkts = atomic_add_return(1, &cp->in_pkts);
613 else
614 pkts = sysctl_sync_threshold(ipvs);
615 ip_vs_sync_conn(net, cp->control, pkts);
616 }
617}
618
619
620
621
622
623
624void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
625{
626 struct netns_ipvs *ipvs = net_ipvs(net);
627 struct ip_vs_sync_mesg *m;
628 union ip_vs_sync_conn *s;
629 struct ip_vs_sync_buff *buff;
630 struct ipvs_master_sync_state *ms;
631 int id;
632 __u8 *p;
633 unsigned int len, pe_name_len, pad;
634
635
636 if (sysctl_sync_ver(ipvs) == 0) {
637 ip_vs_sync_conn_v0(net, cp, pkts);
638 return;
639 }
640
641 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
642 goto control;
643sloop:
644 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
645 goto control;
646
647
648 pe_name_len = 0;
649 if (cp->pe_data_len) {
650 if (!cp->pe_data || !cp->dest) {
651 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
652 return;
653 }
654 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
655 }
656
657 spin_lock_bh(&ipvs->sync_buff_lock);
658 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
659 spin_unlock_bh(&ipvs->sync_buff_lock);
660 return;
661 }
662
663 id = select_master_thread_id(ipvs, cp);
664 ms = &ipvs->ms[id];
665
666#ifdef CONFIG_IP_VS_IPV6
667 if (cp->af == AF_INET6)
668 len = sizeof(struct ip_vs_sync_v6);
669 else
670#endif
671 len = sizeof(struct ip_vs_sync_v4);
672
673 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
674 len += sizeof(struct ip_vs_sync_conn_options) + 2;
675
676 if (cp->pe_data_len)
677 len += cp->pe_data_len + 2;
678 if (pe_name_len)
679 len += pe_name_len + 2;
680
681
682 pad = 0;
683 buff = ms->sync_buff;
684 if (buff) {
685 m = buff->mesg;
686 pad = (4 - (size_t) buff->head) & 3;
687
688 if (buff->head + len + pad > buff->end || m->reserved) {
689 sb_queue_tail(ipvs, ms);
690 ms->sync_buff = NULL;
691 buff = NULL;
692 pad = 0;
693 }
694 }
695
696 if (!buff) {
697 buff = ip_vs_sync_buff_create(ipvs);
698 if (!buff) {
699 spin_unlock_bh(&ipvs->sync_buff_lock);
700 pr_err("ip_vs_sync_buff_create failed.\n");
701 return;
702 }
703 ms->sync_buff = buff;
704 m = buff->mesg;
705 }
706
707 p = buff->head;
708 buff->head += pad + len;
709 m->size = htons(ntohs(m->size) + pad + len);
710
711 while (pad--)
712 *(p++) = 0;
713
714 s = (union ip_vs_sync_conn *)p;
715
716
717 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
718 s->v4.ver_size = htons(len & SVER_MASK);
719 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
720 s->v4.state = htons(cp->state);
721 s->v4.protocol = cp->protocol;
722 s->v4.cport = cp->cport;
723 s->v4.vport = cp->vport;
724 s->v4.dport = cp->dport;
725 s->v4.fwmark = htonl(cp->fwmark);
726 s->v4.timeout = htonl(cp->timeout / HZ);
727 m->nr_conns++;
728
729#ifdef CONFIG_IP_VS_IPV6
730 if (cp->af == AF_INET6) {
731 p += sizeof(struct ip_vs_sync_v6);
732 s->v6.caddr = cp->caddr.in6;
733 s->v6.vaddr = cp->vaddr.in6;
734 s->v6.daddr = cp->daddr.in6;
735 } else
736#endif
737 {
738 p += sizeof(struct ip_vs_sync_v4);
739 s->v4.caddr = cp->caddr.ip;
740 s->v4.vaddr = cp->vaddr.ip;
741 s->v4.daddr = cp->daddr.ip;
742 }
743 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
744 *(p++) = IPVS_OPT_SEQ_DATA;
745 *(p++) = sizeof(struct ip_vs_sync_conn_options);
746 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
747 p += sizeof(struct ip_vs_seq);
748 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
749 p += sizeof(struct ip_vs_seq);
750 }
751
752 if (cp->pe_data_len && cp->pe_data) {
753 *(p++) = IPVS_OPT_PE_DATA;
754 *(p++) = cp->pe_data_len;
755 memcpy(p, cp->pe_data, cp->pe_data_len);
756 p += cp->pe_data_len;
757 if (pe_name_len) {
758
759 *(p++) = IPVS_OPT_PE_NAME;
760 *(p++) = pe_name_len;
761 memcpy(p, cp->pe->name, pe_name_len);
762 p += pe_name_len;
763 }
764 }
765
766 spin_unlock_bh(&ipvs->sync_buff_lock);
767
768control:
769
770 cp = cp->control;
771 if (!cp)
772 return;
773 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
774 pkts = atomic_add_return(1, &cp->in_pkts);
775 else
776 pkts = sysctl_sync_threshold(ipvs);
777 goto sloop;
778}
779
780
781
782
783static inline int
784ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
785 struct ip_vs_conn_param *p,
786 __u8 *pe_data, unsigned int pe_data_len,
787 __u8 *pe_name, unsigned int pe_name_len)
788{
789#ifdef CONFIG_IP_VS_IPV6
790 if (af == AF_INET6)
791 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
792 (const union nf_inet_addr *)&sc->v6.caddr,
793 sc->v6.cport,
794 (const union nf_inet_addr *)&sc->v6.vaddr,
795 sc->v6.vport, p);
796 else
797#endif
798 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
799 (const union nf_inet_addr *)&sc->v4.caddr,
800 sc->v4.cport,
801 (const union nf_inet_addr *)&sc->v4.vaddr,
802 sc->v4.vport, p);
803
804 if (pe_data_len) {
805 if (pe_name_len) {
806 char buff[IP_VS_PENAME_MAXLEN+1];
807
808 memcpy(buff, pe_name, pe_name_len);
809 buff[pe_name_len]=0;
810 p->pe = __ip_vs_pe_getbyname(buff);
811 if (!p->pe) {
812 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
813 buff);
814 return 1;
815 }
816 } else {
817 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
818 return 1;
819 }
820
821 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
822 if (!p->pe_data) {
823 module_put(p->pe->module);
824 return -ENOMEM;
825 }
826 p->pe_data_len = pe_data_len;
827 }
828 return 0;
829}
830
831
832
833
834
835
836
/*
 * Apply one received connection entry on the backup: update the
 * matching connection or create a new one.
 *
 * @param:    lookup parameters; param->pe_data is freed here on every
 *            path except when a new template connection is created
 * @flags:    connection flags from the message
 * @state:    connection state from the message
 * @protocol: IP protocol number
 * @type:     address family; passed as both address-family arguments
 *            of ip_vs_find_dest()
 * @daddr:    destination address
 * @dport:    destination port
 * @timeout:  timeout in seconds; 0 selects a default (see below)
 * @fwmark:   firewall mark
 * @opt:      sequence options to copy into the connection, or NULL
 */
static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
			    unsigned int flags, unsigned int state,
			    unsigned int protocol, unsigned int type,
			    const union nf_inet_addr *daddr, __be16 dport,
			    unsigned long timeout, __u32 fwmark,
			    struct ip_vs_sync_conn_options *opt)
{
	struct ip_vs_dest *dest;
	struct ip_vs_conn *cp;
	struct netns_ipvs *ipvs = net_ipvs(net);

	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
		cp = ip_vs_conn_in_get(param);
		/* Found, but bound to a different destination than the message */
		if (cp && ((cp->dport != dport) ||
			   !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
			if (!(flags & IP_VS_CONN_F_INACTIVE)) {
				/* Expire the stale one; a new one is created below */
				ip_vs_conn_expire_now(cp);
				__ip_vs_conn_put(cp);
				cp = NULL;
			} else {
				/*
				 * Inactive-flagged message for a connection
				 * whose destination differs: ignore it.
				 */
				__ip_vs_conn_put(cp);
				kfree(param->pe_data);
				return;
			}
		}
	} else {
		cp = ip_vs_ct_in_get(param);
	}

	if (cp) {
		/* Existing connection: the copied pe_data is not needed */
		kfree(param->pe_data);

		dest = cp->dest;
		spin_lock_bh(&cp->lock);
		/* Active/inactive flipped: move the count on the destination */
		if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
		    !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
			if (flags & IP_VS_CONN_F_INACTIVE) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
			} else {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
			}
		}
		/* Take only the BACKUP_UPD_MASK bits from the message */
		flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
		flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
		cp->flags = flags;
		spin_unlock_bh(&cp->lock);
		if (!dest)
			ip_vs_try_bind_dest(cp);
	} else {
		/*
		 * No connection yet: look up the destination and create one.
		 * dest may be NULL; it is passed to ip_vs_conn_new() as is.
		 */
		rcu_read_lock();
		/* 'type' is used for both address-family arguments */
		dest = ip_vs_find_dest(net, type, type, daddr, dport,
				       param->vaddr, param->vport, protocol,
				       fwmark, flags);

		cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
				    fwmark);
		rcu_read_unlock();
		if (!cp) {
			kfree(param->pe_data);
			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
			return;
		}
		/* pe_data is freed here for non-templates only */
		if (!(flags & IP_VS_CONN_F_TEMPLATE))
			kfree(param->pe_data);
	}

	if (opt)
		memcpy(&cp->in_seq, opt, sizeof(*opt));
	atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
	cp->state = state;
	cp->old_state = cp->state;

	/*
	 * Timeout: use the value from the message when present (clamped so
	 * that timeout*HZ stays within MAX_SCHEDULE_TIMEOUT); otherwise use
	 * the protocol's per-state table, or 3 minutes for templates and
	 * protocols without a table.
	 */
	if (timeout) {
		if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
			timeout = MAX_SCHEDULE_TIMEOUT / HZ;
		cp->timeout = timeout*HZ;
	} else {
		struct ip_vs_proto_data *pd;

		pd = ip_vs_proto_data_get(net, protocol);
		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
			cp->timeout = pd->timeout_table[state];
		else
			cp->timeout = (3*60*HZ);
	}
	/* Drop the reference taken by the lookup or creation above */
	ip_vs_conn_put(cp);
}
949
950
951
952
953static void ip_vs_process_message_v0(struct net *net, const char *buffer,
954 const size_t buflen)
955{
956 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
957 struct ip_vs_sync_conn_v0 *s;
958 struct ip_vs_sync_conn_options *opt;
959 struct ip_vs_protocol *pp;
960 struct ip_vs_conn_param param;
961 char *p;
962 int i;
963
964 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
965 for (i=0; i<m->nr_conns; i++) {
966 unsigned int flags, state;
967
968 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
969 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
970 return;
971 }
972 s = (struct ip_vs_sync_conn_v0 *) p;
973 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
974 flags &= ~IP_VS_CONN_F_HASHED;
975 if (flags & IP_VS_CONN_F_SEQ_MASK) {
976 opt = (struct ip_vs_sync_conn_options *)&s[1];
977 p += FULL_CONN_SIZE;
978 if (p > buffer+buflen) {
979 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
980 return;
981 }
982 } else {
983 opt = NULL;
984 p += SIMPLE_CONN_SIZE;
985 }
986
987 state = ntohs(s->state);
988 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
989 pp = ip_vs_proto_get(s->protocol);
990 if (!pp) {
991 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
992 s->protocol);
993 continue;
994 }
995 if (state >= pp->num_states) {
996 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
997 pp->name, state);
998 continue;
999 }
1000 } else {
1001
1002 if (state > 0) {
1003 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
1004 state);
1005 state = 0;
1006 }
1007 }
1008
1009 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
1010 (const union nf_inet_addr *)&s->caddr,
1011 s->cport,
1012 (const union nf_inet_addr *)&s->vaddr,
1013 s->vport, ¶m);
1014
1015
1016 ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
1017 (union nf_inet_addr *)&s->daddr, s->dport,
1018 0, 0, opt);
1019 }
1020}
1021
1022
1023
1024
1025static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
1026 __u32 *opt_flags,
1027 struct ip_vs_sync_conn_options *opt)
1028{
1029 struct ip_vs_sync_conn_options *topt;
1030
1031 topt = (struct ip_vs_sync_conn_options *)p;
1032
1033 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
1034 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
1035 return -EINVAL;
1036 }
1037 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
1038 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
1039 return -EINVAL;
1040 }
1041 ntoh_seq(&topt->in_seq, &opt->in_seq);
1042 ntoh_seq(&topt->out_seq, &opt->out_seq);
1043 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
1044 return 0;
1045}
1046
1047static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1048 __u8 **data, unsigned int maxlen,
1049 __u32 *opt_flags, __u32 flag)
1050{
1051 if (plen > maxlen) {
1052 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
1053 return -EINVAL;
1054 }
1055 if (*opt_flags & flag) {
1056 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
1057 return -EINVAL;
1058 }
1059 *data_len = plen;
1060 *data = p;
1061 *opt_flags |= flag;
1062 return 0;
1063}
1064
1065
1066
1067static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
1068{
1069 struct ip_vs_sync_conn_options opt;
1070 union ip_vs_sync_conn *s;
1071 struct ip_vs_protocol *pp;
1072 struct ip_vs_conn_param param;
1073 __u32 flags;
1074 unsigned int af, state, pe_data_len=0, pe_name_len=0;
1075 __u8 *pe_data=NULL, *pe_name=NULL;
1076 __u32 opt_flags=0;
1077 int retc=0;
1078
1079 s = (union ip_vs_sync_conn *) p;
1080
1081 if (s->v6.type & STYPE_F_INET6) {
1082#ifdef CONFIG_IP_VS_IPV6
1083 af = AF_INET6;
1084 p += sizeof(struct ip_vs_sync_v6);
1085#else
1086 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1087 retc = 10;
1088 goto out;
1089#endif
1090 } else if (!s->v4.type) {
1091 af = AF_INET;
1092 p += sizeof(struct ip_vs_sync_v4);
1093 } else {
1094 return -10;
1095 }
1096 if (p > msg_end)
1097 return -20;
1098
1099
1100 while (p < msg_end) {
1101 int ptype;
1102 int plen;
1103
1104 if (p+2 > msg_end)
1105 return -30;
1106 ptype = *(p++);
1107 plen = *(p++);
1108
1109 if (!plen || ((p + plen) > msg_end))
1110 return -40;
1111
1112 switch (ptype & ~IPVS_OPT_F_PARAM) {
1113 case IPVS_OPT_SEQ_DATA:
1114 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1115 return -50;
1116 break;
1117
1118 case IPVS_OPT_PE_DATA:
1119 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1120 IP_VS_PEDATA_MAXLEN, &opt_flags,
1121 IPVS_OPT_F_PE_DATA))
1122 return -60;
1123 break;
1124
1125 case IPVS_OPT_PE_NAME:
1126 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1127 IP_VS_PENAME_MAXLEN, &opt_flags,
1128 IPVS_OPT_F_PE_NAME))
1129 return -70;
1130 break;
1131
1132 default:
1133
1134 if (!(ptype & IPVS_OPT_F_PARAM)) {
1135 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1136 ptype & ~IPVS_OPT_F_PARAM);
1137 retc = 20;
1138 goto out;
1139 }
1140 }
1141 p += plen;
1142 }
1143
1144
1145 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1146 flags |= IP_VS_CONN_F_SYNC;
1147 state = ntohs(s->v4.state);
1148
1149 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1150 pp = ip_vs_proto_get(s->v4.protocol);
1151 if (!pp) {
1152 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1153 s->v4.protocol);
1154 retc = 30;
1155 goto out;
1156 }
1157 if (state >= pp->num_states) {
1158 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1159 pp->name, state);
1160 retc = 40;
1161 goto out;
1162 }
1163 } else {
1164
1165 if (state > 0) {
1166 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1167 state);
1168 state = 0;
1169 }
1170 }
1171 if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
1172 pe_data_len, pe_name, pe_name_len)) {
1173 retc = 50;
1174 goto out;
1175 }
1176
1177 if (af == AF_INET)
1178 ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
1179 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1180 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1181 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1182 );
1183#ifdef CONFIG_IP_VS_IPV6
1184 else
1185 ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
1186 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1187 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1188 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1189 );
1190#endif
1191 ip_vs_pe_put(param.pe);
1192 return 0;
1193
1194out:
1195 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1196 return retc;
1197
1198}
1199
1200
1201
1202
1203
1204static void ip_vs_process_message(struct net *net, __u8 *buffer,
1205 const size_t buflen)
1206{
1207 struct netns_ipvs *ipvs = net_ipvs(net);
1208 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1209 __u8 *p, *msg_end;
1210 int i, nr_conns;
1211
1212 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1213 IP_VS_DBG(2, "BACKUP, message header too short\n");
1214 return;
1215 }
1216
1217 if (buflen != ntohs(m2->size)) {
1218 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1219 return;
1220 }
1221
1222 if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
1223 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1224 return;
1225 }
1226
1227 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1228 && (m2->spare == 0)) {
1229
1230 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1231 nr_conns = m2->nr_conns;
1232
1233 for (i=0; i<nr_conns; i++) {
1234 union ip_vs_sync_conn *s;
1235 unsigned int size;
1236 int retc;
1237
1238 p = msg_end;
1239 if (p + sizeof(s->v4) > buffer+buflen) {
1240 IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
1241 return;
1242 }
1243 s = (union ip_vs_sync_conn *)p;
1244 size = ntohs(s->v4.ver_size) & SVER_MASK;
1245 msg_end = p + size;
1246
1247 if (msg_end > buffer+buflen) {
1248 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
1249 return;
1250 }
1251 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
1252 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
1253 ntohs(s->v4.ver_size) >> SVER_SHIFT);
1254 return;
1255 }
1256
1257 retc = ip_vs_proc_sync_conn(net, p, msg_end);
1258 if (retc < 0) {
1259 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
1260 retc);
1261 return;
1262 }
1263
1264 msg_end = p + ((size + 3) & ~3);
1265 }
1266 } else {
1267
1268 ip_vs_process_message_v0(net, buffer, buflen);
1269 return;
1270 }
1271}
1272
1273
1274
1275
1276
1277static void set_sock_size(struct sock *sk, int mode, int val)
1278{
1279
1280
1281 lock_sock(sk);
1282 if (mode) {
1283 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
1284 sysctl_wmem_max);
1285 sk->sk_sndbuf = val * 2;
1286 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1287 } else {
1288 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
1289 sysctl_rmem_max);
1290 sk->sk_rcvbuf = val * 2;
1291 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1292 }
1293 release_sock(sk);
1294}
1295
1296
1297
1298
1299static void set_mcast_loop(struct sock *sk, u_char loop)
1300{
1301 struct inet_sock *inet = inet_sk(sk);
1302
1303
1304 lock_sock(sk);
1305 inet->mc_loop = loop ? 1 : 0;
1306 release_sock(sk);
1307}
1308
1309
1310
1311
1312static void set_mcast_ttl(struct sock *sk, u_char ttl)
1313{
1314 struct inet_sock *inet = inet_sk(sk);
1315
1316
1317 lock_sock(sk);
1318 inet->mc_ttl = ttl;
1319 release_sock(sk);
1320}
1321
1322
1323
1324
1325static int set_mcast_if(struct sock *sk, char *ifname)
1326{
1327 struct net_device *dev;
1328 struct inet_sock *inet = inet_sk(sk);
1329 struct net *net = sock_net(sk);
1330
1331 dev = __dev_get_by_name(net, ifname);
1332 if (!dev)
1333 return -ENODEV;
1334
1335 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1336 return -EINVAL;
1337
1338 lock_sock(sk);
1339 inet->mc_index = dev->ifindex;
1340
1341 release_sock(sk);
1342
1343 return 0;
1344}
1345
1346
1347
1348
1349
1350
1351static int set_sync_mesg_maxlen(struct net *net, int sync_state)
1352{
1353 struct netns_ipvs *ipvs = net_ipvs(net);
1354 struct net_device *dev;
1355 int num;
1356
1357 if (sync_state == IP_VS_STATE_MASTER) {
1358 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1359 if (!dev)
1360 return -ENODEV;
1361
1362 num = (dev->mtu - sizeof(struct iphdr) -
1363 sizeof(struct udphdr) -
1364 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
1365 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
1366 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
1367 IP_VS_DBG(7, "setting the maximum length of sync sending "
1368 "message %d.\n", ipvs->send_mesg_maxlen);
1369 } else if (sync_state == IP_VS_STATE_BACKUP) {
1370 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1371 if (!dev)
1372 return -ENODEV;
1373
1374 ipvs->recv_mesg_maxlen = dev->mtu -
1375 sizeof(struct iphdr) - sizeof(struct udphdr);
1376 IP_VS_DBG(7, "setting the maximum length of sync receiving "
1377 "message %d.\n", ipvs->recv_mesg_maxlen);
1378 }
1379
1380 return 0;
1381}
1382
1383
1384
1385
1386
1387
1388
1389static int
1390join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
1391{
1392 struct net *net = sock_net(sk);
1393 struct ip_mreqn mreq;
1394 struct net_device *dev;
1395 int ret;
1396
1397 memset(&mreq, 0, sizeof(mreq));
1398 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1399
1400 dev = __dev_get_by_name(net, ifname);
1401 if (!dev)
1402 return -ENODEV;
1403 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1404 return -EINVAL;
1405
1406 mreq.imr_ifindex = dev->ifindex;
1407
1408 rtnl_lock();
1409 lock_sock(sk);
1410 ret = ip_mc_join_group(sk, &mreq);
1411 release_sock(sk);
1412 rtnl_unlock();
1413
1414 return ret;
1415}
1416
1417
1418static int bind_mcastif_addr(struct socket *sock, char *ifname)
1419{
1420 struct net *net = sock_net(sock->sk);
1421 struct net_device *dev;
1422 __be32 addr;
1423 struct sockaddr_in sin;
1424
1425 dev = __dev_get_by_name(net, ifname);
1426 if (!dev)
1427 return -ENODEV;
1428
1429 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1430 if (!addr)
1431 pr_err("You probably need to specify IP address on "
1432 "multicast interface.\n");
1433
1434 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
1435 ifname, &addr);
1436
1437
1438 sin.sin_family = AF_INET;
1439 sin.sin_addr.s_addr = addr;
1440 sin.sin_port = 0;
1441
1442 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
1443}
1444
1445
1446
1447
1448static struct socket *make_send_sock(struct net *net, int id)
1449{
1450 struct netns_ipvs *ipvs = net_ipvs(net);
1451
1452 struct sockaddr_in mcast_addr = {
1453 .sin_family = AF_INET,
1454 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1455 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1456 };
1457 struct socket *sock;
1458 int result;
1459
1460
1461 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1462 if (result < 0) {
1463 pr_err("Error during creation of socket; terminating\n");
1464 return ERR_PTR(result);
1465 }
1466
1467
1468
1469
1470
1471 sk_change_net(sock->sk, net);
1472 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
1473 if (result < 0) {
1474 pr_err("Error setting outbound mcast interface\n");
1475 goto error;
1476 }
1477
1478 set_mcast_loop(sock->sk, 0);
1479 set_mcast_ttl(sock->sk, 1);
1480 result = sysctl_sync_sock_size(ipvs);
1481 if (result > 0)
1482 set_sock_size(sock->sk, 1, result);
1483
1484 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
1485 if (result < 0) {
1486 pr_err("Error binding address of the mcast interface\n");
1487 goto error;
1488 }
1489
1490 result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
1491 sizeof(struct sockaddr), 0);
1492 if (result < 0) {
1493 pr_err("Error connecting to the multicast addr\n");
1494 goto error;
1495 }
1496
1497 return sock;
1498
1499error:
1500 sk_release_kernel(sock->sk);
1501 return ERR_PTR(result);
1502}
1503
1504
1505
1506
1507
1508static struct socket *make_receive_sock(struct net *net, int id)
1509{
1510 struct netns_ipvs *ipvs = net_ipvs(net);
1511
1512 struct sockaddr_in mcast_addr = {
1513 .sin_family = AF_INET,
1514 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1515 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1516 };
1517 struct socket *sock;
1518 int result;
1519
1520
1521 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1522 if (result < 0) {
1523 pr_err("Error during creation of socket; terminating\n");
1524 return ERR_PTR(result);
1525 }
1526
1527
1528
1529
1530
1531 sk_change_net(sock->sk, net);
1532
1533 sock->sk->sk_reuse = SK_CAN_REUSE;
1534 result = sysctl_sync_sock_size(ipvs);
1535 if (result > 0)
1536 set_sock_size(sock->sk, 0, result);
1537
1538 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
1539 sizeof(struct sockaddr));
1540 if (result < 0) {
1541 pr_err("Error binding to the multicast addr\n");
1542 goto error;
1543 }
1544
1545
1546 result = join_mcast_group(sock->sk,
1547 (struct in_addr *) &mcast_addr.sin_addr,
1548 ipvs->backup_mcast_ifn);
1549 if (result < 0) {
1550 pr_err("Error joining to the multicast group\n");
1551 goto error;
1552 }
1553
1554 return sock;
1555
1556error:
1557 sk_release_kernel(sock->sk);
1558 return ERR_PTR(result);
1559}
1560
1561
1562static int
1563ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1564{
1565 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1566 struct kvec iov;
1567 int len;
1568
1569 EnterFunction(7);
1570 iov.iov_base = (void *)buffer;
1571 iov.iov_len = length;
1572
1573 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1574
1575 LeaveFunction(7);
1576 return len;
1577}
1578
1579static int
1580ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1581{
1582 int msize;
1583 int ret;
1584
1585 msize = ntohs(msg->size);
1586
1587 ret = ip_vs_send_async(sock, (char *)msg, msize);
1588 if (ret >= 0 || ret == -EAGAIN)
1589 return ret;
1590 pr_err("ip_vs_send_async error %d\n", ret);
1591 return 0;
1592}
1593
1594static int
1595ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1596{
1597 struct msghdr msg = {NULL,};
1598 struct kvec iov;
1599 int len;
1600
1601 EnterFunction(7);
1602
1603
1604 iov.iov_base = buffer;
1605 iov.iov_len = (size_t)buflen;
1606
1607 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1608
1609 if (len < 0)
1610 return len;
1611
1612 LeaveFunction(7);
1613 return len;
1614}
1615
1616
1617static void master_wakeup_work_handler(struct work_struct *work)
1618{
1619 struct ipvs_master_sync_state *ms =
1620 container_of(work, struct ipvs_master_sync_state,
1621 master_wakeup_work.work);
1622 struct netns_ipvs *ipvs = ms->ipvs;
1623
1624 spin_lock_bh(&ipvs->sync_lock);
1625 if (ms->sync_queue_len &&
1626 ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
1627 ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
1628 wake_up_process(ms->master_thread);
1629 }
1630 spin_unlock_bh(&ipvs->sync_lock);
1631}
1632
1633
1634static inline struct ip_vs_sync_buff *
1635next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1636{
1637 struct ip_vs_sync_buff *sb;
1638
1639 sb = sb_dequeue(ipvs, ms);
1640 if (sb)
1641 return sb;
1642
1643 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1644}
1645
1646static int sync_thread_master(void *data)
1647{
1648 struct ip_vs_sync_thread_data *tinfo = data;
1649 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1650 struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
1651 struct sock *sk = tinfo->sock->sk;
1652 struct ip_vs_sync_buff *sb;
1653
1654 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
1655 "syncid = %d, id = %d\n",
1656 ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
1657
1658 for (;;) {
1659 sb = next_sync_buff(ipvs, ms);
1660 if (unlikely(kthread_should_stop()))
1661 break;
1662 if (!sb) {
1663 schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
1664 continue;
1665 }
1666 while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
1667
1668
1669
1670 __wait_event_interruptible(*sk_sleep(sk),
1671 sock_writeable(sk) ||
1672 kthread_should_stop());
1673 if (unlikely(kthread_should_stop()))
1674 goto done;
1675 }
1676 ip_vs_sync_buff_release(sb);
1677 }
1678
1679done:
1680 __set_current_state(TASK_RUNNING);
1681 if (sb)
1682 ip_vs_sync_buff_release(sb);
1683
1684
1685 while ((sb = sb_dequeue(ipvs, ms)))
1686 ip_vs_sync_buff_release(sb);
1687 __set_current_state(TASK_RUNNING);
1688
1689
1690 sb = get_curr_sync_buff(ipvs, ms, 0);
1691 if (sb)
1692 ip_vs_sync_buff_release(sb);
1693
1694
1695 sk_release_kernel(tinfo->sock->sk);
1696 kfree(tinfo);
1697
1698 return 0;
1699}
1700
1701
1702static int sync_thread_backup(void *data)
1703{
1704 struct ip_vs_sync_thread_data *tinfo = data;
1705 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1706 int len;
1707
1708 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
1709 "syncid = %d, id = %d\n",
1710 ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
1711
1712 while (!kthread_should_stop()) {
1713 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
1714 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
1715 || kthread_should_stop());
1716
1717
1718 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
1719 len = ip_vs_receive(tinfo->sock, tinfo->buf,
1720 ipvs->recv_mesg_maxlen);
1721 if (len <= 0) {
1722 if (len != -EAGAIN)
1723 pr_err("receiving message error\n");
1724 break;
1725 }
1726
1727 ip_vs_process_message(tinfo->net, tinfo->buf, len);
1728 }
1729 }
1730
1731
1732 sk_release_kernel(tinfo->sock->sk);
1733 kfree(tinfo->buf);
1734 kfree(tinfo);
1735
1736 return 0;
1737}
1738
1739
1740int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
1741{
1742 struct ip_vs_sync_thread_data *tinfo;
1743 struct task_struct **array = NULL, *task;
1744 struct socket *sock;
1745 struct netns_ipvs *ipvs = net_ipvs(net);
1746 char *name;
1747 int (*threadfn)(void *data);
1748 int id, count;
1749 int result = -ENOMEM;
1750
1751 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1752 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
1753 sizeof(struct ip_vs_sync_conn_v0));
1754
1755 if (!ipvs->sync_state) {
1756 count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
1757 ipvs->threads_mask = count - 1;
1758 } else
1759 count = ipvs->threads_mask + 1;
1760
1761 if (state == IP_VS_STATE_MASTER) {
1762 if (ipvs->ms)
1763 return -EEXIST;
1764
1765 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
1766 sizeof(ipvs->master_mcast_ifn));
1767 ipvs->master_syncid = syncid;
1768 name = "ipvs-m:%d:%d";
1769 threadfn = sync_thread_master;
1770 } else if (state == IP_VS_STATE_BACKUP) {
1771 if (ipvs->backup_threads)
1772 return -EEXIST;
1773
1774 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
1775 sizeof(ipvs->backup_mcast_ifn));
1776 ipvs->backup_syncid = syncid;
1777 name = "ipvs-b:%d:%d";
1778 threadfn = sync_thread_backup;
1779 } else {
1780 return -EINVAL;
1781 }
1782
1783 if (state == IP_VS_STATE_MASTER) {
1784 struct ipvs_master_sync_state *ms;
1785
1786 ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
1787 if (!ipvs->ms)
1788 goto out;
1789 ms = ipvs->ms;
1790 for (id = 0; id < count; id++, ms++) {
1791 INIT_LIST_HEAD(&ms->sync_queue);
1792 ms->sync_queue_len = 0;
1793 ms->sync_queue_delay = 0;
1794 INIT_DELAYED_WORK(&ms->master_wakeup_work,
1795 master_wakeup_work_handler);
1796 ms->ipvs = ipvs;
1797 }
1798 } else {
1799 array = kzalloc(count * sizeof(struct task_struct *),
1800 GFP_KERNEL);
1801 if (!array)
1802 goto out;
1803 }
1804 set_sync_mesg_maxlen(net, state);
1805
1806 tinfo = NULL;
1807 for (id = 0; id < count; id++) {
1808 if (state == IP_VS_STATE_MASTER)
1809 sock = make_send_sock(net, id);
1810 else
1811 sock = make_receive_sock(net, id);
1812 if (IS_ERR(sock)) {
1813 result = PTR_ERR(sock);
1814 goto outtinfo;
1815 }
1816 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
1817 if (!tinfo)
1818 goto outsocket;
1819 tinfo->net = net;
1820 tinfo->sock = sock;
1821 if (state == IP_VS_STATE_BACKUP) {
1822 tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
1823 GFP_KERNEL);
1824 if (!tinfo->buf)
1825 goto outtinfo;
1826 } else {
1827 tinfo->buf = NULL;
1828 }
1829 tinfo->id = id;
1830
1831 task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
1832 if (IS_ERR(task)) {
1833 result = PTR_ERR(task);
1834 goto outtinfo;
1835 }
1836 tinfo = NULL;
1837 if (state == IP_VS_STATE_MASTER)
1838 ipvs->ms[id].master_thread = task;
1839 else
1840 array[id] = task;
1841 }
1842
1843
1844
1845 if (state == IP_VS_STATE_BACKUP)
1846 ipvs->backup_threads = array;
1847 spin_lock_bh(&ipvs->sync_buff_lock);
1848 ipvs->sync_state |= state;
1849 spin_unlock_bh(&ipvs->sync_buff_lock);
1850
1851
1852 ip_vs_use_count_inc();
1853
1854 return 0;
1855
1856outsocket:
1857 sk_release_kernel(sock->sk);
1858
1859outtinfo:
1860 if (tinfo) {
1861 sk_release_kernel(tinfo->sock->sk);
1862 kfree(tinfo->buf);
1863 kfree(tinfo);
1864 }
1865 count = id;
1866 while (count-- > 0) {
1867 if (state == IP_VS_STATE_MASTER)
1868 kthread_stop(ipvs->ms[count].master_thread);
1869 else
1870 kthread_stop(array[count]);
1871 }
1872 kfree(array);
1873
1874out:
1875 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
1876 kfree(ipvs->ms);
1877 ipvs->ms = NULL;
1878 }
1879 return result;
1880}
1881
1882
1883int stop_sync_thread(struct net *net, int state)
1884{
1885 struct netns_ipvs *ipvs = net_ipvs(net);
1886 struct task_struct **array;
1887 int id;
1888 int retc = -EINVAL;
1889
1890 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1891
1892 if (state == IP_VS_STATE_MASTER) {
1893 if (!ipvs->ms)
1894 return -ESRCH;
1895
1896
1897
1898
1899
1900
1901
1902 spin_lock_bh(&ipvs->sync_buff_lock);
1903 spin_lock(&ipvs->sync_lock);
1904 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1905 spin_unlock(&ipvs->sync_lock);
1906 spin_unlock_bh(&ipvs->sync_buff_lock);
1907
1908 retc = 0;
1909 for (id = ipvs->threads_mask; id >= 0; id--) {
1910 struct ipvs_master_sync_state *ms = &ipvs->ms[id];
1911 int ret;
1912
1913 pr_info("stopping master sync thread %d ...\n",
1914 task_pid_nr(ms->master_thread));
1915 cancel_delayed_work_sync(&ms->master_wakeup_work);
1916 ret = kthread_stop(ms->master_thread);
1917 if (retc >= 0)
1918 retc = ret;
1919 }
1920 kfree(ipvs->ms);
1921 ipvs->ms = NULL;
1922 } else if (state == IP_VS_STATE_BACKUP) {
1923 if (!ipvs->backup_threads)
1924 return -ESRCH;
1925
1926 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1927 array = ipvs->backup_threads;
1928 retc = 0;
1929 for (id = ipvs->threads_mask; id >= 0; id--) {
1930 int ret;
1931
1932 pr_info("stopping backup sync thread %d ...\n",
1933 task_pid_nr(array[id]));
1934 ret = kthread_stop(array[id]);
1935 if (retc >= 0)
1936 retc = ret;
1937 }
1938 kfree(array);
1939 ipvs->backup_threads = NULL;
1940 }
1941
1942
1943 ip_vs_use_count_dec();
1944
1945 return retc;
1946}
1947
1948
1949
1950
1951int __net_init ip_vs_sync_net_init(struct net *net)
1952{
1953 struct netns_ipvs *ipvs = net_ipvs(net);
1954
1955 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
1956 spin_lock_init(&ipvs->sync_lock);
1957 spin_lock_init(&ipvs->sync_buff_lock);
1958 return 0;
1959}
1960
1961void ip_vs_sync_net_cleanup(struct net *net)
1962{
1963 int retc;
1964 struct netns_ipvs *ipvs = net_ipvs(net);
1965
1966 mutex_lock(&ipvs->sync_mutex);
1967 retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
1968 if (retc && retc != -ESRCH)
1969 pr_err("Failed to stop Master Daemon\n");
1970
1971 retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
1972 if (retc && retc != -ESRCH)
1973 pr_err("Failed to stop Backup Daemon\n");
1974 mutex_unlock(&ipvs->sync_mutex);
1975}
1976