1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#define KMSG_COMPONENT "IPVS"
36#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
37
38#include <linux/module.h>
39#include <linux/slab.h>
40#include <linux/inetdevice.h>
41#include <linux/net.h>
42#include <linux/completion.h>
43#include <linux/delay.h>
44#include <linux/skbuff.h>
45#include <linux/in.h>
46#include <linux/igmp.h>
47#include <linux/udp.h>
48#include <linux/err.h>
49#include <linux/kthread.h>
50#include <linux/wait.h>
51#include <linux/kernel.h>
52
53#include <asm/unaligned.h>
54
55#include <net/ip.h>
56#include <net/sock.h>
57
58#include <net/ip_vs.h>
59
60#define IP_VS_SYNC_GROUP 0xe0000051
61#define IP_VS_SYNC_PORT 8848
62
63#define SYNC_PROTO_VER 1
64
65static struct lock_class_key __ipvs_sync_key;
66
67
68
69
/*
 * Version 0 (legacy) on-the-wire connection entry, IPv4 only.
 * All multi-byte fields are in network byte order; the layout is fixed
 * by the old sync protocol and must not be changed.
 */
struct ip_vs_sync_conn_v0 {
	__u8			reserved;

	/* Protocol, ports and addresses of the synced connection */
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			cport;		/* client port */
	__be16			vport;		/* virtual service port */
	__be16			dport;		/* real destination port */
	__be32			caddr;		/* client address */
	__be32			vaddr;		/* virtual address */
	__be32			daddr;		/* destination address */

	/* Flags and state transition */
	__be16			flags;		/* status flags */
	__be16			state;		/* state info */

	/* The sequence options start here (struct ip_vs_sync_conn_options),
	 * present only when IP_VS_CONN_F_SEQ_MASK is set in flags. */
};
88
/* Optional per-connection TCP sequence adjustment data, appended to a
 * sync entry when the connection uses sequence mangling (F_SEQ_MASK). */
struct ip_vs_sync_conn_options {
	struct ip_vs_seq	in_seq;		/* incoming seq. struct */
	struct ip_vs_seq	out_seq;	/* outgoing seq. struct */
};
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
 * Version 1 on-the-wire connection entry for IPv4.
 * Multi-byte fields are in network byte order.
 */
struct ip_vs_sync_v4 {
	__u8			type;		/* STYPE_F_INET6 bit clear */
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			ver_size;	/* version in top 4 bits
						 * (SVER_SHIFT), entry size in
						 * low 12 bits (SVER_MASK) */
	/* Flags and state transition */
	__be32			flags;		/* status flags */
	__be16			state;		/* state info */
	/* Protocol, addresses and port numbers */
	__be16			cport;		/* client port */
	__be16			vport;		/* virtual service port */
	__be16			dport;		/* real destination port */
	__be32			fwmark;		/* firewall mark */
	__be32			timeout;	/* conn. timeout, in seconds */
	__be32			caddr;		/* client address */
	__be32			vaddr;		/* virtual address */
	__be32			daddr;		/* destination address */
	/* The sequence options start here */
	/* PE data padded to 32bit alignment after seq. options */
};
151
152
153
/*
 * Version 1 on-the-wire connection entry for IPv6.
 * Same layout as ip_vs_sync_v4, with in6_addr addresses.
 */
struct ip_vs_sync_v6 {
	__u8			type;		/* STYPE_F_INET6 bit set */
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			ver_size;	/* version (4 bits) + size (12 bits) */
	/* Flags and state transition */
	__be32			flags;		/* status flags */
	__be16			state;		/* state info */
	/* Protocol, addresses and port numbers */
	__be16			cport;		/* client port */
	__be16			vport;		/* virtual service port */
	__be16			dport;		/* real destination port */
	__be32			fwmark;		/* firewall mark */
	__be32			timeout;	/* conn. timeout, in seconds */
	struct in6_addr		caddr;		/* client address */
	struct in6_addr		vaddr;		/* virtual address */
	struct in6_addr		daddr;		/* destination address */
	/* The sequence options start here */
	/* PE data padded to 32bit alignment after seq. options */
};
173
/* A version-1 sync entry viewed as either address family; the common
 * leading fields (type/protocol/ver_size/...) overlap exactly. */
union ip_vs_sync_conn {
	struct ip_vs_sync_v4	v4;
	struct ip_vs_sync_v6	v6;
};
178
179
180#define STYPE_INET6 0
181#define STYPE_F_INET6 (1 << STYPE_INET6)
182
183#define SVER_SHIFT 12
184#define SVER_MASK 0x0fff
185
186#define IPVS_OPT_SEQ_DATA 1
187#define IPVS_OPT_PE_DATA 2
188#define IPVS_OPT_PE_NAME 3
189#define IPVS_OPT_PARAM 7
190
191#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
192#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
193#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
194#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
195
/* Per-thread context handed to the master/backup sync kthreads. */
struct ip_vs_sync_thread_data {
	struct netns_ipvs	*ipvs;	/* owning netns IPVS state */
	struct socket		*sock;	/* mcast send/recv socket */
	char			*buf;	/* rx buffer (backup threads) */
	int			id;	/* sync thread id */
};
202
203
204#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
205#define FULL_CONN_SIZE \
206(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243#define SYNC_MESG_HEADER_LEN 4
244#define MAX_CONNS_PER_SYNCBUFF 255
245
246
/* Version 0 sync message header (legacy format, IPv4 only). */
struct ip_vs_sync_mesg_v0 {
	__u8			nr_conns;	/* number of entries that follow */
	__u8			syncid;		/* sync daemon id */
	__be16			size;		/* total message size, bytes */

	/* ip_vs_sync_conn_v0 entries start here */
};
254
255
/* Version 1 sync message header.  The first byte overlays nr_conns of
 * the v0 header and must be zero, which is how receivers distinguish a
 * v1 message from a legacy v0 one (see ip_vs_process_message). */
struct ip_vs_sync_mesg {
	__u8			reserved;	/* must be zero (was v0 nr_conns) */
	__u8			syncid;		/* sync daemon id */
	__be16			size;		/* total message size, bytes */
	__u8			nr_conns;	/* number of entries that follow */
	__s8			version;	/* SYNC_PROTO_VER */
	__u16			spare;		/* must be zero */
	/* ip_vs_sync_conn entries start here, 32-bit aligned */
};
265
/* Multicast endpoint address, IPv4 or IPv6 depending on mcast_af. */
union ipvs_sockaddr {
	struct sockaddr_in	in;
	struct sockaddr_in6	in6;
};
270
/* An in-flight sync message being filled by the master before it is
 * queued for transmission. */
struct ip_vs_sync_buff {
	struct list_head        list;		/* link in ms->sync_queue */
	unsigned long           firstuse;	/* jiffies of first entry added;
						 * used to flush aged buffers */

	/* pointers for the message data */
	struct ip_vs_sync_mesg  *mesg;		/* message header + payload */
	unsigned char           *head;		/* next free byte in payload */
	unsigned char           *end;		/* one past end of payload */
};
280
281
282
283
284
285static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
286{
287 memset(ho, 0, sizeof(*ho));
288 ho->init_seq = get_unaligned_be32(&no->init_seq);
289 ho->delta = get_unaligned_be32(&no->delta);
290 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
291}
292
293
294
295
296
297static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
298{
299 put_unaligned_be32(ho->init_seq, &no->init_seq);
300 put_unaligned_be32(ho->delta, &no->delta);
301 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
302}
303
/*
 * Pop the oldest buffer from the master sync queue, or return NULL if
 * the queue is empty.  Called only by the master sync thread.  On the
 * empty path the task state is flipped to TASK_INTERRUPTIBLE *before*
 * the lock is dropped, so a wake_up_process() issued by a concurrent
 * sb_queue_tail() cannot be lost between the emptiness check and the
 * caller going to sleep.
 */
static inline struct ip_vs_sync_buff *
sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb;

	spin_lock_bh(&ipvs->sync_lock);
	if (list_empty(&ms->sync_queue)) {
		sb = NULL;
		/* prepare to sleep while still holding the lock */
		__set_current_state(TASK_INTERRUPTIBLE);
	} else {
		sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
				list);
		list_del(&sb->list);
		ms->sync_queue_len--;
		/* queue drained: reset the delayed-wakeup counter */
		if (!ms->sync_queue_len)
			ms->sync_queue_delay = 0;
	}
	spin_unlock_bh(&ipvs->sync_lock);

	return sb;
}
325
326
327
328
/*
 * Allocate and initialize a version-1 sync buffer whose payload can
 * hold at least @len bytes (but normally mcfg.sync_maxlen, so multiple
 * connection entries fit in one datagram).  GFP_ATOMIC because this
 * runs in the packet-processing (softirq) path.  Returns NULL on
 * allocation failure.
 */
static inline struct ip_vs_sync_buff *
ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
{
	struct ip_vs_sync_buff *sb;

	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
		return NULL;

	len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
		    ipvs->mcfg.sync_maxlen);
	sb->mesg = kmalloc(len, GFP_ATOMIC);
	if (!sb->mesg) {
		kfree(sb);
		return NULL;
	}
	sb->mesg->reserved = 0;	/* must be zero: marks message as v1 */
	sb->mesg->version = SYNC_PROTO_VER;
	sb->mesg->syncid = ipvs->mcfg.syncid;
	sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
	sb->mesg->nr_conns = 0;
	sb->mesg->spare = 0;
	sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
	sb->end = (unsigned char *)sb->mesg + len;

	sb->firstuse = jiffies;
	return sb;
}
356
/* Free a sync buffer and its message payload. */
static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
{
	kfree(sb->mesg);
	kfree(sb);
}
362
/*
 * Move ms->sync_buff onto the master sync queue for transmission.
 * If we are no longer master, or the queue already holds
 * sync_qlen_max buffers, the buffer is dropped instead.  The first
 * queued buffer arms a delayed wakeup of the master thread; once
 * IPVS_SYNC_WAKEUP_RATE buffers have accumulated the thread is woken
 * immediately.  Caller must still own ms->sync_buff (it is not
 * cleared here).
 */
static inline void sb_queue_tail(struct netns_ipvs *ipvs,
				 struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb = ms->sync_buff;

	spin_lock(&ipvs->sync_lock);
	if (ipvs->sync_state & IP_VS_STATE_MASTER &&
	    ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
		/* first entry: schedule a (slightly) delayed wakeup so
		 * several buffers can be batched per thread wakeup */
		if (!ms->sync_queue_len)
			schedule_delayed_work(&ms->master_wakeup_work,
					      max(IPVS_SYNC_SEND_DELAY, 1));
		ms->sync_queue_len++;
		list_add_tail(&sb->list, &ms->sync_queue);
		if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
			wake_up_process(ms->master_thread);
	} else
		ip_vs_sync_buff_release(sb);
	spin_unlock(&ipvs->sync_lock);
}
382
383
384
385
386
/*
 * Detach and return the current in-progress sync buffer if it has been
 * in use for at least @time jiffies; otherwise return NULL and leave it
 * in place to collect more entries.  Setting TASK_RUNNING undoes the
 * TASK_INTERRUPTIBLE set by a preceding sb_dequeue() on the empty-queue
 * path, since the caller now has work to do.
 */
static inline struct ip_vs_sync_buff *
get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
		   unsigned long time)
{
	struct ip_vs_sync_buff *sb;

	spin_lock_bh(&ipvs->sync_buff_lock);
	sb = ms->sync_buff;
	if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
		ms->sync_buff = NULL;
		__set_current_state(TASK_RUNNING);
	} else
		sb = NULL;
	spin_unlock_bh(&ipvs->sync_buff_lock);
	return sb;
}
403
404static inline int
405select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
406{
407 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
408}
409
410
411
412
/*
 * Allocate and initialize a version-0 (legacy format) sync buffer
 * whose payload can hold at least @len bytes (but normally
 * mcfg.sync_maxlen).  GFP_ATOMIC: called from the packet path.
 * Returns NULL on allocation failure.
 */
static inline struct ip_vs_sync_buff *
ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
{
	struct ip_vs_sync_buff *sb;
	struct ip_vs_sync_mesg_v0 *mesg;

	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
		return NULL;

	len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
		    ipvs->mcfg.sync_maxlen);
	sb->mesg = kmalloc(len, GFP_ATOMIC);
	if (!sb->mesg) {
		kfree(sb);
		return NULL;
	}
	mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
	mesg->nr_conns = 0;
	mesg->syncid = ipvs->mcfg.syncid;
	mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
	sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
	sb->end = (unsigned char *)mesg + len;
	sb->firstuse = jiffies;
	return sb;
}
438
439
440static inline bool in_persistence(struct ip_vs_conn *cp)
441{
442 for (cp = cp->control; cp; cp = cp->control) {
443 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
444 return true;
445 }
446 return false;
447}
448
449
450
451
452
453
454
455
456
457
/*
 * Decide whether connection @cp should be synced to the backup now.
 * @pkts is the connection's packet count used against the sync
 * threshold/period sysctls.  Rate-limits sync traffic per connection:
 * templates are always considered, normal conns only in "interesting"
 * TCP/SCTP states, and repeat syncs are suppressed until either the
 * refresh period elapses or the packet-count thresholds hit.  The two
 * low bits of cp->sync_endtime count retries; cmpxchg makes the
 * claim-to-sync race-free so only one CPU wins the right to sync.
 * Returns true if the caller should emit a sync message.
 */
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
				  struct ip_vs_conn *cp, int pkts)
{
	unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
	unsigned long now = jiffies;
	unsigned long n = (now + cp->timeout) & ~3UL; /* low 2 bits = retries */
	unsigned int sync_refresh_period;
	int sync_period;
	int force;

	/* Check if we sync in current state */
	if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
		force = 0;
	else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
		return 0;	/* persist mode: only templates are synced */
	else if (likely(cp->protocol == IPPROTO_TCP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_TCP_S_ESTABLISHED) |
		       (1 << IP_VS_TCP_S_FIN_WAIT) |
		       (1 << IP_VS_TCP_S_CLOSE) |
		       (1 << IP_VS_TCP_S_CLOSE_WAIT) |
		       (1 << IP_VS_TCP_S_TIME_WAIT))))
			return 0;
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
			goto set;
	} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
		       (1 << IP_VS_SCTP_S_CLOSED))))
			return 0;
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
			goto set;
	} else {
		/* UDP or another protocol with single state */
		force = 0;
	}

	sync_refresh_period = sysctl_sync_refresh_period(ipvs);
	if (sync_refresh_period > 0) {
		long diff = n - orig;
		long min_diff = max(cp->timeout >> 1, 10UL * HZ);

		/* Avoid sync if difference is below sync_refresh_period
		 * and below the half timeout.
		 */
		if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
			int retries = orig & 3;

			if (retries >= sysctl_sync_retries(ipvs))
				return 0;
			if (time_before(now, orig - cp->timeout +
					(sync_refresh_period >> 3)))
				return 0;
			n |= retries + 1;	/* bump retry count */
		}
	}
	sync_period = sysctl_sync_period(ipvs);
	if (sync_period > 0) {
		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
		    pkts % sync_period != sysctl_sync_threshold(ipvs))
			return 0;
	} else if (!sync_refresh_period &&
		   pkts != sysctl_sync_threshold(ipvs))
		return 0;

set:
	cp->old_state = cp->state;
	/* only one CPU may update sync_endtime and claim the sync */
	n = cmpxchg(&cp->sync_endtime, orig, n);
	return n == orig || force;
}
533
534
535
536
537
/*
 * Version 0 , could be switched in by sys_ctl.
 * Add an ip_vs_conn information into the current sync_buff.
 * v0 carries IPv4 only and no timeout/fwmark/PE data.  After emitting
 * the entry, also syncs the controlling connection (template), if any,
 * through the v1 entry point ip_vs_sync_conn().
 */
static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
			       int pkts)
{
	struct ip_vs_sync_mesg_v0 *m;
	struct ip_vs_sync_conn_v0 *s;
	struct ip_vs_sync_buff *buff;
	struct ipvs_master_sync_state *ms;
	int id;
	unsigned int len;

	if (unlikely(cp->af != AF_INET))
		return;
	/* Do not sync ONE PACKET */
	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		return;

	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
		return;

	spin_lock_bh(&ipvs->sync_buff_lock);
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		spin_unlock_bh(&ipvs->sync_buff_lock);
		return;
	}

	id = select_master_thread_id(ipvs, cp);
	ms = &ipvs->ms[id];
	buff = ms->sync_buff;
	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
		SIMPLE_CONN_SIZE;
	if (buff) {
		m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
		/* Send buffer if it is for v1 (!m->nr_conns) or full */
		if (buff->head + len > buff->end || !m->nr_conns) {
			sb_queue_tail(ipvs, ms);
			ms->sync_buff = NULL;
			buff = NULL;
		}
	}
	if (!buff) {
		buff = ip_vs_sync_buff_create_v0(ipvs, len);
		if (!buff) {
			spin_unlock_bh(&ipvs->sync_buff_lock);
			pr_err("ip_vs_sync_buff_create failed.\n");
			return;
		}
		ms->sync_buff = buff;
	}

	m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
	s = (struct ip_vs_sync_conn_v0 *) buff->head;

	/* copy members (addresses/ports already in network byte order) */
	s->reserved = 0;
	s->protocol = cp->protocol;
	s->cport = cp->cport;
	s->vport = cp->vport;
	s->dport = cp->dport;
	s->caddr = cp->caddr.ip;
	s->vaddr = cp->vaddr.ip;
	s->daddr = cp->daddr.ip;
	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->state = htons(cp->state);
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		struct ip_vs_sync_conn_options *opt =
			(struct ip_vs_sync_conn_options *)&s[1];
		memcpy(opt, &cp->in_seq, sizeof(*opt));
	}

	m->nr_conns++;
	m->size = htons(ntohs(m->size) + len);
	buff->head += len;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* synchronize its controller (template) if it has */
	cp = cp->control;
	if (cp) {
		if (cp->flags & IP_VS_CONN_F_TEMPLATE)
			pkts = atomic_add_return(1, &cp->in_pkts);
		else
			pkts = sysctl_sync_threshold(ipvs);
		ip_vs_sync_conn(ipvs, cp, pkts);
	}
}
622
623
624
625
626
627
/*
 * Add an ip_vs_conn information into the current sync_buff (version 1
 * format; falls back to ip_vs_sync_conn_v0 when sysctl sync_ver is 0).
 * Emits a v4/v6 entry plus optional sequence, PE data and PE name
 * options, then walks up cp->control (via the sloop label) so the
 * controlling template is synced too.
 */
void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
{
	struct ip_vs_sync_mesg *m;
	union ip_vs_sync_conn *s;
	struct ip_vs_sync_buff *buff;
	struct ipvs_master_sync_state *ms;
	int id;
	__u8 *p;
	unsigned int len, pe_name_len, pad;

	/* Handle old version of the protocol */
	if (sysctl_sync_ver(ipvs) == 0) {
		ip_vs_sync_conn_v0(ipvs, cp, pkts);
		return;
	}
	/* Do not sync ONE PACKET */
	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		goto control;
sloop:
	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
		goto control;

	/* Sanity checks */
	pe_name_len = 0;
	if (cp->pe_data_len) {
		if (!cp->pe_data || !cp->dest) {
			IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
			return;
		}
		pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
	}

	spin_lock_bh(&ipvs->sync_buff_lock);
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		spin_unlock_bh(&ipvs->sync_buff_lock);
		return;
	}

	id = select_master_thread_id(ipvs, cp);
	ms = &ipvs->ms[id];

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		len = sizeof(struct ip_vs_sync_v6);
	else
#endif
		len = sizeof(struct ip_vs_sync_v4);

	/* each option adds a 2-byte type/len header */
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
		len += sizeof(struct ip_vs_sync_conn_options) + 2;

	if (cp->pe_data_len)
		len += cp->pe_data_len + 2;
	if (pe_name_len)
		len += pe_name_len + 2;

	/* check if there is a space for this one */
	pad = 0;
	buff = ms->sync_buff;
	if (buff) {
		m = buff->mesg;
		/* entries must start 32-bit aligned */
		pad = (4 - (size_t) buff->head) & 3;
		/* Send buffer if there is no room, or it holds a v0 msg
		 * (non-zero reserved byte) */
		if (buff->head + len + pad > buff->end || m->reserved) {
			sb_queue_tail(ipvs, ms);
			ms->sync_buff = NULL;
			buff = NULL;
			pad = 0;
		}
	}

	if (!buff) {
		buff = ip_vs_sync_buff_create(ipvs, len);
		if (!buff) {
			spin_unlock_bh(&ipvs->sync_buff_lock);
			pr_err("ip_vs_sync_buff_create failed.\n");
			return;
		}
		ms->sync_buff = buff;
		m = buff->mesg;
	}

	p = buff->head;
	buff->head += pad + len;
	m->size = htons(ntohs(m->size) + pad + len);
	/* Add ev. padding from prev. sync_conn */
	while (pad--)
		*(p++) = 0;

	s = (union ip_vs_sync_conn *)p;

	/* Set message type & copy members */
	s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
	s->v4.ver_size = htons(len & SVER_MASK);	/* Version 0 */
	s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->v4.state = htons(cp->state);
	s->v4.protocol = cp->protocol;
	s->v4.cport = cp->cport;
	s->v4.vport = cp->vport;
	s->v4.dport = cp->dport;
	s->v4.fwmark = htonl(cp->fwmark);
	s->v4.timeout = htonl(cp->timeout / HZ);
	m->nr_conns++;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6) {
		p += sizeof(struct ip_vs_sync_v6);
		s->v6.caddr = cp->caddr.in6;
		s->v6.vaddr = cp->vaddr.in6;
		s->v6.daddr = cp->daddr.in6;
	} else
#endif
	{
		p += sizeof(struct ip_vs_sync_v4);
		s->v4.caddr = cp->caddr.ip;
		s->v4.vaddr = cp->vaddr.ip;
		s->v4.daddr = cp->daddr.ip;
	}
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		*(p++) = IPVS_OPT_SEQ_DATA;
		*(p++) = sizeof(struct ip_vs_sync_conn_options);
		hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
		p += sizeof(struct ip_vs_seq);
		hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
		p += sizeof(struct ip_vs_seq);
	}
	/* Handle pe data */
	if (cp->pe_data_len && cp->pe_data) {
		*(p++) = IPVS_OPT_PE_DATA;
		*(p++) = cp->pe_data_len;
		memcpy(p, cp->pe_data, cp->pe_data_len);
		p += cp->pe_data_len;
		if (pe_name_len) {
			/* Add PE_NAME */
			*(p++) = IPVS_OPT_PE_NAME;
			*(p++) = pe_name_len;
			memcpy(p, cp->pe->name, pe_name_len);
			p += pe_name_len;
		}
	}

	spin_unlock_bh(&ipvs->sync_buff_lock);

control:
	/* synchronize its controller (template) if it has */
	cp = cp->control;
	if (!cp)
		return;
	if (cp->flags & IP_VS_CONN_F_TEMPLATE)
		pkts = atomic_add_return(1, &cp->in_pkts);
	else
		pkts = sysctl_sync_threshold(ipvs);
	goto sloop;
}
782
783
784
785
/*
 * fill_param used by version 1: build an ip_vs_conn_param from a
 * received sync entry.  On PE data, looks up the persistence engine by
 * name and duplicates the PE data for the new connection.
 * Returns 0 on success, >0 to skip the entry, -ENOMEM on allocation
 * failure.  On success with PE data, caller owns p->pe (module ref)
 * and p->pe_data (kmalloc'd).
 */
static inline int
ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
			   struct ip_vs_conn_param *p,
			   __u8 *pe_data, unsigned int pe_data_len,
			   __u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
				      (const union nf_inet_addr *)&sc->v6.caddr,
				      sc->v6.cport,
				      (const union nf_inet_addr *)&sc->v6.vaddr,
				      sc->v6.vport, p);
	else
#endif
		ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
				      (const union nf_inet_addr *)&sc->v4.caddr,
				      sc->v4.cport,
				      (const union nf_inet_addr *)&sc->v4.vaddr,
				      sc->v4.vport, p);
	/* Handle pe data */
	if (pe_data_len) {
		if (pe_name_len) {
			char buff[IP_VS_PENAME_MAXLEN+1];

			memcpy(buff, pe_name, pe_name_len);
			buff[pe_name_len]=0;
			p->pe = __ip_vs_pe_getbyname(buff);
			if (!p->pe) {
				IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
					  buff);
				return 1;
			}
		} else {
			/* PE data without a PE name is invalid */
			IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
			return 1;
		}

		p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
		if (!p->pe_data) {
			module_put(p->pe->module);
			return -ENOMEM;
		}
		p->pe_data_len = pe_data_len;
	}
	return 0;
}
833
834
835
836
837
838
839
/*
 * Backup-side core: apply one received sync entry.  Looks up an
 * existing connection (or template) matching @param; if found, updates
 * its flags/dest counters, otherwise creates a new connection bound to
 * the best-matching real server.  Always consumes param->pe_data.
 * @timeout is in seconds (0 means "use protocol default").
 */
static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
			    unsigned int flags, unsigned int state,
			    unsigned int protocol, unsigned int type,
			    const union nf_inet_addr *daddr, __be16 dport,
			    unsigned long timeout, __u32 fwmark,
			    struct ip_vs_sync_conn_options *opt)
{
	struct ip_vs_dest *dest;
	struct ip_vs_conn *cp;

	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
		cp = ip_vs_conn_in_get(param);
		/* existing conn points to a different real server? */
		if (cp && ((cp->dport != dport) ||
			   !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
			if (!(flags & IP_VS_CONN_F_INACTIVE)) {
				/* real server changed: expire old entry and
				 * create a fresh one below */
				ip_vs_conn_expire_now(cp);
				__ip_vs_conn_put(cp);
				cp = NULL;
			} else {
				/* This is the expiration message for the
				 * connection that was already replaced, so we
				 * just ignore it.
				 */
				__ip_vs_conn_put(cp);
				kfree(param->pe_data);
				return;
			}
		}
	} else {
		cp = ip_vs_ct_in_get(param);
	}

	if (cp) {
		/* Free pe_data */
		kfree(param->pe_data);

		dest = cp->dest;
		spin_lock_bh(&cp->lock);
		/* keep dest active/inactive counters in sync with the
		 * INACTIVE flag transition */
		if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
		    !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
			if (flags & IP_VS_CONN_F_INACTIVE) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
			} else {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
			}
		}
		/* only the backup-updatable flag bits may change */
		flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
		flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
		cp->flags = flags;
		spin_unlock_bh(&cp->lock);
		if (!dest)
			ip_vs_try_bind_dest(cp);
	} else {
		/*
		 * Find the appropriate destination for the connection.
		 * If it is not found the connection will remain unbound
		 * but still handled.
		 */
		rcu_read_lock();
		/* This function is only invoked by the synchronization
		 * code. We do not currently support heterogeneous pools
		 * with synchronization, so we can make the assumption that
		 * the svc_af is the same as the dest_af
		 */
		dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
				       param->vaddr, param->vport, protocol,
				       fwmark, flags);

		cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
				    fwmark);
		rcu_read_unlock();
		if (!cp) {
			kfree(param->pe_data);
			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
			return;
		}
		/* templates keep pe_data; others do not need it */
		if (!(flags & IP_VS_CONN_F_TEMPLATE))
			kfree(param->pe_data);
	}

	if (opt) {
		cp->in_seq = opt->in_seq;
		cp->out_seq = opt->out_seq;
	}
	atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
	cp->state = state;
	cp->old_state = cp->state;
	/*
	 * For Ver 0 messages style
	 *  - Not possible to recover the right timeout for templates
	 *  - can not find the right fwmark
	 *    virtual service. If needed, we can do it for
	 *    non-fwmark persistent services.
	 * Ver 1 messages style.
	 *  - No problem.
	 */
	if (timeout) {
		if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
			timeout = MAX_SCHEDULE_TIMEOUT / HZ;
		cp->timeout = timeout*HZ;
	} else {
		struct ip_vs_proto_data *pd;

		pd = ip_vs_proto_data_get(ipvs, protocol);
		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
			cp->timeout = pd->timeout_table[state];
		else
			cp->timeout = (3*60*HZ);
	}
	ip_vs_conn_put(cp);
}
953
954
955
956
957static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
958 const size_t buflen)
959{
960 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
961 struct ip_vs_sync_conn_v0 *s;
962 struct ip_vs_sync_conn_options *opt;
963 struct ip_vs_protocol *pp;
964 struct ip_vs_conn_param param;
965 char *p;
966 int i;
967
968 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
969 for (i=0; i<m->nr_conns; i++) {
970 unsigned int flags, state;
971
972 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
973 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
974 return;
975 }
976 s = (struct ip_vs_sync_conn_v0 *) p;
977 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
978 flags &= ~IP_VS_CONN_F_HASHED;
979 if (flags & IP_VS_CONN_F_SEQ_MASK) {
980 opt = (struct ip_vs_sync_conn_options *)&s[1];
981 p += FULL_CONN_SIZE;
982 if (p > buffer+buflen) {
983 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
984 return;
985 }
986 } else {
987 opt = NULL;
988 p += SIMPLE_CONN_SIZE;
989 }
990
991 state = ntohs(s->state);
992 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
993 pp = ip_vs_proto_get(s->protocol);
994 if (!pp) {
995 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
996 s->protocol);
997 continue;
998 }
999 if (state >= pp->num_states) {
1000 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
1001 pp->name, state);
1002 continue;
1003 }
1004 } else {
1005
1006 if (state > 0) {
1007 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
1008 state);
1009 state = 0;
1010 }
1011 }
1012
1013 ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
1014 (const union nf_inet_addr *)&s->caddr,
1015 s->cport,
1016 (const union nf_inet_addr *)&s->vaddr,
1017 s->vport, ¶m);
1018
1019
1020 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->protocol, AF_INET,
1021 (union nf_inet_addr *)&s->daddr, s->dport,
1022 0, 0, opt);
1023 }
1024}
1025
1026
1027
1028
1029static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
1030 __u32 *opt_flags,
1031 struct ip_vs_sync_conn_options *opt)
1032{
1033 struct ip_vs_sync_conn_options *topt;
1034
1035 topt = (struct ip_vs_sync_conn_options *)p;
1036
1037 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
1038 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
1039 return -EINVAL;
1040 }
1041 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
1042 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
1043 return -EINVAL;
1044 }
1045 ntoh_seq(&topt->in_seq, &opt->in_seq);
1046 ntoh_seq(&topt->out_seq, &opt->out_seq);
1047 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
1048 return 0;
1049}
1050
1051static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1052 __u8 **data, unsigned int maxlen,
1053 __u32 *opt_flags, __u32 flag)
1054{
1055 if (plen > maxlen) {
1056 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
1057 return -EINVAL;
1058 }
1059 if (*opt_flags & flag) {
1060 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
1061 return -EINVAL;
1062 }
1063 *data_len = plen;
1064 *data = p;
1065 *opt_flags |= flag;
1066 return 0;
1067}
1068
1069
1070
1071static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
1072{
1073 struct ip_vs_sync_conn_options opt;
1074 union ip_vs_sync_conn *s;
1075 struct ip_vs_protocol *pp;
1076 struct ip_vs_conn_param param;
1077 __u32 flags;
1078 unsigned int af, state, pe_data_len=0, pe_name_len=0;
1079 __u8 *pe_data=NULL, *pe_name=NULL;
1080 __u32 opt_flags=0;
1081 int retc=0;
1082
1083 s = (union ip_vs_sync_conn *) p;
1084
1085 if (s->v6.type & STYPE_F_INET6) {
1086#ifdef CONFIG_IP_VS_IPV6
1087 af = AF_INET6;
1088 p += sizeof(struct ip_vs_sync_v6);
1089#else
1090 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1091 retc = 10;
1092 goto out;
1093#endif
1094 } else if (!s->v4.type) {
1095 af = AF_INET;
1096 p += sizeof(struct ip_vs_sync_v4);
1097 } else {
1098 return -10;
1099 }
1100 if (p > msg_end)
1101 return -20;
1102
1103
1104 while (p < msg_end) {
1105 int ptype;
1106 int plen;
1107
1108 if (p+2 > msg_end)
1109 return -30;
1110 ptype = *(p++);
1111 plen = *(p++);
1112
1113 if (!plen || ((p + plen) > msg_end))
1114 return -40;
1115
1116 switch (ptype & ~IPVS_OPT_F_PARAM) {
1117 case IPVS_OPT_SEQ_DATA:
1118 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1119 return -50;
1120 break;
1121
1122 case IPVS_OPT_PE_DATA:
1123 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1124 IP_VS_PEDATA_MAXLEN, &opt_flags,
1125 IPVS_OPT_F_PE_DATA))
1126 return -60;
1127 break;
1128
1129 case IPVS_OPT_PE_NAME:
1130 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1131 IP_VS_PENAME_MAXLEN, &opt_flags,
1132 IPVS_OPT_F_PE_NAME))
1133 return -70;
1134 break;
1135
1136 default:
1137
1138 if (!(ptype & IPVS_OPT_F_PARAM)) {
1139 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1140 ptype & ~IPVS_OPT_F_PARAM);
1141 retc = 20;
1142 goto out;
1143 }
1144 }
1145 p += plen;
1146 }
1147
1148
1149 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1150 flags |= IP_VS_CONN_F_SYNC;
1151 state = ntohs(s->v4.state);
1152
1153 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1154 pp = ip_vs_proto_get(s->v4.protocol);
1155 if (!pp) {
1156 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1157 s->v4.protocol);
1158 retc = 30;
1159 goto out;
1160 }
1161 if (state >= pp->num_states) {
1162 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1163 pp->name, state);
1164 retc = 40;
1165 goto out;
1166 }
1167 } else {
1168
1169 if (state > 0) {
1170 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1171 state);
1172 state = 0;
1173 }
1174 }
1175 if (ip_vs_conn_fill_param_sync(ipvs, af, s, ¶m, pe_data,
1176 pe_data_len, pe_name, pe_name_len)) {
1177 retc = 50;
1178 goto out;
1179 }
1180
1181 if (af == AF_INET)
1182 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v4.protocol, af,
1183 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1184 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1185 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1186 );
1187#ifdef CONFIG_IP_VS_IPV6
1188 else
1189 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v6.protocol, af,
1190 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1191 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1192 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1193 );
1194#endif
1195 ip_vs_pe_put(param.pe);
1196 return 0;
1197
1198out:
1199 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1200 return retc;
1201
1202}
1203
1204
1205
1206
1207
/*
 *      Process received multicast message and create the corresponding
 *      ip_vs_conn entries.
 *      Handles Version 0 & 1: a zero 'reserved' byte plus matching
 *      version/spare fields identifies a v1 message; anything else is
 *      treated as legacy v0.
 */
static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
				  const size_t buflen)
{
	struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
	__u8 *p, *msg_end;
	int i, nr_conns;

	if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
		IP_VS_DBG(2, "BACKUP, message header too short\n");
		return;
	}

	/* the declared size must match what was actually received */
	if (buflen != ntohs(m2->size)) {
		IP_VS_DBG(2, "BACKUP, bogus message size\n");
		return;
	}
	/* SyncID sanity check */
	if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
		IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
		return;
	}
	/* Profile of last message received: version 1 if the first byte
	 * (old v0 nr_conns) is zero and version/spare check out */
	if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
	    && (m2->spare == 0)) {
		/* Version 1 */
		msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
		nr_conns = m2->nr_conns;

		for (i=0; i<nr_conns; i++) {
			union ip_vs_sync_conn *s;
			unsigned int size;
			int retc;

			p = msg_end;
			if (p + sizeof(s->v4) > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
				return;
			}
			s = (union ip_vs_sync_conn *)p;
			size = ntohs(s->v4.ver_size) & SVER_MASK;
			msg_end = p + size;
			/* Basic sanity checks */
			if (msg_end > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
				return;
			}
			if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
					      ntohs(s->v4.ver_size) >> SVER_SHIFT);
				return;
			}
			/* Process a single sync_conn */
			retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
			if (retc < 0) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
					     retc);
				return;
			}
			/* Make sure we have 32 bit alignment for next entry */
			msg_end = p + ((size + 3) & ~3);
		}
	} else {
		/* Old type of message */
		ip_vs_process_message_v0(ipvs, buffer, buflen);
		return;
	}
}
1275
1276
1277
1278
1279
/*
 * Setup sndbuf (mode=1) or rcvbuf (mode=0) for the sync socket.
 * The value is clamped against the system wmem/rmem limits and the
 * user-lock bit is set so later auto-tuning does not override it.
 */
static void set_sock_size(struct sock *sk, int mode, int val)
{
	/* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
	/* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
	lock_sock(sk);
	if (mode) {
		val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
			      sysctl_wmem_max);
		sk->sk_sndbuf = val * 2;
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	} else {
		val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
			      sysctl_rmem_max);
		sk->sk_rcvbuf = val * 2;
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
	release_sock(sk);
}
1298
1299
1300
1301
/*
 *      Specify mcast loop: whether outgoing multicasts are looped back
 *      to local sockets (equivalent to the IP_MULTICAST_LOOP /
 *      IPV6_MULTICAST_LOOP socket options).
 */
static void set_mcast_loop(struct sock *sk, u_char loop)
{
	struct inet_sock *inet = inet_sk(sk);

	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
	lock_sock(sk);
	inet->mc_loop = loop ? 1 : 0;
#ifdef CONFIG_IP_VS_IPV6
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		/* IPV6_MULTICAST_LOOP */
		np->mc_loop = loop ? 1 : 0;
	}
#endif
	release_sock(sk);
}
1319
1320
1321
1322
/*
 *      Specify multicast TTL / hop limit for outgoing sync packets
 *      (equivalent to IP_MULTICAST_TTL / IPV6_MULTICAST_HOPS).
 */
static void set_mcast_ttl(struct sock *sk, u_char ttl)
{
	struct inet_sock *inet = inet_sk(sk);

	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
	lock_sock(sk);
	inet->mc_ttl = ttl;
#ifdef CONFIG_IP_VS_IPV6
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		/* IPV6_MULTICAST_HOPS */
		np->mcast_hops = ttl;
	}
#endif
	release_sock(sk);
}
1340
1341
/* Set path-MTU discovery mode on the sync socket (IP_MTU_DISCOVER /
 * IPV6_MTU_DISCOVER equivalent). */
static void set_mcast_pmtudisc(struct sock *sk, int val)
{
	struct inet_sock *inet = inet_sk(sk);

	/* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */
	lock_sock(sk);
	inet->pmtudisc = val;
#ifdef CONFIG_IP_VS_IPV6
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		/* IPV6_MTU_DISCOVER */
		np->pmtudisc = val;
	}
#endif
	release_sock(sk);
}
1359
1360
1361
1362
/*
 *      Specify default interface for outgoing multicasts
 *      (IP_MULTICAST_IF / IPV6_MULTICAST_IF equivalent).
 *      Returns -ENODEV if @ifname does not exist, -EINVAL if the socket
 *      is already bound to a different device.
 */
static int set_mcast_if(struct sock *sk, char *ifname)
{
	struct net_device *dev;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;

	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	lock_sock(sk);
	inet->mc_index = dev->ifindex;
	/*  inet->mc_addr  = 0; */
#ifdef CONFIG_IP_VS_IPV6
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		/* IPV6_MULTICAST_IF */
		np->mcast_oif = dev->ifindex;
	}
#endif
	release_sock(sk);

	return 0;
}
1391
1392
1393
1394
1395
1396
1397
/*
 *      Join a multicast group.
 *      the group is specified by a class D multicast address 224.0.0.0/8
 *      in the in_addr structure passed in as a parameter.
 *      Returns -ENODEV if @ifname does not exist, -EINVAL if the socket
 *      is already bound to a different device, otherwise the result of
 *      ip_mc_join_group().
 */
static int
join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
{
	struct net *net = sock_net(sk);
	struct ip_mreqn mreq;
	struct net_device *dev;
	int ret;

	memset(&mreq, 0, sizeof(mreq));
	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;
	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	mreq.imr_ifindex = dev->ifindex;

	lock_sock(sk);
	ret = ip_mc_join_group(sk, &mreq);
	release_sock(sk);

	return ret;
}
1423
1424#ifdef CONFIG_IP_VS_IPV6
/* IPv6 counterpart of join_mcast_group(): join multicast group @addr
 * on interface @ifname via ipv6_sock_mc_join(). */
static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
			     char *ifname)
{
	struct net *net = sock_net(sk);
	struct net_device *dev;
	int ret;

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;
	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	lock_sock(sk);
	ret = ipv6_sock_mc_join(sk, dev->ifindex, addr);
	release_sock(sk);

	return ret;
}
1445
1446static int bind_mcastif_addr(struct socket *sock, char *ifname)
1447{
1448 struct net *net = sock_net(sock->sk);
1449 struct net_device *dev;
1450 __be32 addr;
1451 struct sockaddr_in sin;
1452
1453 dev = __dev_get_by_name(net, ifname);
1454 if (!dev)
1455 return -ENODEV;
1456
1457 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1458 if (!addr)
1459 pr_err("You probably need to specify IP address on "
1460 "multicast interface.\n");
1461
1462 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
1463 ifname, &addr);
1464
1465
1466 sin.sin_family = AF_INET;
1467 sin.sin_addr.s_addr = addr;
1468 sin.sin_port = 0;
1469
1470 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
1471}
1472
1473static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
1474 struct ipvs_sync_daemon_cfg *c, int id)
1475{
1476 if (AF_INET6 == c->mcast_af) {
1477 sa->in6 = (struct sockaddr_in6) {
1478 .sin6_family = AF_INET6,
1479 .sin6_port = htons(c->mcast_port + id),
1480 };
1481 sa->in6.sin6_addr = c->mcast_group.in6;
1482 *salen = sizeof(sa->in6);
1483 } else {
1484 sa->in = (struct sockaddr_in) {
1485 .sin_family = AF_INET,
1486 .sin_port = htons(c->mcast_port + id),
1487 };
1488 sa->in.sin_addr = c->mcast_group.in;
1489 *salen = sizeof(sa->in);
1490 }
1491}
1492
1493
1494
1495
/*
 *	Create and set up the sending (master) multicast socket for sync
 *	thread @id: bind it to the configured interface, disable loopback,
 *	set TTL, and connect it to the group address, so the master thread
 *	can use plain sendmsg.  Returns the socket or ERR_PTR on failure.
 */
static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
{
	/* multicast addr */
	union ipvs_sockaddr mcast_addr;
	struct socket *sock;
	int result, salen;

	/* First create a socket */
	result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
				  IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		return ERR_PTR(result);
	}
	result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
	if (result < 0) {
		pr_err("Error setting outbound mcast interface\n");
		goto error;
	}

	/* Don't loop our own sync traffic back; use configured TTL */
	set_mcast_loop(sock->sk, 0);
	set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
	/* Allow fragmentation rather than failing on PMTU */
	set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 1, result);

	/* Only IPv4 needs an explicit source-address bind here */
	if (AF_INET == ipvs->mcfg.mcast_af)
		result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
	else
		result = 0;
	if (result < 0) {
		pr_err("Error binding address of the mcast interface\n");
		goto error;
	}

	get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
				    salen, 0);
	if (result < 0) {
		pr_err("Error connecting to the multicast addr\n");
		goto error;
	}

	return sock;

error:
	sock_release(sock);
	return ERR_PTR(result);
}
1547
1548
1549
1550
1551
/*
 *	Create and set up the receiving (backup) multicast socket for sync
 *	thread @id: bind it to the group address on device @ifindex and
 *	join the multicast group.  Returns the socket or ERR_PTR on failure.
 */
static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
					int ifindex)
{
	/* multicast addr */
	union ipvs_sockaddr mcast_addr;
	struct socket *sock;
	int result, salen;

	/* First create a socket */
	result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
				  IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		return ERR_PTR(result);
	}
	/* Several backup threads may bind the same group:port */
	sock->sk->sk_reuse = SK_CAN_REUSE;
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 0, result);

	get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
	/* Restrict reception to the configured interface */
	sock->sk->sk_bound_dev_if = ifindex;
	result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
	if (result < 0) {
		pr_err("Error binding to the multicast addr\n");
		goto error;
	}

	/* join the multicast group */
#ifdef CONFIG_IP_VS_IPV6
	if (ipvs->bcfg.mcast_af == AF_INET6)
		result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
					   ipvs->bcfg.mcast_ifn);
	else
#endif
		result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
					  ipvs->bcfg.mcast_ifn);
	if (result < 0) {
		pr_err("Error joining to the multicast group\n");
		goto error;
	}

	return sock;

error:
	sock_release(sock);
	return ERR_PTR(result);
}
1601
1602
1603static int
1604ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1605{
1606 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1607 struct kvec iov;
1608 int len;
1609
1610 EnterFunction(7);
1611 iov.iov_base = (void *)buffer;
1612 iov.iov_len = length;
1613
1614 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1615
1616 LeaveFunction(7);
1617 return len;
1618}
1619
1620static int
1621ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1622{
1623 int msize;
1624 int ret;
1625
1626 msize = ntohs(msg->size);
1627
1628 ret = ip_vs_send_async(sock, (char *)msg, msize);
1629 if (ret >= 0 || ret == -EAGAIN)
1630 return ret;
1631 pr_err("ip_vs_send_async error %d\n", ret);
1632 return 0;
1633}
1634
1635static int
1636ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1637{
1638 struct msghdr msg = {NULL,};
1639 struct kvec iov;
1640 int len;
1641
1642 EnterFunction(7);
1643
1644
1645 iov.iov_base = buffer;
1646 iov.iov_len = (size_t)buflen;
1647
1648 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1649
1650 if (len < 0)
1651 return len;
1652
1653 LeaveFunction(7);
1654 return len;
1655}
1656
1657
/*
 *	Delayed-work handler that kicks the master sync thread so queued
 *	sync buffers get flushed even under low traffic.
 */
static void master_wakeup_work_handler(struct work_struct *work)
{
	struct ipvs_master_sync_state *ms =
		container_of(work, struct ipvs_master_sync_state,
			     master_wakeup_work.work);
	struct netns_ipvs *ipvs = ms->ipvs;

	spin_lock_bh(&ipvs->sync_lock);
	if (ms->sync_queue_len &&
	    ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
		/* Saturate the delay counter so this condition stays false
		 * and we don't wake the thread again; presumably reset by
		 * the master thread when it drains the queue -- confirm.
		 */
		ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
		wake_up_process(ms->master_thread);
	}
	spin_unlock_bh(&ipvs->sync_lock);
}
1673
1674
1675static inline struct ip_vs_sync_buff *
1676next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1677{
1678 struct ip_vs_sync_buff *sb;
1679
1680 sb = sb_dequeue(ipvs, ms);
1681 if (sb)
1682 return sb;
1683
1684 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1685}
1686
/*
 *	Master sync daemon thread: repeatedly pull the next sync buffer
 *	and multicast it, sleeping when nothing is pending and waiting for
 *	socket writeability on send backpressure.  Frees tinfo and the
 *	socket on exit.
 */
static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = tinfo->ipvs;
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		if (unlikely(kthread_should_stop()))
			break;
		if (!sb) {
			/* Nothing to send; nap and poll again */
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			/* -EAGAIN from the socket: block until it is
			 * writeable again or we are asked to stop.
			 */
			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop());
			if (unlikely(kthread_should_stop()))
				goto done;
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	/* sb was dequeued but not yet sent/released when we broke out */
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* clean up the sync_buff queue */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	__set_current_state(TASK_RUNNING);

	/* clean up the current (partially filled) sync_buff */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* release the sending multicast socket */
	sock_release(tinfo->sock);
	kfree(tinfo);

	return 0;
}
1741
1742
/*
 *	Backup sync daemon thread: sleep until the receive queue is
 *	non-empty, then drain it, feeding each datagram to
 *	ip_vs_process_message().  Frees tinfo, its buffer and the socket
 *	on exit.
 */
static int sync_thread_backup(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = tinfo->ipvs;
	int len;

	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);

	while (!kthread_should_stop()) {
		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
			 || kthread_should_stop());

		/* do we have data now? */
		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
			len = ip_vs_receive(tinfo->sock, tinfo->buf,
					ipvs->bcfg.sync_maxlen);
			if (len <= 0) {
				/* -EAGAIN just means the queue raced empty */
				if (len != -EAGAIN)
					pr_err("receiving message error\n");
				break;
			}

			ip_vs_process_message(ipvs, tinfo->buf, len);
		}
	}

	/* release the sending multicast socket */
	sock_release(tinfo->sock);
	kfree(tinfo->buf);
	kfree(tinfo);

	return 0;
}
1779
1780
/*
 *	Start the master or backup sync daemon(s) for this netns.
 *	@c supplies the (possibly partial) daemon configuration; missing
 *	fields are defaulted here.  One thread/socket pair is created per
 *	sync port (count = sync_ports sysctl, rounded per threads_mask).
 *	Returns 0 on success; on failure all already-started threads and
 *	sockets are torn down again.
 *	NOTE(review): __dev_get_by_name() is used without a reference, so
 *	the caller presumably holds RTNL -- confirm.
 */
int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
		      int state)
{
	struct ip_vs_sync_thread_data *tinfo;
	struct task_struct **array = NULL, *task;
	struct socket *sock;
	struct net_device *dev;
	char *name;
	int (*threadfn)(void *data);
	int id, count, hlen;
	int result = -ENOMEM;
	u16 mtu, min_mtu;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
		  sizeof(struct ip_vs_sync_conn_v0));

	/* First daemon fixes the thread count; later ones must reuse it */
	if (!ipvs->sync_state) {
		count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
		ipvs->threads_mask = count - 1;
	} else
		count = ipvs->threads_mask + 1;

	/* Fill in configuration defaults */
	if (c->mcast_af == AF_UNSPEC) {
		c->mcast_af = AF_INET;
		c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
	}
	if (!c->mcast_port)
		c->mcast_port = IP_VS_SYNC_PORT;
	if (!c->mcast_ttl)
		c->mcast_ttl = 1;

	dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
	if (!dev) {
		pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
		return -ENODEV;
	}
	/* Compute sync_maxlen from the device MTU minus IP/UDP headers */
	hlen = (AF_INET6 == c->mcast_af) ?
	       sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
	       sizeof(struct iphdr) + sizeof(struct udphdr);
	mtu = (state == IP_VS_STATE_BACKUP) ?
		  clamp(dev->mtu, 1500U, 65535U) : 1500U;
	min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;

	if (c->sync_maxlen)
		c->sync_maxlen = clamp_t(unsigned int,
					 c->sync_maxlen, min_mtu,
					 65535 - hlen);
	else
		c->sync_maxlen = mtu - hlen;

	/* Pick thread function and reject double starts */
	if (state == IP_VS_STATE_MASTER) {
		if (ipvs->ms)
			return -EEXIST;

		ipvs->mcfg = *c;
		name = "ipvs-m:%d:%d";
		threadfn = sync_thread_master;
	} else if (state == IP_VS_STATE_BACKUP) {
		if (ipvs->backup_threads)
			return -EEXIST;

		ipvs->bcfg = *c;
		name = "ipvs-b:%d:%d";
		threadfn = sync_thread_backup;
	} else {
		return -EINVAL;
	}

	if (state == IP_VS_STATE_MASTER) {
		struct ipvs_master_sync_state *ms;

		/* Per-thread master state: queue, delayed wakeup work */
		ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
		if (!ipvs->ms)
			goto out;
		ms = ipvs->ms;
		for (id = 0; id < count; id++, ms++) {
			INIT_LIST_HEAD(&ms->sync_queue);
			ms->sync_queue_len = 0;
			ms->sync_queue_delay = 0;
			INIT_DELAYED_WORK(&ms->master_wakeup_work,
					  master_wakeup_work_handler);
			ms->ipvs = ipvs;
		}
	} else {
		/* Backup keeps only the task pointers */
		array = kcalloc(count, sizeof(struct task_struct *),
				GFP_KERNEL);
		if (!array)
			goto out;
	}

	tinfo = NULL;
	for (id = 0; id < count; id++) {
		if (state == IP_VS_STATE_MASTER)
			sock = make_send_sock(ipvs, id);
		else
			sock = make_receive_sock(ipvs, id, dev->ifindex);
		if (IS_ERR(sock)) {
			result = PTR_ERR(sock);
			goto outtinfo;
		}
		tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
		if (!tinfo)
			goto outsocket;
		tinfo->ipvs = ipvs;
		tinfo->sock = sock;
		if (state == IP_VS_STATE_BACKUP) {
			/* Receive buffer, one full sync message */
			tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
					     GFP_KERNEL);
			if (!tinfo->buf)
				goto outtinfo;
		} else {
			tinfo->buf = NULL;
		}
		tinfo->id = id;

		task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
		if (IS_ERR(task)) {
			result = PTR_ERR(task);
			goto outtinfo;
		}
		/* Thread now owns tinfo (and frees it on exit) */
		tinfo = NULL;
		if (state == IP_VS_STATE_MASTER)
			ipvs->ms[id].master_thread = task;
		else
			array[id] = task;
	}

	/* mark as active */

	if (state == IP_VS_STATE_BACKUP)
		ipvs->backup_threads = array;
	spin_lock_bh(&ipvs->sync_buff_lock);
	ipvs->sync_state |= state;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* increase the module use count */
	ip_vs_use_count_inc();

	return 0;

outsocket:
	sock_release(sock);

outtinfo:
	if (tinfo) {
		sock_release(tinfo->sock);
		kfree(tinfo->buf);
		kfree(tinfo);
	}
	/* Stop all threads started before the failure */
	count = id;
	while (count-- > 0) {
		if (state == IP_VS_STATE_MASTER)
			kthread_stop(ipvs->ms[count].master_thread);
		else
			kthread_stop(array[count]);
	}
	kfree(array);

out:
	/* Free ms only if no master daemon is (still) running */
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	}
	return result;
}
1947
1948
1949int stop_sync_thread(struct netns_ipvs *ipvs, int state)
1950{
1951 struct task_struct **array;
1952 int id;
1953 int retc = -EINVAL;
1954
1955 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1956
1957 if (state == IP_VS_STATE_MASTER) {
1958 if (!ipvs->ms)
1959 return -ESRCH;
1960
1961
1962
1963
1964
1965
1966
1967 spin_lock_bh(&ipvs->sync_buff_lock);
1968 spin_lock(&ipvs->sync_lock);
1969 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1970 spin_unlock(&ipvs->sync_lock);
1971 spin_unlock_bh(&ipvs->sync_buff_lock);
1972
1973 retc = 0;
1974 for (id = ipvs->threads_mask; id >= 0; id--) {
1975 struct ipvs_master_sync_state *ms = &ipvs->ms[id];
1976 int ret;
1977
1978 pr_info("stopping master sync thread %d ...\n",
1979 task_pid_nr(ms->master_thread));
1980 cancel_delayed_work_sync(&ms->master_wakeup_work);
1981 ret = kthread_stop(ms->master_thread);
1982 if (retc >= 0)
1983 retc = ret;
1984 }
1985 kfree(ipvs->ms);
1986 ipvs->ms = NULL;
1987 } else if (state == IP_VS_STATE_BACKUP) {
1988 if (!ipvs->backup_threads)
1989 return -ESRCH;
1990
1991 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1992 array = ipvs->backup_threads;
1993 retc = 0;
1994 for (id = ipvs->threads_mask; id >= 0; id--) {
1995 int ret;
1996
1997 pr_info("stopping backup sync thread %d ...\n",
1998 task_pid_nr(array[id]));
1999 ret = kthread_stop(array[id]);
2000 if (retc >= 0)
2001 retc = ret;
2002 }
2003 kfree(array);
2004 ipvs->backup_threads = NULL;
2005 }
2006
2007
2008 ip_vs_use_count_dec();
2009
2010 return retc;
2011}
2012
2013
2014
2015
2016int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
2017{
2018 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
2019 spin_lock_init(&ipvs->sync_lock);
2020 spin_lock_init(&ipvs->sync_buff_lock);
2021 return 0;
2022}
2023
2024void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
2025{
2026 int retc;
2027
2028 mutex_lock(&ipvs->sync_mutex);
2029 retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
2030 if (retc && retc != -ESRCH)
2031 pr_err("Failed to stop Master Daemon\n");
2032
2033 retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
2034 if (retc && retc != -ESRCH)
2035 pr_err("Failed to stop Backup Daemon\n");
2036 mutex_unlock(&ipvs->sync_mutex);
2037}
2038