1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34#define KMSG_COMPONENT "IPVS"
35#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
36
37#include <linux/module.h>
38#include <linux/slab.h>
39#include <linux/inetdevice.h>
40#include <linux/net.h>
41#include <linux/completion.h>
42#include <linux/delay.h>
43#include <linux/skbuff.h>
44#include <linux/in.h>
45#include <linux/igmp.h>
46#include <linux/udp.h>
47#include <linux/err.h>
48#include <linux/kthread.h>
49#include <linux/wait.h>
50#include <linux/kernel.h>
51
52#include <asm/unaligned.h>
53
54#include <net/ip.h>
55#include <net/sock.h>
56
57#include <net/ip_vs.h>
58
/*
 * Sync transport constants.  0xe0000051 is 224.0.0.81 in dotted-quad form;
 * presumably these are the default multicast group and UDP port (their
 * users are outside this part of the file -- confirm there).
 */
#define IP_VS_SYNC_GROUP	0xe0000051
#define IP_VS_SYNC_PORT		8848

/* Version written to, and required in, struct ip_vs_sync_mesg.version. */
#define SYNC_PROTO_VER		1
63
64static struct lock_class_key __ipvs_sync_key;
65
66
67
68
69struct ip_vs_sync_conn_v0 {
70 __u8 reserved;
71
72
73 __u8 protocol;
74 __be16 cport;
75 __be16 vport;
76 __be16 dport;
77 __be32 caddr;
78 __be32 vaddr;
79 __be32 daddr;
80
81
82 __be16 flags;
83 __be16 state;
84
85
86};
87
88struct ip_vs_sync_conn_options {
89 struct ip_vs_seq in_seq;
90 struct ip_vs_seq out_seq;
91};
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131struct ip_vs_sync_v4 {
132 __u8 type;
133 __u8 protocol;
134 __be16 ver_size;
135
136 __be32 flags;
137 __be16 state;
138
139 __be16 cport;
140 __be16 vport;
141 __be16 dport;
142 __be32 fwmark;
143 __be32 timeout;
144 __be32 caddr;
145 __be32 vaddr;
146 __be32 daddr;
147
148
149};
150
151
152
153struct ip_vs_sync_v6 {
154 __u8 type;
155 __u8 protocol;
156 __be16 ver_size;
157
158 __be32 flags;
159 __be16 state;
160
161 __be16 cport;
162 __be16 vport;
163 __be16 dport;
164 __be32 fwmark;
165 __be32 timeout;
166 struct in6_addr caddr;
167 struct in6_addr vaddr;
168 struct in6_addr daddr;
169
170
171};
172
173union ip_vs_sync_conn {
174 struct ip_vs_sync_v4 v4;
175 struct ip_vs_sync_v6 v6;
176};
177
178
/* Bit number and mask for the "type" byte of a version 1 sync entry */
#define STYPE_INET6		0
#define STYPE_F_INET6		(1 << STYPE_INET6)

/* ver_size: the low 12 bits hold the entry size, the bits above a version */
#define SVER_SHIFT		12
#define SVER_MASK		0x0fff

/* Type codes of the optional type/length parameters after an entry */
#define IPVS_OPT_SEQ_DATA	1
#define IPVS_OPT_PE_DATA	2
#define IPVS_OPT_PE_NAME	3
#define IPVS_OPT_PARAM		7

/*
 * Bit masks derived from the codes above.  The receiver records seen
 * options with the first three; IPVS_OPT_F_PARAM is masked out of the
 * received type byte and, when set on an unknown type, lets the receiver
 * skip that parameter instead of dropping the entry.
 */
#define IPVS_OPT_F_SEQ_DATA	(1 << (IPVS_OPT_SEQ_DATA-1))
#define IPVS_OPT_F_PE_DATA	(1 << (IPVS_OPT_PE_DATA-1))
#define IPVS_OPT_F_PE_NAME	(1 << (IPVS_OPT_PE_NAME-1))
#define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))
194
/*
 * Per-thread context of a sync thread.  The code that fills and uses it
 * is outside this part of the file; the notes below follow the field
 * names only -- confirm against the thread functions.
 */
struct ip_vs_sync_thread_data {
	struct netns_ipvs	*ipvs;	/* owning IPVS netns state */
	struct socket		*sock;	/* socket used by the thread */
	char			*buf;	/* presumably a receive buffer */
	int			id;	/* thread index */
};
201
202
/* Size of a version 0 entry without / with the trailing sequence options */
#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE  \
	(sizeof(struct ip_vs_sync_conn_v0) + \
	 sizeof(struct ip_vs_sync_conn_options))
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/* 4 matches the three fields of struct ip_vs_sync_mesg_v0 (1 + 1 + 2 bytes) */
#define SYNC_MESG_HEADER_LEN	4
/* 255 is the largest value a __u8 nr_conns counter can hold */
#define MAX_CONNS_PER_SYNCBUFF	255
244
245
246struct ip_vs_sync_mesg_v0 {
247 __u8 nr_conns;
248 __u8 syncid;
249 __be16 size;
250
251
252};
253
254
255struct ip_vs_sync_mesg {
256 __u8 reserved;
257 __u8 syncid;
258 __be16 size;
259 __u8 nr_conns;
260 __s8 version;
261 __u16 spare;
262
263};
264
265union ipvs_sockaddr {
266 struct sockaddr_in in;
267 struct sockaddr_in6 in6;
268};
269
270struct ip_vs_sync_buff {
271 struct list_head list;
272 unsigned long firstuse;
273
274
275 struct ip_vs_sync_mesg *mesg;
276 unsigned char *head;
277 unsigned char *end;
278};
279
280
281
282
283
284static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
285{
286 memset(ho, 0, sizeof(*ho));
287 ho->init_seq = get_unaligned_be32(&no->init_seq);
288 ho->delta = get_unaligned_be32(&no->delta);
289 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
290}
291
292
293
294
295
296static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
297{
298 put_unaligned_be32(ho->init_seq, &no->init_seq);
299 put_unaligned_be32(ho->delta, &no->delta);
300 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
301}
302
303static inline struct ip_vs_sync_buff *
304sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
305{
306 struct ip_vs_sync_buff *sb;
307
308 spin_lock_bh(&ipvs->sync_lock);
309 if (list_empty(&ms->sync_queue)) {
310 sb = NULL;
311 __set_current_state(TASK_INTERRUPTIBLE);
312 } else {
313 sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
314 list);
315 list_del(&sb->list);
316 ms->sync_queue_len--;
317 if (!ms->sync_queue_len)
318 ms->sync_queue_delay = 0;
319 }
320 spin_unlock_bh(&ipvs->sync_lock);
321
322 return sb;
323}
324
325
326
327
328static inline struct ip_vs_sync_buff *
329ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
330{
331 struct ip_vs_sync_buff *sb;
332
333 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
334 return NULL;
335
336 len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
337 ipvs->mcfg.sync_maxlen);
338 sb->mesg = kmalloc(len, GFP_ATOMIC);
339 if (!sb->mesg) {
340 kfree(sb);
341 return NULL;
342 }
343 sb->mesg->reserved = 0;
344 sb->mesg->version = SYNC_PROTO_VER;
345 sb->mesg->syncid = ipvs->mcfg.syncid;
346 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
347 sb->mesg->nr_conns = 0;
348 sb->mesg->spare = 0;
349 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
350 sb->end = (unsigned char *)sb->mesg + len;
351
352 sb->firstuse = jiffies;
353 return sb;
354}
355
356static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
357{
358 kfree(sb->mesg);
359 kfree(sb);
360}
361
362static inline void sb_queue_tail(struct netns_ipvs *ipvs,
363 struct ipvs_master_sync_state *ms)
364{
365 struct ip_vs_sync_buff *sb = ms->sync_buff;
366
367 spin_lock(&ipvs->sync_lock);
368 if (ipvs->sync_state & IP_VS_STATE_MASTER &&
369 ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
370 if (!ms->sync_queue_len)
371 schedule_delayed_work(&ms->master_wakeup_work,
372 max(IPVS_SYNC_SEND_DELAY, 1));
373 ms->sync_queue_len++;
374 list_add_tail(&sb->list, &ms->sync_queue);
375 if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
376 wake_up_process(ms->master_thread);
377 } else
378 ip_vs_sync_buff_release(sb);
379 spin_unlock(&ipvs->sync_lock);
380}
381
382
383
384
385
386static inline struct ip_vs_sync_buff *
387get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
388 unsigned long time)
389{
390 struct ip_vs_sync_buff *sb;
391
392 spin_lock_bh(&ipvs->sync_buff_lock);
393 sb = ms->sync_buff;
394 if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
395 ms->sync_buff = NULL;
396 __set_current_state(TASK_RUNNING);
397 } else
398 sb = NULL;
399 spin_unlock_bh(&ipvs->sync_buff_lock);
400 return sb;
401}
402
403static inline int
404select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
405{
406 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
407}
408
409
410
411
412static inline struct ip_vs_sync_buff *
413ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
414{
415 struct ip_vs_sync_buff *sb;
416 struct ip_vs_sync_mesg_v0 *mesg;
417
418 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
419 return NULL;
420
421 len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
422 ipvs->mcfg.sync_maxlen);
423 sb->mesg = kmalloc(len, GFP_ATOMIC);
424 if (!sb->mesg) {
425 kfree(sb);
426 return NULL;
427 }
428 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
429 mesg->nr_conns = 0;
430 mesg->syncid = ipvs->mcfg.syncid;
431 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
432 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
433 sb->end = (unsigned char *)mesg + len;
434 sb->firstuse = jiffies;
435 return sb;
436}
437
438
439static inline bool in_persistence(struct ip_vs_conn *cp)
440{
441 for (cp = cp->control; cp; cp = cp->control) {
442 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
443 return true;
444 }
445 return false;
446}
447
448
449
450
451
452
453
454
455
456
457static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
458 struct ip_vs_conn *cp, int pkts)
459{
460 unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
461 unsigned long now = jiffies;
462 unsigned long n = (now + cp->timeout) & ~3UL;
463 unsigned int sync_refresh_period;
464 int sync_period;
465 int force;
466
467
468 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
469 force = 0;
470 else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
471 return 0;
472 else if (likely(cp->protocol == IPPROTO_TCP)) {
473 if (!((1 << cp->state) &
474 ((1 << IP_VS_TCP_S_ESTABLISHED) |
475 (1 << IP_VS_TCP_S_FIN_WAIT) |
476 (1 << IP_VS_TCP_S_CLOSE) |
477 (1 << IP_VS_TCP_S_CLOSE_WAIT) |
478 (1 << IP_VS_TCP_S_TIME_WAIT))))
479 return 0;
480 force = cp->state != cp->old_state;
481 if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
482 goto set;
483 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
484 if (!((1 << cp->state) &
485 ((1 << IP_VS_SCTP_S_ESTABLISHED) |
486 (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
487 (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
488 (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
489 (1 << IP_VS_SCTP_S_CLOSED))))
490 return 0;
491 force = cp->state != cp->old_state;
492 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
493 goto set;
494 } else {
495
496 force = 0;
497 }
498
499 sync_refresh_period = sysctl_sync_refresh_period(ipvs);
500 if (sync_refresh_period > 0) {
501 long diff = n - orig;
502 long min_diff = max(cp->timeout >> 1, 10UL * HZ);
503
504
505
506
507 if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
508 int retries = orig & 3;
509
510 if (retries >= sysctl_sync_retries(ipvs))
511 return 0;
512 if (time_before(now, orig - cp->timeout +
513 (sync_refresh_period >> 3)))
514 return 0;
515 n |= retries + 1;
516 }
517 }
518 sync_period = sysctl_sync_period(ipvs);
519 if (sync_period > 0) {
520 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
521 pkts % sync_period != sysctl_sync_threshold(ipvs))
522 return 0;
523 } else if (!sync_refresh_period &&
524 pkts != sysctl_sync_threshold(ipvs))
525 return 0;
526
527set:
528 cp->old_state = cp->state;
529 n = cmpxchg(&cp->sync_endtime, orig, n);
530 return n == orig || force;
531}
532
533
534
535
536
537static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
538 int pkts)
539{
540 struct ip_vs_sync_mesg_v0 *m;
541 struct ip_vs_sync_conn_v0 *s;
542 struct ip_vs_sync_buff *buff;
543 struct ipvs_master_sync_state *ms;
544 int id;
545 unsigned int len;
546
547 if (unlikely(cp->af != AF_INET))
548 return;
549
550 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
551 return;
552
553 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
554 return;
555
556 spin_lock_bh(&ipvs->sync_buff_lock);
557 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
558 spin_unlock_bh(&ipvs->sync_buff_lock);
559 return;
560 }
561
562 id = select_master_thread_id(ipvs, cp);
563 ms = &ipvs->ms[id];
564 buff = ms->sync_buff;
565 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
566 SIMPLE_CONN_SIZE;
567 if (buff) {
568 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
569
570 if (buff->head + len > buff->end || !m->nr_conns) {
571 sb_queue_tail(ipvs, ms);
572 ms->sync_buff = NULL;
573 buff = NULL;
574 }
575 }
576 if (!buff) {
577 buff = ip_vs_sync_buff_create_v0(ipvs, len);
578 if (!buff) {
579 spin_unlock_bh(&ipvs->sync_buff_lock);
580 pr_err("ip_vs_sync_buff_create failed.\n");
581 return;
582 }
583 ms->sync_buff = buff;
584 }
585
586 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
587 s = (struct ip_vs_sync_conn_v0 *) buff->head;
588
589
590 s->reserved = 0;
591 s->protocol = cp->protocol;
592 s->cport = cp->cport;
593 s->vport = cp->vport;
594 s->dport = cp->dport;
595 s->caddr = cp->caddr.ip;
596 s->vaddr = cp->vaddr.ip;
597 s->daddr = cp->daddr.ip;
598 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
599 s->state = htons(cp->state);
600 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
601 struct ip_vs_sync_conn_options *opt =
602 (struct ip_vs_sync_conn_options *)&s[1];
603 memcpy(opt, &cp->in_seq, sizeof(*opt));
604 }
605
606 m->nr_conns++;
607 m->size = htons(ntohs(m->size) + len);
608 buff->head += len;
609 spin_unlock_bh(&ipvs->sync_buff_lock);
610
611
612 cp = cp->control;
613 if (cp) {
614 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
615 pkts = atomic_add_return(1, &cp->in_pkts);
616 else
617 pkts = sysctl_sync_threshold(ipvs);
618 ip_vs_sync_conn(ipvs, cp, pkts);
619 }
620}
621
622
623
624
625
626
627void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
628{
629 struct ip_vs_sync_mesg *m;
630 union ip_vs_sync_conn *s;
631 struct ip_vs_sync_buff *buff;
632 struct ipvs_master_sync_state *ms;
633 int id;
634 __u8 *p;
635 unsigned int len, pe_name_len, pad;
636
637
638 if (sysctl_sync_ver(ipvs) == 0) {
639 ip_vs_sync_conn_v0(ipvs, cp, pkts);
640 return;
641 }
642
643 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
644 goto control;
645sloop:
646 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
647 goto control;
648
649
650 pe_name_len = 0;
651 if (cp->pe_data_len) {
652 if (!cp->pe_data || !cp->dest) {
653 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
654 return;
655 }
656 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
657 }
658
659 spin_lock_bh(&ipvs->sync_buff_lock);
660 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
661 spin_unlock_bh(&ipvs->sync_buff_lock);
662 return;
663 }
664
665 id = select_master_thread_id(ipvs, cp);
666 ms = &ipvs->ms[id];
667
668#ifdef CONFIG_IP_VS_IPV6
669 if (cp->af == AF_INET6)
670 len = sizeof(struct ip_vs_sync_v6);
671 else
672#endif
673 len = sizeof(struct ip_vs_sync_v4);
674
675 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
676 len += sizeof(struct ip_vs_sync_conn_options) + 2;
677
678 if (cp->pe_data_len)
679 len += cp->pe_data_len + 2;
680 if (pe_name_len)
681 len += pe_name_len + 2;
682
683
684 pad = 0;
685 buff = ms->sync_buff;
686 if (buff) {
687 m = buff->mesg;
688 pad = (4 - (size_t) buff->head) & 3;
689
690 if (buff->head + len + pad > buff->end || m->reserved) {
691 sb_queue_tail(ipvs, ms);
692 ms->sync_buff = NULL;
693 buff = NULL;
694 pad = 0;
695 }
696 }
697
698 if (!buff) {
699 buff = ip_vs_sync_buff_create(ipvs, len);
700 if (!buff) {
701 spin_unlock_bh(&ipvs->sync_buff_lock);
702 pr_err("ip_vs_sync_buff_create failed.\n");
703 return;
704 }
705 ms->sync_buff = buff;
706 m = buff->mesg;
707 }
708
709 p = buff->head;
710 buff->head += pad + len;
711 m->size = htons(ntohs(m->size) + pad + len);
712
713 while (pad--)
714 *(p++) = 0;
715
716 s = (union ip_vs_sync_conn *)p;
717
718
719 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
720 s->v4.ver_size = htons(len & SVER_MASK);
721 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
722 s->v4.state = htons(cp->state);
723 s->v4.protocol = cp->protocol;
724 s->v4.cport = cp->cport;
725 s->v4.vport = cp->vport;
726 s->v4.dport = cp->dport;
727 s->v4.fwmark = htonl(cp->fwmark);
728 s->v4.timeout = htonl(cp->timeout / HZ);
729 m->nr_conns++;
730
731#ifdef CONFIG_IP_VS_IPV6
732 if (cp->af == AF_INET6) {
733 p += sizeof(struct ip_vs_sync_v6);
734 s->v6.caddr = cp->caddr.in6;
735 s->v6.vaddr = cp->vaddr.in6;
736 s->v6.daddr = cp->daddr.in6;
737 } else
738#endif
739 {
740 p += sizeof(struct ip_vs_sync_v4);
741 s->v4.caddr = cp->caddr.ip;
742 s->v4.vaddr = cp->vaddr.ip;
743 s->v4.daddr = cp->daddr.ip;
744 }
745 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
746 *(p++) = IPVS_OPT_SEQ_DATA;
747 *(p++) = sizeof(struct ip_vs_sync_conn_options);
748 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
749 p += sizeof(struct ip_vs_seq);
750 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
751 p += sizeof(struct ip_vs_seq);
752 }
753
754 if (cp->pe_data_len && cp->pe_data) {
755 *(p++) = IPVS_OPT_PE_DATA;
756 *(p++) = cp->pe_data_len;
757 memcpy(p, cp->pe_data, cp->pe_data_len);
758 p += cp->pe_data_len;
759 if (pe_name_len) {
760
761 *(p++) = IPVS_OPT_PE_NAME;
762 *(p++) = pe_name_len;
763 memcpy(p, cp->pe->name, pe_name_len);
764 p += pe_name_len;
765 }
766 }
767
768 spin_unlock_bh(&ipvs->sync_buff_lock);
769
770control:
771
772 cp = cp->control;
773 if (!cp)
774 return;
775 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
776 pkts = atomic_add_return(1, &cp->in_pkts);
777 else
778 pkts = sysctl_sync_threshold(ipvs);
779 goto sloop;
780}
781
782
783
784
785static inline int
786ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
787 struct ip_vs_conn_param *p,
788 __u8 *pe_data, unsigned int pe_data_len,
789 __u8 *pe_name, unsigned int pe_name_len)
790{
791#ifdef CONFIG_IP_VS_IPV6
792 if (af == AF_INET6)
793 ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
794 (const union nf_inet_addr *)&sc->v6.caddr,
795 sc->v6.cport,
796 (const union nf_inet_addr *)&sc->v6.vaddr,
797 sc->v6.vport, p);
798 else
799#endif
800 ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
801 (const union nf_inet_addr *)&sc->v4.caddr,
802 sc->v4.cport,
803 (const union nf_inet_addr *)&sc->v4.vaddr,
804 sc->v4.vport, p);
805
806 if (pe_data_len) {
807 if (pe_name_len) {
808 char buff[IP_VS_PENAME_MAXLEN+1];
809
810 memcpy(buff, pe_name, pe_name_len);
811 buff[pe_name_len]=0;
812 p->pe = __ip_vs_pe_getbyname(buff);
813 if (!p->pe) {
814 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
815 buff);
816 return 1;
817 }
818 } else {
819 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
820 return 1;
821 }
822
823 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
824 if (!p->pe_data) {
825 module_put(p->pe->module);
826 return -ENOMEM;
827 }
828 p->pe_data_len = pe_data_len;
829 }
830 return 0;
831}
832
833
834
835
836
837
838
839static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
840 unsigned int flags, unsigned int state,
841 unsigned int protocol, unsigned int type,
842 const union nf_inet_addr *daddr, __be16 dport,
843 unsigned long timeout, __u32 fwmark,
844 struct ip_vs_sync_conn_options *opt)
845{
846 struct ip_vs_dest *dest;
847 struct ip_vs_conn *cp;
848
849 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
850 cp = ip_vs_conn_in_get(param);
851 if (cp && ((cp->dport != dport) ||
852 !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
853 if (!(flags & IP_VS_CONN_F_INACTIVE)) {
854 ip_vs_conn_expire_now(cp);
855 __ip_vs_conn_put(cp);
856 cp = NULL;
857 } else {
858
859
860
861
862 __ip_vs_conn_put(cp);
863 kfree(param->pe_data);
864 return;
865 }
866 }
867 } else {
868 cp = ip_vs_ct_in_get(param);
869 }
870
871 if (cp) {
872
873 kfree(param->pe_data);
874
875 dest = cp->dest;
876 spin_lock_bh(&cp->lock);
877 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
878 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
879 if (flags & IP_VS_CONN_F_INACTIVE) {
880 atomic_dec(&dest->activeconns);
881 atomic_inc(&dest->inactconns);
882 } else {
883 atomic_inc(&dest->activeconns);
884 atomic_dec(&dest->inactconns);
885 }
886 }
887 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
888 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
889 cp->flags = flags;
890 spin_unlock_bh(&cp->lock);
891 if (!dest)
892 ip_vs_try_bind_dest(cp);
893 } else {
894
895
896
897
898
899 rcu_read_lock();
900
901
902
903
904
905 dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
906 param->vaddr, param->vport, protocol,
907 fwmark, flags);
908
909 cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
910 fwmark);
911 rcu_read_unlock();
912 if (!cp) {
913 kfree(param->pe_data);
914 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
915 return;
916 }
917 if (!(flags & IP_VS_CONN_F_TEMPLATE))
918 kfree(param->pe_data);
919 }
920
921 if (opt) {
922 cp->in_seq = opt->in_seq;
923 cp->out_seq = opt->out_seq;
924 }
925 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
926 cp->state = state;
927 cp->old_state = cp->state;
928
929
930
931
932
933
934
935
936
937 if (timeout) {
938 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
939 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
940 cp->timeout = timeout*HZ;
941 } else {
942 struct ip_vs_proto_data *pd;
943
944 pd = ip_vs_proto_data_get(ipvs, protocol);
945 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
946 cp->timeout = pd->timeout_table[state];
947 else
948 cp->timeout = (3*60*HZ);
949 }
950 ip_vs_conn_put(cp);
951}
952
953
954
955
956static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
957 const size_t buflen)
958{
959 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
960 struct ip_vs_sync_conn_v0 *s;
961 struct ip_vs_sync_conn_options *opt;
962 struct ip_vs_protocol *pp;
963 struct ip_vs_conn_param param;
964 char *p;
965 int i;
966
967 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
968 for (i=0; i<m->nr_conns; i++) {
969 unsigned int flags, state;
970
971 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
972 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
973 return;
974 }
975 s = (struct ip_vs_sync_conn_v0 *) p;
976 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
977 flags &= ~IP_VS_CONN_F_HASHED;
978 if (flags & IP_VS_CONN_F_SEQ_MASK) {
979 opt = (struct ip_vs_sync_conn_options *)&s[1];
980 p += FULL_CONN_SIZE;
981 if (p > buffer+buflen) {
982 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
983 return;
984 }
985 } else {
986 opt = NULL;
987 p += SIMPLE_CONN_SIZE;
988 }
989
990 state = ntohs(s->state);
991 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
992 pp = ip_vs_proto_get(s->protocol);
993 if (!pp) {
994 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
995 s->protocol);
996 continue;
997 }
998 if (state >= pp->num_states) {
999 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
1000 pp->name, state);
1001 continue;
1002 }
1003 } else {
1004
1005 if (state > 0) {
1006 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
1007 state);
1008 state = 0;
1009 }
1010 }
1011
1012 ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
1013 (const union nf_inet_addr *)&s->caddr,
1014 s->cport,
1015 (const union nf_inet_addr *)&s->vaddr,
1016 s->vport, ¶m);
1017
1018
1019 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->protocol, AF_INET,
1020 (union nf_inet_addr *)&s->daddr, s->dport,
1021 0, 0, opt);
1022 }
1023}
1024
1025
1026
1027
1028static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
1029 __u32 *opt_flags,
1030 struct ip_vs_sync_conn_options *opt)
1031{
1032 struct ip_vs_sync_conn_options *topt;
1033
1034 topt = (struct ip_vs_sync_conn_options *)p;
1035
1036 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
1037 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
1038 return -EINVAL;
1039 }
1040 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
1041 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
1042 return -EINVAL;
1043 }
1044 ntoh_seq(&topt->in_seq, &opt->in_seq);
1045 ntoh_seq(&topt->out_seq, &opt->out_seq);
1046 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
1047 return 0;
1048}
1049
1050static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1051 __u8 **data, unsigned int maxlen,
1052 __u32 *opt_flags, __u32 flag)
1053{
1054 if (plen > maxlen) {
1055 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
1056 return -EINVAL;
1057 }
1058 if (*opt_flags & flag) {
1059 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
1060 return -EINVAL;
1061 }
1062 *data_len = plen;
1063 *data = p;
1064 *opt_flags |= flag;
1065 return 0;
1066}
1067
1068
1069
1070static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
1071{
1072 struct ip_vs_sync_conn_options opt;
1073 union ip_vs_sync_conn *s;
1074 struct ip_vs_protocol *pp;
1075 struct ip_vs_conn_param param;
1076 __u32 flags;
1077 unsigned int af, state, pe_data_len=0, pe_name_len=0;
1078 __u8 *pe_data=NULL, *pe_name=NULL;
1079 __u32 opt_flags=0;
1080 int retc=0;
1081
1082 s = (union ip_vs_sync_conn *) p;
1083
1084 if (s->v6.type & STYPE_F_INET6) {
1085#ifdef CONFIG_IP_VS_IPV6
1086 af = AF_INET6;
1087 p += sizeof(struct ip_vs_sync_v6);
1088#else
1089 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1090 retc = 10;
1091 goto out;
1092#endif
1093 } else if (!s->v4.type) {
1094 af = AF_INET;
1095 p += sizeof(struct ip_vs_sync_v4);
1096 } else {
1097 return -10;
1098 }
1099 if (p > msg_end)
1100 return -20;
1101
1102
1103 while (p < msg_end) {
1104 int ptype;
1105 int plen;
1106
1107 if (p+2 > msg_end)
1108 return -30;
1109 ptype = *(p++);
1110 plen = *(p++);
1111
1112 if (!plen || ((p + plen) > msg_end))
1113 return -40;
1114
1115 switch (ptype & ~IPVS_OPT_F_PARAM) {
1116 case IPVS_OPT_SEQ_DATA:
1117 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1118 return -50;
1119 break;
1120
1121 case IPVS_OPT_PE_DATA:
1122 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1123 IP_VS_PEDATA_MAXLEN, &opt_flags,
1124 IPVS_OPT_F_PE_DATA))
1125 return -60;
1126 break;
1127
1128 case IPVS_OPT_PE_NAME:
1129 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1130 IP_VS_PENAME_MAXLEN, &opt_flags,
1131 IPVS_OPT_F_PE_NAME))
1132 return -70;
1133 break;
1134
1135 default:
1136
1137 if (!(ptype & IPVS_OPT_F_PARAM)) {
1138 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1139 ptype & ~IPVS_OPT_F_PARAM);
1140 retc = 20;
1141 goto out;
1142 }
1143 }
1144 p += plen;
1145 }
1146
1147
1148 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1149 flags |= IP_VS_CONN_F_SYNC;
1150 state = ntohs(s->v4.state);
1151
1152 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1153 pp = ip_vs_proto_get(s->v4.protocol);
1154 if (!pp) {
1155 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1156 s->v4.protocol);
1157 retc = 30;
1158 goto out;
1159 }
1160 if (state >= pp->num_states) {
1161 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1162 pp->name, state);
1163 retc = 40;
1164 goto out;
1165 }
1166 } else {
1167
1168 if (state > 0) {
1169 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1170 state);
1171 state = 0;
1172 }
1173 }
1174 if (ip_vs_conn_fill_param_sync(ipvs, af, s, ¶m, pe_data,
1175 pe_data_len, pe_name, pe_name_len)) {
1176 retc = 50;
1177 goto out;
1178 }
1179
1180 if (af == AF_INET)
1181 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v4.protocol, af,
1182 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1183 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1184 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1185 );
1186#ifdef CONFIG_IP_VS_IPV6
1187 else
1188 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v6.protocol, af,
1189 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1190 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1191 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1192 );
1193#endif
1194 ip_vs_pe_put(param.pe);
1195 return 0;
1196
1197out:
1198 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1199 return retc;
1200
1201}
1202
1203
1204
1205
1206
1207static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
1208 const size_t buflen)
1209{
1210 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1211 __u8 *p, *msg_end;
1212 int i, nr_conns;
1213
1214 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1215 IP_VS_DBG(2, "BACKUP, message header too short\n");
1216 return;
1217 }
1218
1219 if (buflen != ntohs(m2->size)) {
1220 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1221 return;
1222 }
1223
1224 if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
1225 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1226 return;
1227 }
1228
1229 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1230 && (m2->spare == 0)) {
1231
1232 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1233 nr_conns = m2->nr_conns;
1234
1235 for (i=0; i<nr_conns; i++) {
1236 union ip_vs_sync_conn *s;
1237 unsigned int size;
1238 int retc;
1239
1240 p = msg_end;
1241 if (p + sizeof(s->v4) > buffer+buflen) {
1242 IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
1243 return;
1244 }
1245 s = (union ip_vs_sync_conn *)p;
1246 size = ntohs(s->v4.ver_size) & SVER_MASK;
1247 msg_end = p + size;
1248
1249 if (msg_end > buffer+buflen) {
1250 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
1251 return;
1252 }
1253 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
1254 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
1255 ntohs(s->v4.ver_size) >> SVER_SHIFT);
1256 return;
1257 }
1258
1259 retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
1260 if (retc < 0) {
1261 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
1262 retc);
1263 return;
1264 }
1265
1266 msg_end = p + ((size + 3) & ~3);
1267 }
1268 } else {
1269
1270 ip_vs_process_message_v0(ipvs, buffer, buflen);
1271 return;
1272 }
1273}
1274
1275
1276
1277
1278
1279static void set_sock_size(struct sock *sk, int mode, int val)
1280{
1281
1282
1283 lock_sock(sk);
1284 if (mode) {
1285 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
1286 sysctl_wmem_max);
1287 sk->sk_sndbuf = val * 2;
1288 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1289 } else {
1290 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
1291 sysctl_rmem_max);
1292 sk->sk_rcvbuf = val * 2;
1293 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1294 }
1295 release_sock(sk);
1296}
1297
1298
1299
1300
1301static void set_mcast_loop(struct sock *sk, u_char loop)
1302{
1303 struct inet_sock *inet = inet_sk(sk);
1304
1305
1306 lock_sock(sk);
1307 inet->mc_loop = loop ? 1 : 0;
1308#ifdef CONFIG_IP_VS_IPV6
1309 if (sk->sk_family == AF_INET6) {
1310 struct ipv6_pinfo *np = inet6_sk(sk);
1311
1312
1313 np->mc_loop = loop ? 1 : 0;
1314 }
1315#endif
1316 release_sock(sk);
1317}
1318
1319
1320
1321
1322static void set_mcast_ttl(struct sock *sk, u_char ttl)
1323{
1324 struct inet_sock *inet = inet_sk(sk);
1325
1326
1327 lock_sock(sk);
1328 inet->mc_ttl = ttl;
1329#ifdef CONFIG_IP_VS_IPV6
1330 if (sk->sk_family == AF_INET6) {
1331 struct ipv6_pinfo *np = inet6_sk(sk);
1332
1333
1334 np->mcast_hops = ttl;
1335 }
1336#endif
1337 release_sock(sk);
1338}
1339
1340
1341static void set_mcast_pmtudisc(struct sock *sk, int val)
1342{
1343 struct inet_sock *inet = inet_sk(sk);
1344
1345
1346 lock_sock(sk);
1347 inet->pmtudisc = val;
1348#ifdef CONFIG_IP_VS_IPV6
1349 if (sk->sk_family == AF_INET6) {
1350 struct ipv6_pinfo *np = inet6_sk(sk);
1351
1352
1353 np->pmtudisc = val;
1354 }
1355#endif
1356 release_sock(sk);
1357}
1358
1359
1360
1361
1362static int set_mcast_if(struct sock *sk, char *ifname)
1363{
1364 struct net_device *dev;
1365 struct inet_sock *inet = inet_sk(sk);
1366 struct net *net = sock_net(sk);
1367
1368 dev = __dev_get_by_name(net, ifname);
1369 if (!dev)
1370 return -ENODEV;
1371
1372 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1373 return -EINVAL;
1374
1375 lock_sock(sk);
1376 inet->mc_index = dev->ifindex;
1377
1378#ifdef CONFIG_IP_VS_IPV6
1379 if (sk->sk_family == AF_INET6) {
1380 struct ipv6_pinfo *np = inet6_sk(sk);
1381
1382
1383 np->mcast_oif = dev->ifindex;
1384 }
1385#endif
1386 release_sock(sk);
1387
1388 return 0;
1389}
1390
1391
1392
1393
1394
1395
1396
1397static int
1398join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
1399{
1400 struct net *net = sock_net(sk);
1401 struct ip_mreqn mreq;
1402 struct net_device *dev;
1403 int ret;
1404
1405 memset(&mreq, 0, sizeof(mreq));
1406 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1407
1408 dev = __dev_get_by_name(net, ifname);
1409 if (!dev)
1410 return -ENODEV;
1411 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1412 return -EINVAL;
1413
1414 mreq.imr_ifindex = dev->ifindex;
1415
1416 lock_sock(sk);
1417 ret = ip_mc_join_group(sk, &mreq);
1418 release_sock(sk);
1419
1420 return ret;
1421}
1422
#ifdef CONFIG_IP_VS_IPV6
/*
 * Join the IPv6 multicast group @addr on the interface named @ifname.
 *
 * Returns the result of ipv6_sock_mc_join(), -ENODEV when the device does
 * not exist in the socket's netns, or -EINVAL when the socket is already
 * bound to a different device.
 */
static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
			     char *ifname)
{
	struct net *net = sock_net(sk);
	struct net_device *ndev = __dev_get_by_name(net, ifname);
	int err;

	if (!ndev)
		return -ENODEV;
	if (sk->sk_bound_dev_if && ndev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	lock_sock(sk);
	err = ipv6_sock_mc_join(sk, ndev->ifindex, addr);
	release_sock(sk);

	return err;
}
#endif
1444
1445static int bind_mcastif_addr(struct socket *sock, char *ifname)
1446{
1447 struct net *net = sock_net(sock->sk);
1448 struct net_device *dev;
1449 __be32 addr;
1450 struct sockaddr_in sin;
1451
1452 dev = __dev_get_by_name(net, ifname);
1453 if (!dev)
1454 return -ENODEV;
1455
1456 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1457 if (!addr)
1458 pr_err("You probably need to specify IP address on "
1459 "multicast interface.\n");
1460
1461 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
1462 ifname, &addr);
1463
1464
1465 sin.sin_family = AF_INET;
1466 sin.sin_addr.s_addr = addr;
1467 sin.sin_port = 0;
1468
1469 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
1470}
1471
1472static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
1473 struct ipvs_sync_daemon_cfg *c, int id)
1474{
1475 if (AF_INET6 == c->mcast_af) {
1476 sa->in6 = (struct sockaddr_in6) {
1477 .sin6_family = AF_INET6,
1478 .sin6_port = htons(c->mcast_port + id),
1479 };
1480 sa->in6.sin6_addr = c->mcast_group.in6;
1481 *salen = sizeof(sa->in6);
1482 } else {
1483 sa->in = (struct sockaddr_in) {
1484 .sin_family = AF_INET,
1485 .sin_port = htons(c->mcast_port + id),
1486 };
1487 sa->in.sin_addr = c->mcast_group.in;
1488 *salen = sizeof(sa->in);
1489 }
1490}
1491
1492
1493
1494
1495static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
1496{
1497
1498 union ipvs_sockaddr mcast_addr;
1499 struct socket *sock;
1500 int result, salen;
1501
1502
1503 result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
1504 IPPROTO_UDP, &sock);
1505 if (result < 0) {
1506 pr_err("Error during creation of socket; terminating\n");
1507 return ERR_PTR(result);
1508 }
1509 result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
1510 if (result < 0) {
1511 pr_err("Error setting outbound mcast interface\n");
1512 goto error;
1513 }
1514
1515 set_mcast_loop(sock->sk, 0);
1516 set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
1517
1518 set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
1519 result = sysctl_sync_sock_size(ipvs);
1520 if (result > 0)
1521 set_sock_size(sock->sk, 1, result);
1522
1523 if (AF_INET == ipvs->mcfg.mcast_af)
1524 result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
1525 else
1526 result = 0;
1527 if (result < 0) {
1528 pr_err("Error binding address of the mcast interface\n");
1529 goto error;
1530 }
1531
1532 get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
1533 result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
1534 salen, 0);
1535 if (result < 0) {
1536 pr_err("Error connecting to the multicast addr\n");
1537 goto error;
1538 }
1539
1540 return sock;
1541
1542error:
1543 sock_release(sock);
1544 return ERR_PTR(result);
1545}
1546
1547
1548
1549
1550
1551static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
1552 int ifindex)
1553{
1554
1555 union ipvs_sockaddr mcast_addr;
1556 struct socket *sock;
1557 int result, salen;
1558
1559
1560 result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
1561 IPPROTO_UDP, &sock);
1562 if (result < 0) {
1563 pr_err("Error during creation of socket; terminating\n");
1564 return ERR_PTR(result);
1565 }
1566
1567 sock->sk->sk_reuse = SK_CAN_REUSE;
1568 result = sysctl_sync_sock_size(ipvs);
1569 if (result > 0)
1570 set_sock_size(sock->sk, 0, result);
1571
1572 get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
1573 sock->sk->sk_bound_dev_if = ifindex;
1574 result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
1575 if (result < 0) {
1576 pr_err("Error binding to the multicast addr\n");
1577 goto error;
1578 }
1579
1580
1581#ifdef CONFIG_IP_VS_IPV6
1582 if (ipvs->bcfg.mcast_af == AF_INET6)
1583 result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
1584 ipvs->bcfg.mcast_ifn);
1585 else
1586#endif
1587 result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
1588 ipvs->bcfg.mcast_ifn);
1589 if (result < 0) {
1590 pr_err("Error joining to the multicast group\n");
1591 goto error;
1592 }
1593
1594 return sock;
1595
1596error:
1597 sock_release(sock);
1598 return ERR_PTR(result);
1599}
1600
1601
1602static int
1603ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1604{
1605 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1606 struct kvec iov;
1607 int len;
1608
1609 EnterFunction(7);
1610 iov.iov_base = (void *)buffer;
1611 iov.iov_len = length;
1612
1613 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1614
1615 LeaveFunction(7);
1616 return len;
1617}
1618
1619static int
1620ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1621{
1622 int msize;
1623 int ret;
1624
1625 msize = ntohs(msg->size);
1626
1627 ret = ip_vs_send_async(sock, (char *)msg, msize);
1628 if (ret >= 0 || ret == -EAGAIN)
1629 return ret;
1630 pr_err("ip_vs_send_async error %d\n", ret);
1631 return 0;
1632}
1633
1634static int
1635ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1636{
1637 struct msghdr msg = {NULL,};
1638 struct kvec iov;
1639 int len;
1640
1641 EnterFunction(7);
1642
1643
1644 iov.iov_base = buffer;
1645 iov.iov_len = (size_t)buflen;
1646
1647 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1648
1649 if (len < 0)
1650 return len;
1651
1652 LeaveFunction(7);
1653 return len;
1654}
1655
1656
1657static void master_wakeup_work_handler(struct work_struct *work)
1658{
1659 struct ipvs_master_sync_state *ms =
1660 container_of(work, struct ipvs_master_sync_state,
1661 master_wakeup_work.work);
1662 struct netns_ipvs *ipvs = ms->ipvs;
1663
1664 spin_lock_bh(&ipvs->sync_lock);
1665 if (ms->sync_queue_len &&
1666 ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
1667 ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
1668 wake_up_process(ms->master_thread);
1669 }
1670 spin_unlock_bh(&ipvs->sync_lock);
1671}
1672
1673
1674static inline struct ip_vs_sync_buff *
1675next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1676{
1677 struct ip_vs_sync_buff *sb;
1678
1679 sb = sb_dequeue(ipvs, ms);
1680 if (sb)
1681 return sb;
1682
1683 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1684}
1685
/*
 *	Kernel thread body of one master sync daemon.
 *
 *	@data is the struct ip_vs_sync_thread_data handed over by
 *	start_sync_thread(); this thread releases its socket and frees it on
 *	exit. The loop fetches sync buffers for master state tinfo->id and
 *	sends them until kthread_should_stop() becomes true, then drops all
 *	buffers that are still pending.
 *
 *	Always returns 0.
 */
static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = tinfo->ipvs;
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		if (unlikely(kthread_should_stop()))
			break;
		if (!sb) {
			/*
			 * Nothing to send: sleep up to IPVS_SYNC_CHECK_PERIOD.
			 * NOTE(review): no task state is set here, so this
			 * presumably relies on next_sync_buff() having left
			 * the task in a sleeping state - confirm against
			 * sb_dequeue().
			 */
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		/*
		 * ip_vs_send_sync_msg() only returns a negative value for
		 * -EAGAIN, so this loop retries the same buffer until it is
		 * sent, dropped by the helper, or the thread is stopped.
		 */
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			/* Sleep until the socket is writeable or we must stop. */
			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop());
			if (unlikely(kthread_should_stop()))
				goto done;
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	/* Release the buffer that was in flight when the stop arrived. */
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* Drop everything still waiting in the sync queue. */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	/*
	 * NOTE(review): the state is reset again, presumably because the
	 * final sb_dequeue() on an empty queue changes it - confirm.
	 */
	__set_current_state(TASK_RUNNING);

	/* Drop the current buffer as well (requested with time 0). */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* Release the socket and the per-thread data owned by this thread. */
	sock_release(tinfo->sock);
	kfree(tinfo);

	return 0;
}
1740
1741
1742static int sync_thread_backup(void *data)
1743{
1744 struct ip_vs_sync_thread_data *tinfo = data;
1745 struct netns_ipvs *ipvs = tinfo->ipvs;
1746 int len;
1747
1748 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
1749 "syncid = %d, id = %d\n",
1750 ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);
1751
1752 while (!kthread_should_stop()) {
1753 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
1754 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
1755 || kthread_should_stop());
1756
1757
1758 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
1759 len = ip_vs_receive(tinfo->sock, tinfo->buf,
1760 ipvs->bcfg.sync_maxlen);
1761 if (len <= 0) {
1762 if (len != -EAGAIN)
1763 pr_err("receiving message error\n");
1764 break;
1765 }
1766
1767 ip_vs_process_message(ipvs, tinfo->buf, len);
1768 }
1769 }
1770
1771
1772 sock_release(tinfo->sock);
1773 kfree(tinfo->buf);
1774 kfree(tinfo);
1775
1776 return 0;
1777}
1778
1779
/*
 *	Start the sync daemon threads for @state (IP_VS_STATE_MASTER or
 *	IP_VS_STATE_BACKUP) using configuration @c.
 *
 *	@c is completed with defaults (address family/group, port, TTL,
 *	sync_maxlen) and then copied into ipvs->mcfg or ipvs->bcfg. One socket
 *	and one kernel thread are created per sync port.
 *
 *	Returns 0 on success, -ENODEV for an unknown interface, -EEXIST if
 *	the daemon for @state is already set up, -EINVAL for any other
 *	@state, -ENOMEM on allocation failure, or the error from socket or
 *	thread creation.
 *
 *	NOTE(review): __dev_get_by_name() is used without taking a device
 *	reference; presumably the caller holds RTNL - confirm.
 */
int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
		      int state)
{
	struct ip_vs_sync_thread_data *tinfo;
	struct task_struct **array = NULL, *task;
	struct socket *sock;
	struct net_device *dev;
	char *name;
	int (*threadfn)(void *data);
	int id, count, hlen;
	int result = -ENOMEM;
	u16 mtu, min_mtu;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
		  sizeof(struct ip_vs_sync_conn_v0));

	/*
	 * With no daemon running the thread count comes from the sync_ports
	 * sysctl (clamped to 1..IPVS_SYNC_PORTS_MAX) and is remembered in
	 * threads_mask; otherwise the remembered count is reused.
	 */
	if (!ipvs->sync_state) {
		count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
		ipvs->threads_mask = count - 1;
	} else
		count = ipvs->threads_mask + 1;

	/* Fill in defaults for everything the caller left unset. */
	if (c->mcast_af == AF_UNSPEC) {
		c->mcast_af = AF_INET;
		c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
	}
	if (!c->mcast_port)
		c->mcast_port = IP_VS_SYNC_PORT;
	if (!c->mcast_ttl)
		c->mcast_ttl = 1;

	dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
	if (!dev) {
		pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
		return -ENODEV;
	}
	/* IP + UDP header size for the chosen address family. */
	hlen = (AF_INET6 == c->mcast_af) ?
	       sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
	       sizeof(struct iphdr) + sizeof(struct udphdr);
	/*
	 * Backup: device MTU clamped to 1500..65535 and a minimum
	 * sync_maxlen of 1024. Master: fixed 1500 and a minimum of 1.
	 */
	mtu = (state == IP_VS_STATE_BACKUP) ?
		  clamp(dev->mtu, 1500U, 65535U) : 1500U;
	min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;

	/* Clamp a configured sync_maxlen, or derive it from the MTU. */
	if (c->sync_maxlen)
		c->sync_maxlen = clamp_t(unsigned int,
					 c->sync_maxlen, min_mtu,
					 65535 - hlen);
	else
		c->sync_maxlen = mtu - hlen;

	/* Reject duplicates, store the config and pick the thread function. */
	if (state == IP_VS_STATE_MASTER) {
		if (ipvs->ms)
			return -EEXIST;

		ipvs->mcfg = *c;
		name = "ipvs-m:%d:%d";
		threadfn = sync_thread_master;
	} else if (state == IP_VS_STATE_BACKUP) {
		if (ipvs->backup_threads)
			return -EEXIST;

		ipvs->bcfg = *c;
		name = "ipvs-b:%d:%d";
		threadfn = sync_thread_backup;
	} else {
		return -EINVAL;
	}

	if (state == IP_VS_STATE_MASTER) {
		struct ipvs_master_sync_state *ms;

		/* One master sync state (queue + wakeup work) per thread. */
		ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
		if (!ipvs->ms)
			goto out;
		ms = ipvs->ms;
		for (id = 0; id < count; id++, ms++) {
			INIT_LIST_HEAD(&ms->sync_queue);
			ms->sync_queue_len = 0;
			ms->sync_queue_delay = 0;
			INIT_DELAYED_WORK(&ms->master_wakeup_work,
					  master_wakeup_work_handler);
			ms->ipvs = ipvs;
		}
	} else {
		/* Backup threads are tracked in a plain task array. */
		array = kcalloc(count, sizeof(struct task_struct *),
				GFP_KERNEL);
		if (!array)
			goto out;
	}

	/*
	 * Create one socket, one tinfo and one thread per id. tinfo is reset
	 * to NULL once the thread has been created so that the error path
	 * only frees a tinfo that was not handed to a thread.
	 */
	tinfo = NULL;
	for (id = 0; id < count; id++) {
		if (state == IP_VS_STATE_MASTER)
			sock = make_send_sock(ipvs, id);
		else
			sock = make_receive_sock(ipvs, id, dev->ifindex);
		if (IS_ERR(sock)) {
			result = PTR_ERR(sock);
			goto outtinfo;
		}
		tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
		if (!tinfo)
			goto outsocket;
		tinfo->ipvs = ipvs;
		tinfo->sock = sock;
		/* Only backup threads need a receive buffer. */
		if (state == IP_VS_STATE_BACKUP) {
			tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
					     GFP_KERNEL);
			if (!tinfo->buf)
				goto outtinfo;
		} else {
			tinfo->buf = NULL;
		}
		tinfo->id = id;

		task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
		if (IS_ERR(task)) {
			result = PTR_ERR(task);
			goto outtinfo;
		}
		tinfo = NULL;
		if (state == IP_VS_STATE_MASTER)
			ipvs->ms[id].master_thread = task;
		else
			array[id] = task;
	}

	/* All threads are running: publish the new state. */

	if (state == IP_VS_STATE_BACKUP)
		ipvs->backup_threads = array;
	spin_lock_bh(&ipvs->sync_buff_lock);
	ipvs->sync_state |= state;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* Matched by ip_vs_use_count_dec() in stop_sync_thread(). */
	ip_vs_use_count_inc();

	return 0;

outsocket:
	/* Socket exists but no tinfo owns it yet; tinfo is NULL here. */
	sock_release(sock);

outtinfo:
	/* Free a tinfo (and its socket/buffer) not yet handed to a thread. */
	if (tinfo) {
		sock_release(tinfo->sock);
		kfree(tinfo->buf);
		kfree(tinfo);
	}
	/* Stop the threads created by earlier iterations (ids below id). */
	count = id;
	while (count-- > 0) {
		if (state == IP_VS_STATE_MASTER)
			kthread_stop(ipvs->ms[count].master_thread);
		else
			kthread_stop(array[count]);
	}
	kfree(array);

out:
	/* Free the master states unless the MASTER bit is set in sync_state. */
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	}
	return result;
}
1946
1947
1948int stop_sync_thread(struct netns_ipvs *ipvs, int state)
1949{
1950 struct task_struct **array;
1951 int id;
1952 int retc = -EINVAL;
1953
1954 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1955
1956 if (state == IP_VS_STATE_MASTER) {
1957 if (!ipvs->ms)
1958 return -ESRCH;
1959
1960
1961
1962
1963
1964
1965
1966 spin_lock_bh(&ipvs->sync_buff_lock);
1967 spin_lock(&ipvs->sync_lock);
1968 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1969 spin_unlock(&ipvs->sync_lock);
1970 spin_unlock_bh(&ipvs->sync_buff_lock);
1971
1972 retc = 0;
1973 for (id = ipvs->threads_mask; id >= 0; id--) {
1974 struct ipvs_master_sync_state *ms = &ipvs->ms[id];
1975 int ret;
1976
1977 pr_info("stopping master sync thread %d ...\n",
1978 task_pid_nr(ms->master_thread));
1979 cancel_delayed_work_sync(&ms->master_wakeup_work);
1980 ret = kthread_stop(ms->master_thread);
1981 if (retc >= 0)
1982 retc = ret;
1983 }
1984 kfree(ipvs->ms);
1985 ipvs->ms = NULL;
1986 } else if (state == IP_VS_STATE_BACKUP) {
1987 if (!ipvs->backup_threads)
1988 return -ESRCH;
1989
1990 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1991 array = ipvs->backup_threads;
1992 retc = 0;
1993 for (id = ipvs->threads_mask; id >= 0; id--) {
1994 int ret;
1995
1996 pr_info("stopping backup sync thread %d ...\n",
1997 task_pid_nr(array[id]));
1998 ret = kthread_stop(array[id]);
1999 if (retc >= 0)
2000 retc = ret;
2001 }
2002 kfree(array);
2003 ipvs->backup_threads = NULL;
2004 }
2005
2006
2007 ip_vs_use_count_dec();
2008
2009 return retc;
2010}
2011
2012
2013
2014
2015int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
2016{
2017 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
2018 spin_lock_init(&ipvs->sync_lock);
2019 spin_lock_init(&ipvs->sync_buff_lock);
2020 return 0;
2021}
2022
2023void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
2024{
2025 int retc;
2026
2027 mutex_lock(&ipvs->sync_mutex);
2028 retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
2029 if (retc && retc != -ESRCH)
2030 pr_err("Failed to stop Master Daemon\n");
2031
2032 retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
2033 if (retc && retc != -ESRCH)
2034 pr_err("Failed to stop Backup Daemon\n");
2035 mutex_unlock(&ipvs->sync_mutex);
2036}
2037