1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#define KMSG_COMPONENT "IPVS"
36#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
37
38#include <linux/module.h>
39#include <linux/slab.h>
40#include <linux/inetdevice.h>
41#include <linux/net.h>
42#include <linux/completion.h>
43#include <linux/delay.h>
44#include <linux/skbuff.h>
45#include <linux/in.h>
46#include <linux/igmp.h>
47#include <linux/udp.h>
48#include <linux/err.h>
49#include <linux/kthread.h>
50#include <linux/wait.h>
51#include <linux/kernel.h>
52#include <linux/sched/signal.h>
53
54#include <asm/unaligned.h>
55
56#include <net/ip.h>
57#include <net/sock.h>
58
59#include <net/ip_vs.h>
60
61#define IP_VS_SYNC_GROUP 0xe0000051
62#define IP_VS_SYNC_PORT 8848
63
64#define SYNC_PROTO_VER 1
65
66static struct lock_class_key __ipvs_sync_key;
67
68
69
70
/*
 * Version 0 on-wire connection entry (IPv4 only).
 * All multi-byte fields are in network byte order (__be16/__be32).
 */
struct ip_vs_sync_conn_v0 {
	__u8			reserved;

	/* Protocol, ports and addresses */
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			cport;		/* client port */
	__be16			vport;		/* virtual service port */
	__be16			dport;		/* destination (real server) port */
	__be32			caddr;		/* client address */
	__be32			vaddr;		/* virtual address */
	__be32			daddr;		/* destination address */

	/* Flags and state transition */
	__be16			flags;		/* status flags */
	__be16			state;		/* state info */

	/* The sequence options start here (struct ip_vs_sync_conn_options,
	 * present when IP_VS_CONN_F_SEQ_MASK flags are set)
	 */
};
89
/* TCP sequence-number state for both directions, appended to a sync
 * entry when the connection carries sequence adjustments
 * (IP_VS_CONN_F_SEQ_MASK). Fields are stored in network byte order on
 * the wire and converted with ntoh_seq()/hton_seq().
 */
struct ip_vs_sync_conn_options {
	struct ip_vs_seq	in_seq;		/* incoming seq. struct */
	struct ip_vs_seq	out_seq;	/* outgoing seq. struct */
};
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*
 * Version 1 on-wire connection entry, IPv4 flavour.
 * Multi-byte fields are in network byte order; optional parameters
 * (sequence data, PE data/name) follow the fixed part.
 */
struct ip_vs_sync_v4 {
	__u8			type;		/* STYPE_F_INET6 clear => IPv4 */
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			ver_size;	/* Version msb 4 bits, size of entry */
	/* Flags and state transition */
	__be32			flags;		/* status flags */
	__be16			state;		/* state info   */
	/* Protocol, addresses and port numbers */
	__be16			cport;		/* client port */
	__be16			vport;		/* virtual service port */
	__be16			dport;		/* destination port */
	__be32			fwmark;		/* Firewall mark from skb */
	__be32			timeout;	/* cp timeout (in seconds) */
	__be32			caddr;		/* client address */
	__be32			vaddr;		/* virtual address */
	__be32			daddr;		/* destination address */
	/* The sequence options start here */
	/* PE data padded to 32bit alignment after seq. options */
};
152
153
154
/*
 * Version 1 on-wire connection entry, IPv6 flavour.
 * Same layout as struct ip_vs_sync_v4 but with 128-bit addresses.
 */
struct ip_vs_sync_v6 {
	__u8			type;		/* STYPE_F_INET6 set => IPv6 */
	__u8			protocol;	/* Which protocol (TCP/UDP) */
	__be16			ver_size;	/* Version msb 4 bits, size of entry */
	/* Flags and state transition */
	__be32			flags;		/* status flags */
	__be16			state;		/* state info   */
	/* Protocol, addresses and port numbers */
	__be16			cport;		/* client port */
	__be16			vport;		/* virtual service port */
	__be16			dport;		/* destination port */
	__be32			fwmark;		/* Firewall mark from skb */
	__be32			timeout;	/* cp timeout (in seconds) */
	struct in6_addr		caddr;		/* client address */
	struct in6_addr		vaddr;		/* virtual address */
	struct in6_addr		daddr;		/* destination address */
	/* The sequence options start here */
	/* PE data padded to 32bit alignment after seq. options */
};
174
/* One decoded sync entry; the address family is selected by the
 * STYPE_F_INET6 bit in the common leading "type" byte.
 */
union ip_vs_sync_conn {
	struct ip_vs_sync_v4	v4;
	struct ip_vs_sync_v6	v6;
};
179
180
181#define STYPE_INET6 0
182#define STYPE_F_INET6 (1 << STYPE_INET6)
183
184#define SVER_SHIFT 12
185#define SVER_MASK 0x0fff
186
187#define IPVS_OPT_SEQ_DATA 1
188#define IPVS_OPT_PE_DATA 2
189#define IPVS_OPT_PE_NAME 3
190#define IPVS_OPT_PARAM 7
191
192#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
193#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
194#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
195#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
196
/* Per-thread context for the master/backup sync kthreads. */
struct ip_vs_sync_thread_data {
	struct task_struct	*task;	/* the kthread itself */
	struct netns_ipvs	*ipvs;	/* owning netns IPVS state */
	struct socket		*sock;	/* mcast send/receive socket */
	char			*buf;	/* receive buffer (backup threads) */
	int			id;	/* thread index (0..threads-1) */
};
204
205
206#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
207#define FULL_CONN_SIZE \
208(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245#define SYNC_MESG_HEADER_LEN 4
246#define MAX_CONNS_PER_SYNCBUFF 255
247
248
/* Version 0 header: first byte is the connection count (non-zero),
 * which is how v0 messages are told apart from v1 (whose first byte,
 * "reserved", must be zero).
 */
struct ip_vs_sync_mesg_v0 {
	__u8                    nr_conns;
	__u8                    syncid;
	__be16                  size;

	/* ip_vs_sync_conn entries start here */
};
256
257
/* Version 1 header. "reserved" occupies the old v0 nr_conns slot and
 * must be zero so receivers can distinguish v1 from v0 messages
 * (see ip_vs_process_message()).
 */
struct ip_vs_sync_mesg {
	__u8			reserved;	/* must be zero */
	__u8			syncid;
	__be16			size;
	__u8			nr_conns;
	__s8			version;	/* SYNC_PROTO_VER  */
	__u16			spare;		/* must be zero */
	/* ip_vs_sync_conn entries start here */
};
267
/* IPv4/IPv6 socket address for the multicast group endpoints. */
union ipvs_sockaddr {
	struct sockaddr_in	in;
	struct sockaddr_in6	in6;
};
272
/* In-memory sync message under construction / queued for sending. */
struct ip_vs_sync_buff {
	struct list_head        list;		/* node in ms->sync_queue */
	unsigned long           firstuse;	/* jiffies of first entry, used
						 * to flush aged buffers */

	/* pointers for the message data */
	struct ip_vs_sync_mesg  *mesg;		/* message header + payload */
	unsigned char           *head;		/* next free byte */
	unsigned char           *end;		/* one past the buffer end */
};
282
283
284
285
286
/*
 * Copy "no" (unaligned network-order sequence data from the wire) into
 * "ho" in aligned host order. The wire struct may sit at any offset, so
 * the unaligned accessors are mandatory here.
 */
static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
{
	memset(ho, 0, sizeof(*ho));	/* clear any fields not copied below */
	ho->init_seq       = get_unaligned_be32(&no->init_seq);
	ho->delta          = get_unaligned_be32(&no->delta);
	ho->previous_delta = get_unaligned_be32(&no->previous_delta);
}
294
295
296
297
298
/*
 * Copy "ho" (aligned host-order sequence data) into "no" in network
 * order; "no" points into the wire buffer and may be unaligned.
 */
static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
{
	put_unaligned_be32(ho->init_seq, &no->init_seq);
	put_unaligned_be32(ho->delta, &no->delta);
	put_unaligned_be32(ho->previous_delta, &no->previous_delta);
}
305
/*
 * Pop the oldest sync buffer from the master state's send queue, or
 * return NULL when the queue is empty. Called by the master sync
 * thread: on an empty queue we flip the task to TASK_INTERRUPTIBLE
 * while still holding the lock, so a concurrent sb_queue_tail() wakeup
 * cannot be lost before the thread actually sleeps.
 */
static inline struct ip_vs_sync_buff *
sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb;

	spin_lock_bh(&ipvs->sync_lock);
	if (list_empty(&ms->sync_queue)) {
		sb = NULL;
		__set_current_state(TASK_INTERRUPTIBLE);
	} else {
		sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
				list);
		list_del(&sb->list);
		ms->sync_queue_len--;
		/* queue drained: reset the wakeup-rate counter */
		if (!ms->sync_queue_len)
			ms->sync_queue_delay = 0;
	}
	spin_unlock_bh(&ipvs->sync_lock);

	return sb;
}
327
328
329
330
331static inline struct ip_vs_sync_buff *
332ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
333{
334 struct ip_vs_sync_buff *sb;
335
336 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
337 return NULL;
338
339 len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
340 ipvs->mcfg.sync_maxlen);
341 sb->mesg = kmalloc(len, GFP_ATOMIC);
342 if (!sb->mesg) {
343 kfree(sb);
344 return NULL;
345 }
346 sb->mesg->reserved = 0;
347 sb->mesg->version = SYNC_PROTO_VER;
348 sb->mesg->syncid = ipvs->mcfg.syncid;
349 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
350 sb->mesg->nr_conns = 0;
351 sb->mesg->spare = 0;
352 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
353 sb->end = (unsigned char *)sb->mesg + len;
354
355 sb->firstuse = jiffies;
356 return sb;
357}
358
/* Free a sync buffer and its message payload. */
static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
{
	kfree(sb->mesg);
	kfree(sb);
}
364
/*
 * Move ms->sync_buff to the tail of the send queue. The buffer is
 * dropped instead if we are no longer in master mode or the queue has
 * reached sync_qlen_max. On the first queued buffer a delayed wakeup is
 * scheduled; after IPVS_SYNC_WAKEUP_RATE enqueues the master thread is
 * woken immediately. Caller already runs in BH context, hence the
 * plain spin_lock().
 */
static inline void sb_queue_tail(struct netns_ipvs *ipvs,
				 struct ipvs_master_sync_state *ms)
{
	struct ip_vs_sync_buff *sb = ms->sync_buff;

	spin_lock(&ipvs->sync_lock);
	if (ipvs->sync_state & IP_VS_STATE_MASTER &&
	    ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
		if (!ms->sync_queue_len)
			schedule_delayed_work(&ms->master_wakeup_work,
					      max(IPVS_SYNC_SEND_DELAY, 1));
		ms->sync_queue_len++;
		list_add_tail(&sb->list, &ms->sync_queue);
		if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) {
			int id = (int)(ms - ipvs->ms);

			wake_up_process(ipvs->master_tinfo[id].task);
		}
	} else
		ip_vs_sync_buff_release(sb);
	spin_unlock(&ipvs->sync_lock);
}
387
388
389
390
391
/*
 * Detach and return the in-progress sync buffer if it has been in use
 * for at least "time" jiffies, otherwise return NULL. Setting
 * TASK_RUNNING undoes a TASK_INTERRUPTIBLE set earlier by the calling
 * master thread (see sb_dequeue()), since there is now work to do.
 */
static inline struct ip_vs_sync_buff *
get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
		   unsigned long time)
{
	struct ip_vs_sync_buff *sb;

	spin_lock_bh(&ipvs->sync_buff_lock);
	sb = ms->sync_buff;
	if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
		ms->sync_buff = NULL;
		__set_current_state(TASK_RUNNING);
	} else
		sb = NULL;
	spin_unlock_bh(&ipvs->sync_buff_lock);
	return sb;
}
408
409static inline int
410select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
411{
412 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
413}
414
415
416
417
418static inline struct ip_vs_sync_buff *
419ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
420{
421 struct ip_vs_sync_buff *sb;
422 struct ip_vs_sync_mesg_v0 *mesg;
423
424 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
425 return NULL;
426
427 len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
428 ipvs->mcfg.sync_maxlen);
429 sb->mesg = kmalloc(len, GFP_ATOMIC);
430 if (!sb->mesg) {
431 kfree(sb);
432 return NULL;
433 }
434 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
435 mesg->nr_conns = 0;
436 mesg->syncid = ipvs->mcfg.syncid;
437 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
438 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
439 sb->end = (unsigned char *)mesg + len;
440 sb->firstuse = jiffies;
441 return sb;
442}
443
444
445static inline bool in_persistence(struct ip_vs_conn *cp)
446{
447 for (cp = cp->control; cp; cp = cp->control) {
448 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
449 return true;
450 }
451 return false;
452}
453
454
455
456
457
458
459
460
461
462
/*
 * Decide whether this connection should be synced to the backup now.
 * Rate-limits per-connection sync traffic by stamping the next allowed
 * sync deadline (plus a 2-bit retry counter) into cp->sync_endtime with
 * cmpxchg, so concurrent callers for the same connection race safely.
 * Returns non-zero when the caller should send a sync message.
 */
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
				  struct ip_vs_conn *cp, int pkts)
{
	unsigned long orig = READ_ONCE(cp->sync_endtime);
	unsigned long now = jiffies;
	/* low 2 bits of the deadline hold the retry counter */
	unsigned long n = (now + cp->timeout) & ~3UL;
	unsigned int sync_refresh_period;
	int sync_period;
	int force;

	/* Check if we sync in current state */
	if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
		force = 0;
	else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
		/* in persist mode only templates of persistent conns sync */
		return 0;
	else if (likely(cp->protocol == IPPROTO_TCP)) {
		/* only sync TCP states that are stable or terminal */
		if (!((1 << cp->state) &
		      ((1 << IP_VS_TCP_S_ESTABLISHED) |
		       (1 << IP_VS_TCP_S_FIN_WAIT) |
		       (1 << IP_VS_TCP_S_CLOSE) |
		       (1 << IP_VS_TCP_S_CLOSE_WAIT) |
		       (1 << IP_VS_TCP_S_TIME_WAIT))))
			return 0;
		force = cp->state != cp->old_state;
		/* state changes (except to ESTABLISHED) sync immediately */
		if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
			goto set;
	} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
		if (!((1 << cp->state) &
		      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
		       (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
		       (1 << IP_VS_SCTP_S_CLOSED))))
			return 0;
		force = cp->state != cp->old_state;
		if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
			goto set;
	} else {
		/* UDP or another protocol with single state */
		force = 0;
	}

	sync_refresh_period = sysctl_sync_refresh_period(ipvs);
	if (sync_refresh_period > 0) {
		long diff = n - orig;
		long min_diff = max(cp->timeout >> 1, 10UL * HZ);

		/* Avoid sync if difference is below sync_refresh_period
		 * and below the half timeout.
		 */
		if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
			int retries = orig & 3;

			if (retries >= sysctl_sync_retries(ipvs))
				return 0;
			if (time_before(now, orig - cp->timeout +
					(sync_refresh_period >> 3)))
				return 0;
			n |= retries + 1;	/* bump retry counter */
		}
	}
	sync_period = sysctl_sync_period(ipvs);
	if (sync_period > 0) {
		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
		    pkts % sync_period != sysctl_sync_threshold(ipvs))
			return 0;
	} else if (!sync_refresh_period &&
		   pkts != sysctl_sync_threshold(ipvs))
		return 0;

set:
	cp->old_state = cp->state;
	/* only one of the racing callers wins the stamp update */
	n = cmpxchg(&cp->sync_endtime, orig, n);
	return n == orig || force;
}
538
539
540
541
542
/*
 * Version 0 sender: append this connection (IPv4 only) to the current
 * sync buffer, flushing the buffer to the send queue when it is full.
 * Afterwards also sync the controlling connection, if any, via
 * ip_vs_sync_conn() so templates follow their children.
 */
static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
			       int pkts)
{
	struct ip_vs_sync_mesg_v0 *m;
	struct ip_vs_sync_conn_v0 *s;
	struct ip_vs_sync_buff *buff;
	struct ipvs_master_sync_state *ms;
	int id;
	unsigned int len;

	/* v0 protocol carries IPv4 addresses only */
	if (unlikely(cp->af != AF_INET))
		return;
	/* Do not sync ONE PACKET */
	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		return;

	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
		return;

	spin_lock_bh(&ipvs->sync_buff_lock);
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		spin_unlock_bh(&ipvs->sync_buff_lock);
		return;
	}

	id = select_master_thread_id(ipvs, cp);
	ms = &ipvs->ms[id];
	buff = ms->sync_buff;
	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
		SIMPLE_CONN_SIZE;
	if (buff) {
		m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
		/* Send buffer if it is for v1 (!m->nr_conns means v1 header,
		 * since a v0 message in progress always has entries)
		 */
		if (buff->head + len > buff->end || !m->nr_conns) {
			sb_queue_tail(ipvs, ms);
			ms->sync_buff = NULL;
			buff = NULL;
		}
	}
	if (!buff) {
		buff = ip_vs_sync_buff_create_v0(ipvs, len);
		if (!buff) {
			spin_unlock_bh(&ipvs->sync_buff_lock);
			pr_err("ip_vs_sync_buff_create failed.\n");
			return;
		}
		ms->sync_buff = buff;
	}

	m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
	s = (struct ip_vs_sync_conn_v0 *) buff->head;

	/* copy members */
	s->reserved = 0;
	s->protocol = cp->protocol;
	s->cport = cp->cport;
	s->vport = cp->vport;
	s->dport = cp->dport;
	s->caddr = cp->caddr.ip;
	s->vaddr = cp->vaddr.ip;
	s->daddr = cp->daddr.ip;
	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->state = htons(cp->state);
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		struct ip_vs_sync_conn_options *opt =
			(struct ip_vs_sync_conn_options *)&s[1];
		memcpy(opt, &cp->in_seq, sizeof(*opt));
	}

	m->nr_conns++;
	m->size = htons(ntohs(m->size) + len);
	buff->head += len;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* synchronize its controller if it has */
	cp = cp->control;
	if (cp) {
		if (cp->flags & IP_VS_CONN_F_TEMPLATE)
			pkts = atomic_add_return(1, &cp->in_pkts);
		else
			pkts = sysctl_sync_threshold(ipvs);
		ip_vs_sync_conn(ipvs, cp, pkts);
	}
}
627
628
629
630
631
632
/*
 * Version 1 sender: encode this connection into the per-thread sync
 * buffer, including optional sequence data and persistence-engine
 * data/name, then loop (via "goto sloop") to also sync the chain of
 * controlling connections.
 */
void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
{
	struct ip_vs_sync_mesg *m;
	union ip_vs_sync_conn *s;
	struct ip_vs_sync_buff *buff;
	struct ipvs_master_sync_state *ms;
	int id;
	__u8 *p;
	unsigned int len, pe_name_len, pad;

	/* Handle old version of the protocol */
	if (sysctl_sync_ver(ipvs) == 0) {
		ip_vs_sync_conn_v0(ipvs, cp, pkts);
		return;
	}
	/* Do not sync ONE PACKET */
	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		goto control;
sloop:
	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
		goto control;

	/* Sanity checks */
	pe_name_len = 0;
	if (cp->pe_data_len) {
		if (!cp->pe_data || !cp->dest) {
			IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
			return;
		}
		pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
	}

	spin_lock_bh(&ipvs->sync_buff_lock);
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		spin_unlock_bh(&ipvs->sync_buff_lock);
		return;
	}

	id = select_master_thread_id(ipvs, cp);
	ms = &ipvs->ms[id];

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		len = sizeof(struct ip_vs_sync_v6);
	else
#endif
		len = sizeof(struct ip_vs_sync_v4);

	/* each option costs 2 bytes of type+length header */
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
		len += sizeof(struct ip_vs_sync_conn_options) + 2;

	if (cp->pe_data_len)
		len += cp->pe_data_len + 2;
	if (pe_name_len)
		len += pe_name_len + 2;

	/* check if there is a space for this one */
	pad = 0;
	buff = ms->sync_buff;
	if (buff) {
		m = buff->mesg;
		/* entries are 32-bit aligned within the message */
		pad = (4 - (size_t) buff->head) & 3;
		/* Send buffer if it is for v0 (m->reserved overlaps the
		 * v0 nr_conns byte, so non-zero means v0 in progress)
		 */
		if (buff->head + len + pad > buff->end || m->reserved) {
			sb_queue_tail(ipvs, ms);
			ms->sync_buff = NULL;
			buff = NULL;
			pad = 0;
		}
	}

	if (!buff) {
		buff = ip_vs_sync_buff_create(ipvs, len);
		if (!buff) {
			spin_unlock_bh(&ipvs->sync_buff_lock);
			pr_err("ip_vs_sync_buff_create failed.\n");
			return;
		}
		ms->sync_buff = buff;
		m = buff->mesg;
	}

	p = buff->head;
	buff->head += pad + len;
	m->size = htons(ntohs(m->size) + pad + len);
	/* Add ev. padding from prev. sync_conn */
	while (pad--)
		*(p++) = 0;

	s = (union ip_vs_sync_conn *)p;

	/* Set message type & copy members */
	s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
	s->v4.ver_size = htons(len & SVER_MASK);	/* Version 0 */
	s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->v4.state = htons(cp->state);
	s->v4.protocol = cp->protocol;
	s->v4.cport = cp->cport;
	s->v4.vport = cp->vport;
	s->v4.dport = cp->dport;
	s->v4.fwmark = htonl(cp->fwmark);
	s->v4.timeout = htonl(cp->timeout / HZ);
	m->nr_conns++;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6) {
		p += sizeof(struct ip_vs_sync_v6);
		s->v6.caddr = cp->caddr.in6;
		s->v6.vaddr = cp->vaddr.in6;
		s->v6.daddr = cp->daddr.in6;
	} else
#endif
	{
		p += sizeof(struct ip_vs_sync_v4);	/* options ptr */
		s->v4.caddr = cp->caddr.ip;
		s->v4.vaddr = cp->vaddr.ip;
		s->v4.daddr = cp->daddr.ip;
	}
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		*(p++) = IPVS_OPT_SEQ_DATA;
		*(p++) = sizeof(struct ip_vs_sync_conn_options);
		hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
		p += sizeof(struct ip_vs_seq);
		hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
		p += sizeof(struct ip_vs_seq);
	}
	/* Handle pe data */
	if (cp->pe_data_len && cp->pe_data) {
		*(p++) = IPVS_OPT_PE_DATA;
		*(p++) = cp->pe_data_len;
		memcpy(p, cp->pe_data, cp->pe_data_len);
		p += cp->pe_data_len;
		if (pe_name_len) {
			/* Add PE_NAME */
			*(p++) = IPVS_OPT_PE_NAME;
			*(p++) = pe_name_len;
			memcpy(p, cp->pe->name, pe_name_len);
			p += pe_name_len;
		}
	}

	spin_unlock_bh(&ipvs->sync_buff_lock);

control:
	/* synchronize its controller if it has */
	cp = cp->control;
	if (!cp)
		return;
	if (cp->flags & IP_VS_CONN_F_TEMPLATE)
		pkts = atomic_add_return(1, &cp->in_pkts);
	else
		pkts = sysctl_sync_threshold(ipvs);
	goto sloop;
}
787
788
789
790
/*
 * Fill the connection-lookup parameters from a decoded sync entry.
 * When PE data is present, resolves the persistence engine by name
 * (taking a module reference via __ip_vs_pe_getbyname()) and duplicates
 * the PE data into p->pe_data, which the caller then owns.
 * Returns 0 on success, 1 on invalid/unknown PE, -ENOMEM on allocation
 * failure.
 */
static inline int
ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
			   struct ip_vs_conn_param *p,
			   __u8 *pe_data, unsigned int pe_data_len,
			   __u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
				      (const union nf_inet_addr *)&sc->v6.caddr,
				      sc->v6.cport,
				      (const union nf_inet_addr *)&sc->v6.vaddr,
				      sc->v6.vport, p);
	else
#endif
		ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
				      (const union nf_inet_addr *)&sc->v4.caddr,
				      sc->v4.cport,
				      (const union nf_inet_addr *)&sc->v4.vaddr,
				      sc->v4.vport, p);
	/* Handle pe data */
	if (pe_data_len) {
		if (pe_name_len) {
			char buff[IP_VS_PENAME_MAXLEN+1];

			/* NUL-terminate the wire-supplied name locally */
			memcpy(buff, pe_name, pe_name_len);
			buff[pe_name_len]=0;
			p->pe = __ip_vs_pe_getbyname(buff);
			if (!p->pe) {
				IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
					  buff);
				return 1;
			}
		} else {
			/* PE data without a PE name is malformed */
			IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
			return 1;
		}

		p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
		if (!p->pe_data) {
			/* drop the reference taken by getbyname above */
			module_put(p->pe->module);
			return -ENOMEM;
		}
		p->pe_data_len = pe_data_len;
	}
	return 0;
}
838
839
840
841
842
843
844
/*
 * Process a received sync connection entry on the backup: update an
 * existing connection's flags/state/counters, or create a new one bound
 * to the advertised destination. Takes ownership of param->pe_data —
 * it is either consumed by a newly created template connection or
 * kfree()d on every other path.
 */
static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
			    unsigned int flags, unsigned int state,
			    unsigned int protocol, unsigned int type,
			    const union nf_inet_addr *daddr, __be16 dport,
			    unsigned long timeout, __u32 fwmark,
			    struct ip_vs_sync_conn_options *opt)
{
	struct ip_vs_dest *dest;
	struct ip_vs_conn *cp;

	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
		cp = ip_vs_conn_in_get(param);
		/* the destination changed on the master: expire the stale
		 * entry so a fresh one can be created below
		 */
		if (cp && ((cp->dport != dport) ||
			   !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
			if (!(flags & IP_VS_CONN_F_INACTIVE)) {
				ip_vs_conn_expire_now(cp);
				__ip_vs_conn_put(cp);
				cp = NULL;
			} else {
				/* This is the expiration message for the
				 * connection that was already replaced, so we
				 * just ignore it.
				 */
				__ip_vs_conn_put(cp);
				kfree(param->pe_data);
				return;
			}
		}
	} else {
		cp = ip_vs_ct_in_get(param);
	}

	if (cp) {
		/* Free pe_data */
		kfree(param->pe_data);

		dest = cp->dest;
		spin_lock_bh(&cp->lock);
		/* keep the destination's active/inactive counters in step
		 * with the flag change
		 */
		if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
		    !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
			if (flags & IP_VS_CONN_F_INACTIVE) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
			} else {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
			}
		}
		/* only take the updatable flag bits from the wire */
		flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
		flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
		cp->flags = flags;
		spin_unlock_bh(&cp->lock);
		if (!dest)
			ip_vs_try_bind_dest(cp);
	} else {
		/*
		 * Find the appropriate destination for the connection.
		 * If it is not found the connection will remain unbound
		 * but still handled.
		 */
		rcu_read_lock();
		/* This function is only invoked by the synchronization
		 * code. We do not currently support heterogeneous pools
		 * with synchronization, so we can make the assumption that
		 * the svc_af is the same as the dest_af
		 */
		dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
				       param->vaddr, param->vport, protocol,
				       fwmark, flags);

		cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
				    fwmark);
		rcu_read_unlock();
		if (!cp) {
			kfree(param->pe_data);
			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
			return;
		}
		/* only template connections keep pe_data */
		if (!(flags & IP_VS_CONN_F_TEMPLATE))
			kfree(param->pe_data);
	}

	if (opt) {
		cp->in_seq = opt->in_seq;
		cp->out_seq = opt->out_seq;
	}
	atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
	cp->state = state;
	cp->old_state = cp->state;
	/*
	 * For Ver 0 messages style
	 *  - Not possible to recover the right timeout for templates
	 *  - can not find the right fwmark
	 *    virtual service. If needed, we can do it for
	 *    non-fwmark persistent services.
	 * Ver 1 messages style.
	 *  - No problem.
	 */
	if (timeout) {
		if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
			timeout = MAX_SCHEDULE_TIMEOUT / HZ;
		cp->timeout = timeout*HZ;
	} else {
		struct ip_vs_proto_data *pd;

		pd = ip_vs_proto_data_get(ipvs, protocol);
		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
			cp->timeout = pd->timeout_table[state];
		else
			cp->timeout = (3*60*HZ);
	}
	ip_vs_conn_put(cp);
}
958
959
960
961
962static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
963 const size_t buflen)
964{
965 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
966 struct ip_vs_sync_conn_v0 *s;
967 struct ip_vs_sync_conn_options *opt;
968 struct ip_vs_protocol *pp;
969 struct ip_vs_conn_param param;
970 char *p;
971 int i;
972
973 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
974 for (i=0; i<m->nr_conns; i++) {
975 unsigned int flags, state;
976
977 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
978 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
979 return;
980 }
981 s = (struct ip_vs_sync_conn_v0 *) p;
982 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
983 flags &= ~IP_VS_CONN_F_HASHED;
984 if (flags & IP_VS_CONN_F_SEQ_MASK) {
985 opt = (struct ip_vs_sync_conn_options *)&s[1];
986 p += FULL_CONN_SIZE;
987 if (p > buffer+buflen) {
988 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
989 return;
990 }
991 } else {
992 opt = NULL;
993 p += SIMPLE_CONN_SIZE;
994 }
995
996 state = ntohs(s->state);
997 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
998 pp = ip_vs_proto_get(s->protocol);
999 if (!pp) {
1000 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
1001 s->protocol);
1002 continue;
1003 }
1004 if (state >= pp->num_states) {
1005 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
1006 pp->name, state);
1007 continue;
1008 }
1009 } else {
1010 if (state >= IP_VS_CTPL_S_LAST)
1011 IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n",
1012 state);
1013 }
1014
1015 ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
1016 (const union nf_inet_addr *)&s->caddr,
1017 s->cport,
1018 (const union nf_inet_addr *)&s->vaddr,
1019 s->vport, ¶m);
1020
1021
1022 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->protocol, AF_INET,
1023 (union nf_inet_addr *)&s->daddr, s->dport,
1024 0, 0, opt);
1025 }
1026}
1027
1028
1029
1030
1031static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
1032 __u32 *opt_flags,
1033 struct ip_vs_sync_conn_options *opt)
1034{
1035 struct ip_vs_sync_conn_options *topt;
1036
1037 topt = (struct ip_vs_sync_conn_options *)p;
1038
1039 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
1040 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
1041 return -EINVAL;
1042 }
1043 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
1044 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
1045 return -EINVAL;
1046 }
1047 ntoh_seq(&topt->in_seq, &opt->in_seq);
1048 ntoh_seq(&topt->out_seq, &opt->out_seq);
1049 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
1050 return 0;
1051}
1052
1053static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1054 __u8 **data, unsigned int maxlen,
1055 __u32 *opt_flags, __u32 flag)
1056{
1057 if (plen > maxlen) {
1058 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
1059 return -EINVAL;
1060 }
1061 if (*opt_flags & flag) {
1062 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
1063 return -EINVAL;
1064 }
1065 *data_len = plen;
1066 *data = p;
1067 *opt_flags |= flag;
1068 return 0;
1069}
1070
1071
1072
1073static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
1074{
1075 struct ip_vs_sync_conn_options opt;
1076 union ip_vs_sync_conn *s;
1077 struct ip_vs_protocol *pp;
1078 struct ip_vs_conn_param param;
1079 __u32 flags;
1080 unsigned int af, state, pe_data_len=0, pe_name_len=0;
1081 __u8 *pe_data=NULL, *pe_name=NULL;
1082 __u32 opt_flags=0;
1083 int retc=0;
1084
1085 s = (union ip_vs_sync_conn *) p;
1086
1087 if (s->v6.type & STYPE_F_INET6) {
1088#ifdef CONFIG_IP_VS_IPV6
1089 af = AF_INET6;
1090 p += sizeof(struct ip_vs_sync_v6);
1091#else
1092 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1093 retc = 10;
1094 goto out;
1095#endif
1096 } else if (!s->v4.type) {
1097 af = AF_INET;
1098 p += sizeof(struct ip_vs_sync_v4);
1099 } else {
1100 return -10;
1101 }
1102 if (p > msg_end)
1103 return -20;
1104
1105
1106 while (p < msg_end) {
1107 int ptype;
1108 int plen;
1109
1110 if (p+2 > msg_end)
1111 return -30;
1112 ptype = *(p++);
1113 plen = *(p++);
1114
1115 if (!plen || ((p + plen) > msg_end))
1116 return -40;
1117
1118 switch (ptype & ~IPVS_OPT_F_PARAM) {
1119 case IPVS_OPT_SEQ_DATA:
1120 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1121 return -50;
1122 break;
1123
1124 case IPVS_OPT_PE_DATA:
1125 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1126 IP_VS_PEDATA_MAXLEN, &opt_flags,
1127 IPVS_OPT_F_PE_DATA))
1128 return -60;
1129 break;
1130
1131 case IPVS_OPT_PE_NAME:
1132 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1133 IP_VS_PENAME_MAXLEN, &opt_flags,
1134 IPVS_OPT_F_PE_NAME))
1135 return -70;
1136 break;
1137
1138 default:
1139
1140 if (!(ptype & IPVS_OPT_F_PARAM)) {
1141 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1142 ptype & ~IPVS_OPT_F_PARAM);
1143 retc = 20;
1144 goto out;
1145 }
1146 }
1147 p += plen;
1148 }
1149
1150
1151 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1152 flags |= IP_VS_CONN_F_SYNC;
1153 state = ntohs(s->v4.state);
1154
1155 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1156 pp = ip_vs_proto_get(s->v4.protocol);
1157 if (!pp) {
1158 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1159 s->v4.protocol);
1160 retc = 30;
1161 goto out;
1162 }
1163 if (state >= pp->num_states) {
1164 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1165 pp->name, state);
1166 retc = 40;
1167 goto out;
1168 }
1169 } else {
1170 if (state >= IP_VS_CTPL_S_LAST)
1171 IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n",
1172 state);
1173 }
1174 if (ip_vs_conn_fill_param_sync(ipvs, af, s, ¶m, pe_data,
1175 pe_data_len, pe_name, pe_name_len)) {
1176 retc = 50;
1177 goto out;
1178 }
1179
1180 if (af == AF_INET)
1181 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v4.protocol, af,
1182 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1183 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1184 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1185 );
1186#ifdef CONFIG_IP_VS_IPV6
1187 else
1188 ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v6.protocol, af,
1189 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1190 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1191 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1192 );
1193#endif
1194 ip_vs_pe_put(param.pe);
1195 return 0;
1196
1197out:
1198 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1199 return retc;
1200
1201}
1202
1203
1204
1205
1206
/*
 * Process a received sync message of any version: validate the header,
 * filter by syncid, detect v1 (version/reserved/spare fields) versus a
 * legacy v0 message, and iterate over the entries with bounds checks.
 */
static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
				  const size_t buflen)
{
	struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
	__u8 *p, *msg_end;
	int i, nr_conns;

	if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
		IP_VS_DBG(2, "BACKUP, message header too short\n");
		return;
	}

	if (buflen != ntohs(m2->size)) {
		IP_VS_DBG(2, "BACKUP, bogus message size\n");
		return;
	}
	/* SyncID sanity check */
	if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
		IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
		return;
	}
	/* Handle version 1 message */
	if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
	    && (m2->spare == 0)) {

		msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
		nr_conns = m2->nr_conns;

		for (i=0; i<nr_conns; i++) {
			union ip_vs_sync_conn *s;
			unsigned int size;
			int retc;

			p = msg_end;
			if (p + sizeof(s->v4) > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
				return;
			}
			s = (union ip_vs_sync_conn *)p;
			size = ntohs(s->v4.ver_size) & SVER_MASK;
			msg_end = p + size;
			/* Basic sanity checks */
			if (msg_end > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
				return;
			}
			if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
					      ntohs(s->v4.ver_size) >> SVER_SHIFT);
				return;
			}
			/* Process a single sync_conn */
			retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
			if (retc < 0) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
					     retc);
				return;
			}
			/* Make sure we have 32 bit alignment */
			msg_end = p + ((size + 3) & ~3);
		}
	} else {
		/* Old type of message */
		ip_vs_process_message_v0(ipvs, buffer, buflen);
		return;
	}
}
1274
1275
1276
1277
1278
1279static void set_sock_size(struct sock *sk, int mode, int val)
1280{
1281
1282
1283 lock_sock(sk);
1284 if (mode) {
1285 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
1286 sysctl_wmem_max);
1287 sk->sk_sndbuf = val * 2;
1288 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1289 } else {
1290 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
1291 sysctl_rmem_max);
1292 sk->sk_rcvbuf = val * 2;
1293 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1294 }
1295 release_sock(sk);
1296}
1297
1298
1299
1300
/*
 * Set up multicast loopback for outgoing sync packets, equivalent to
 * setsockopt(IP_MULTICAST_LOOP / IPV6_MULTICAST_LOOP).
 */
static void set_mcast_loop(struct sock *sk, u_char loop)
{
	struct inet_sock *inet = inet_sk(sk);

	lock_sock(sk);
	inet->mc_loop = loop ? 1 : 0;
#ifdef CONFIG_IP_VS_IPV6
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		/* IPV6_MULTICAST_LOOP */
		np->mc_loop = loop ? 1 : 0;
	}
#endif
	release_sock(sk);
}
1318
1319
1320
1321
/*
 * Set the TTL/hop limit of outgoing multicasts, equivalent to
 * setsockopt(IP_MULTICAST_TTL / IPV6_MULTICAST_HOPS).
 */
static void set_mcast_ttl(struct sock *sk, u_char ttl)
{
	struct inet_sock *inet = inet_sk(sk);

	lock_sock(sk);
	inet->mc_ttl = ttl;
#ifdef CONFIG_IP_VS_IPV6
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		/* IPV6_MULTICAST_HOPS */
		np->mcast_hops = ttl;
	}
#endif
	release_sock(sk);
}
1339
1340
/*
 * Set the path-MTU discovery mode for the sync socket, equivalent to
 * setsockopt(IP_MTU_DISCOVER / IPV6_MTU_DISCOVER).
 */
static void set_mcast_pmtudisc(struct sock *sk, int val)
{
	struct inet_sock *inet = inet_sk(sk);

	lock_sock(sk);
	inet->pmtudisc = val;
#ifdef CONFIG_IP_VS_IPV6
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		/* IPV6_MTU_DISCOVER */
		np->pmtudisc = val;
	}
#endif
	release_sock(sk);
}
1358
1359
1360
1361
/*
 * Bind outgoing multicasts to the given interface, equivalent to
 * setsockopt(IP_MULTICAST_IF / IPV6_MULTICAST_IF). Fails with -EINVAL
 * if the socket is already bound to a different device.
 */
static int set_mcast_if(struct sock *sk, struct net_device *dev)
{
	struct inet_sock *inet = inet_sk(sk);

	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	lock_sock(sk);
	inet->mc_index = dev->ifindex;
	/*  inet->mc_addr  - left unchanged */
#ifdef CONFIG_IP_VS_IPV6
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		/* IPV6_MULTICAST_IF */
		np->mcast_oif = dev->ifindex;
	}
#endif
	release_sock(sk);

	return 0;
}
1384
1385
1386
1387
1388
1389
1390
1391static int
1392join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev)
1393{
1394 struct ip_mreqn mreq;
1395 int ret;
1396
1397 memset(&mreq, 0, sizeof(mreq));
1398 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1399
1400 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1401 return -EINVAL;
1402
1403 mreq.imr_ifindex = dev->ifindex;
1404
1405 lock_sock(sk);
1406 ret = ip_mc_join_group(sk, &mreq);
1407 release_sock(sk);
1408
1409 return ret;
1410}
1411
#ifdef CONFIG_IP_VS_IPV6
/* IPv6 counterpart of join_mcast_group(): join the sync multicast
 * group on the given interface (IPV6_JOIN_GROUP equivalent).
 */
static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
			     struct net_device *dev)
{
	int ret;

	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
		return -EINVAL;

	lock_sock(sk);
	ret = ipv6_sock_mc_join(sk, dev->ifindex, addr);
	release_sock(sk);

	return ret;
}
#endif
1428
/* Bind the send socket to an address selected from the multicast
 * interface, so outgoing sync packets carry a proper source address.
 * NOTE(review): "sin" is field-initialized, leaving sin_zero padding
 * uninitialized — harmless for in-kernel bind, but confirm if reused.
 */
static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
{
	__be32 addr;
	struct sockaddr_in sin;

	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	if (!addr)
		pr_err("You probably need to specify IP address on "
		       "multicast interface.\n");

	IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
		  dev->name, &addr);

	/* Now bind the socket with the address of multicast interface */
	sin.sin_family	     = AF_INET;
	sin.sin_addr.s_addr  = addr;
	sin.sin_port         = 0;

	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
}
1449
1450static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
1451 struct ipvs_sync_daemon_cfg *c, int id)
1452{
1453 if (AF_INET6 == c->mcast_af) {
1454 sa->in6 = (struct sockaddr_in6) {
1455 .sin6_family = AF_INET6,
1456 .sin6_port = htons(c->mcast_port + id),
1457 };
1458 sa->in6.sin6_addr = c->mcast_group.in6;
1459 *salen = sizeof(sa->in6);
1460 } else {
1461 sa->in = (struct sockaddr_in) {
1462 .sin_family = AF_INET,
1463 .sin_port = htons(c->mcast_port + id),
1464 };
1465 sa->in.sin_addr = c->mcast_group.in;
1466 *salen = sizeof(sa->in);
1467 }
1468}
1469
1470
1471
1472
/*
 *      Set up a sending multicast socket over UDP for one master thread.
 *      On any error the partially set up socket is left in *sock_ret and
 *      must be released by the caller (start_sync_thread does so).
 */
static int make_send_sock(struct netns_ipvs *ipvs, int id,
			  struct net_device *dev, struct socket **sock_ret)
{
	/* multicast addr */
	union ipvs_sockaddr mcast_addr;
	struct socket *sock;
	int result, salen;

	/* First create a socket */
	result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
				  IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		goto error;
	}
	/* Publish the socket immediately: even if a later step fails,
	 * the caller owns it and is responsible for sock_release().
	 */
	*sock_ret = sock;
	result = set_mcast_if(sock->sk, dev);
	if (result < 0) {
		pr_err("Error setting outbound mcast interface\n");
		goto error;
	}

	set_mcast_loop(sock->sk, 0);	/* don't loop our own traffic back */
	set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
	/* Allow fragmentation if MTU changes */
	set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 1, result);	/* 1 = send buffer */

	/* Only IPv4 needs an explicit source-address bind here */
	if (AF_INET == ipvs->mcfg.mcast_af)
		result = bind_mcastif_addr(sock, dev);
	else
		result = 0;
	if (result < 0) {
		pr_err("Error binding address of the mcast interface\n");
		goto error;
	}

	/* Connect so plain sendmsg() goes to the group:port for this id */
	get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
				    salen, 0);
	if (result < 0) {
		pr_err("Error connecting to the multicast addr\n");
		goto error;
	}

	return 0;

error:
	return result;
}
1525
1526
1527
1528
1529
/*
 *      Set up a receiving multicast socket over UDP for one backup thread.
 *      On any error the partially set up socket is left in *sock_ret and
 *      must be released by the caller (start_sync_thread does so).
 */
static int make_receive_sock(struct netns_ipvs *ipvs, int id,
			     struct net_device *dev, struct socket **sock_ret)
{
	/* multicast addr */
	union ipvs_sockaddr mcast_addr;
	struct socket *sock;
	int result, salen;

	/* First create a socket */
	result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
				  IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		goto error;
	}
	/* Caller owns the socket from here on, even on error paths */
	*sock_ret = sock;
	/* it is equivalent to the REUSEADDR option in user-space */
	sock->sk->sk_reuse = SK_CAN_REUSE;
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 0, result);	/* 0 = receive buffer */

	/* Bind to the group:port for this thread id, on this device only */
	get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
	sock->sk->sk_bound_dev_if = dev->ifindex;
	result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
	if (result < 0) {
		pr_err("Error binding to the multicast addr\n");
		goto error;
	}

	/* join the multicast group */
#ifdef CONFIG_IP_VS_IPV6
	if (ipvs->bcfg.mcast_af == AF_INET6)
		result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
					   dev);
	else
#endif
		result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
					  dev);
	if (result < 0) {
		pr_err("Error joining to the multicast group\n");
		goto error;
	}

	return 0;

error:
	return result;
}
1579
1580
1581static int
1582ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1583{
1584 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1585 struct kvec iov;
1586 int len;
1587
1588 EnterFunction(7);
1589 iov.iov_base = (void *)buffer;
1590 iov.iov_len = length;
1591
1592 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1593
1594 LeaveFunction(7);
1595 return len;
1596}
1597
1598static int
1599ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1600{
1601 int msize;
1602 int ret;
1603
1604 msize = ntohs(msg->size);
1605
1606 ret = ip_vs_send_async(sock, (char *)msg, msize);
1607 if (ret >= 0 || ret == -EAGAIN)
1608 return ret;
1609 pr_err("ip_vs_send_async error %d\n", ret);
1610 return 0;
1611}
1612
1613static int
1614ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1615{
1616 struct msghdr msg = {NULL,};
1617 struct kvec iov = {buffer, buflen};
1618 int len;
1619
1620 EnterFunction(7);
1621
1622
1623 iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen);
1624 len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
1625 if (len < 0)
1626 return len;
1627
1628 LeaveFunction(7);
1629 return len;
1630}
1631
1632
/* Delayed-work handler: wake the matching master sync thread when sync
 * buffers are queued, so partially filled buffers get flushed.
 */
static void master_wakeup_work_handler(struct work_struct *work)
{
	struct ipvs_master_sync_state *ms =
		container_of(work, struct ipvs_master_sync_state,
			     master_wakeup_work.work);
	struct netns_ipvs *ipvs = ms->ipvs;

	/* sync_lock protects sync_queue_len/sync_queue_delay */
	spin_lock_bh(&ipvs->sync_lock);
	if (ms->sync_queue_len &&
	    ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
		/* index of this state inside ipvs->ms[] selects the thread */
		int id = (int)(ms - ipvs->ms);

		ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
		wake_up_process(ipvs->master_tinfo[id].task);
	}
	spin_unlock_bh(&ipvs->sync_lock);
}
1650
1651
1652static inline struct ip_vs_sync_buff *
1653next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1654{
1655 struct ip_vs_sync_buff *sb;
1656
1657 sb = sb_dequeue(ipvs, ms);
1658 if (sb)
1659 return sb;
1660
1661 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1662}
1663
/* Master sync kthread: repeatedly takes the next sync buffer and sends
 * it to the multicast group, sleeping while the socket is not writable.
 * On stop, any buffer in flight plus the whole queue is released.
 */
static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = tinfo->ipvs;
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		if (unlikely(kthread_should_stop()))
			break;	/* sb (possibly non-NULL) released at done: */
		if (!sb) {
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			/* (Ab)use interruptible sleep to avoid increasing
			 * the load avg; woken when writable or on stop.
			 */
			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop());
			if (unlikely(kthread_should_stop()))
				goto done;	/* sb still held, freed below */
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* clean up the sync_buff queue */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	__set_current_state(TASK_RUNNING);

	/* clean up the current sync_buff (take it unconditionally: time 0) */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	return 0;
}
1714
1715
/* Backup sync kthread: waits for datagrams on the multicast socket and
 * feeds each received message to ip_vs_process_message().
 */
static int sync_thread_backup(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = tinfo->ipvs;
	int len;

	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);

	while (!kthread_should_stop()) {
		/* Sleep until there is something to read or we are stopped */
		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
			 || kthread_should_stop());

		/* Drain the receive queue before sleeping again */
		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
			len = ip_vs_receive(tinfo->sock, tinfo->buf,
					    ipvs->bcfg.sync_maxlen);
			if (len <= 0) {
				/* -EAGAIN just means the queue raced empty */
				if (len != -EAGAIN)
					pr_err("receiving message error\n");
				break;
			}

			ip_vs_process_message(ipvs, tinfo->buf, len);
		}
	}

	return 0;
}
1747
1748
/*
 *      Start sync daemon(s) for the given state (MASTER or BACKUP):
 *      one socket + kthread per sync port ("count" of them).  Returns 0
 *      on success or a negative errno; on failure everything created so
 *      far is torn down here.
 */
int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
		      int state)
{
	struct ip_vs_sync_thread_data *ti = NULL, *tinfo;
	struct task_struct *task;
	struct net_device *dev;
	char *name;
	int (*threadfn)(void *data);
	int id = 0, count, hlen;
	int result = -ENOMEM;
	u16 mtu, min_mtu;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
		  sizeof(struct ip_vs_sync_conn_v0));

	/* Do not hold one mutex and then block on the other: loop until
	 * both rtnl_lock and sync_mutex are held together.
	 */
	for (;;) {
		rtnl_lock();
		if (mutex_trylock(&ipvs->sync_mutex))
			break;
		rtnl_unlock();
		mutex_lock(&ipvs->sync_mutex);
		if (rtnl_trylock())
			break;
		mutex_unlock(&ipvs->sync_mutex);
	}

	/* Thread count is fixed by the first daemon started */
	if (!ipvs->sync_state) {
		count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
		ipvs->threads_mask = count - 1;
	} else
		count = ipvs->threads_mask + 1;

	/* Fill in config defaults */
	if (c->mcast_af == AF_UNSPEC) {
		c->mcast_af = AF_INET;
		c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
	}
	if (!c->mcast_port)
		c->mcast_port = IP_VS_SYNC_PORT;
	if (!c->mcast_ttl)
		c->mcast_ttl = 1;

	dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
	if (!dev) {
		pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
		result = -ENODEV;
		goto out_early;
	}
	/* IP + UDP header overhead for the chosen address family */
	hlen = (AF_INET6 == c->mcast_af) ?
	       sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
	       sizeof(struct iphdr) + sizeof(struct udphdr);
	mtu = (state == IP_VS_STATE_BACKUP) ?
		  clamp(dev->mtu, 1500U, 65535U) : 1500U;
	min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;

	if (c->sync_maxlen)
		c->sync_maxlen = clamp_t(unsigned int,
					 c->sync_maxlen, min_mtu,
					 65535 - hlen);
	else
		c->sync_maxlen = mtu - hlen;

	if (state == IP_VS_STATE_MASTER) {
		result = -EEXIST;
		if (ipvs->ms)
			goto out_early;

		ipvs->mcfg = *c;
		name = "ipvs-m:%d:%d";
		threadfn = sync_thread_master;
	} else if (state == IP_VS_STATE_BACKUP) {
		result = -EEXIST;
		if (ipvs->backup_tinfo)
			goto out_early;

		ipvs->bcfg = *c;
		name = "ipvs-b:%d:%d";
		threadfn = sync_thread_backup;
	} else {
		result = -EINVAL;
		goto out_early;
	}

	if (state == IP_VS_STATE_MASTER) {
		struct ipvs_master_sync_state *ms;

		result = -ENOMEM;
		ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
		if (!ipvs->ms)
			goto out;
		ms = ipvs->ms;
		for (id = 0; id < count; id++, ms++) {
			INIT_LIST_HEAD(&ms->sync_queue);
			ms->sync_queue_len = 0;
			ms->sync_queue_delay = 0;
			INIT_DELAYED_WORK(&ms->master_wakeup_work,
					  master_wakeup_work_handler);
			ms->ipvs = ipvs;
		}
	}
	result = -ENOMEM;
	ti = kcalloc(count, sizeof(struct ip_vs_sync_thread_data),
		     GFP_KERNEL);
	if (!ti)
		goto out;

	/* Create socket + kthread per sync port */
	for (id = 0; id < count; id++) {
		tinfo = &ti[id];
		tinfo->ipvs = ipvs;
		if (state == IP_VS_STATE_BACKUP) {
			result = -ENOMEM;
			tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
					     GFP_KERNEL);
			if (!tinfo->buf)
				goto out;
		}
		tinfo->id = id;
		if (state == IP_VS_STATE_MASTER)
			result = make_send_sock(ipvs, id, dev, &tinfo->sock);
		else
			result = make_receive_sock(ipvs, id, dev, &tinfo->sock);
		if (result < 0)
			goto out;

		task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
		if (IS_ERR(task)) {
			result = PTR_ERR(task);
			goto out;
		}
		tinfo->task = task;
	}

	/* mark as active */

	if (state == IP_VS_STATE_MASTER)
		ipvs->master_tinfo = ti;
	else
		ipvs->backup_tinfo = ti;
	spin_lock_bh(&ipvs->sync_buff_lock);
	ipvs->sync_state |= state;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	mutex_unlock(&ipvs->sync_mutex);
	rtnl_unlock();

	/* increase the module use count */
	ip_vs_use_count_inc();

	return 0;

out:
	/* We do not need RTNL lock anymore, just allow next retry.
	 * Stop threads under sync_mutex, then release sockets without it.
	 */
	rtnl_unlock();
	id = min(id, count - 1);	/* id may equal count after the loop */
	if (ti) {
		for (tinfo = ti + id; tinfo >= ti; tinfo--) {
			if (tinfo->task)
				kthread_stop(tinfo->task);
		}
	}
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	}
	mutex_unlock(&ipvs->sync_mutex);

	/* No more mutexes, release socks */
	if (ti) {
		for (tinfo = ti + id; tinfo >= ti; tinfo--) {
			if (tinfo->sock)
				sock_release(tinfo->sock);
			kfree(tinfo->buf);
		}
		kfree(ti);
	}
	return result;

out_early:
	mutex_unlock(&ipvs->sync_mutex);
	rtnl_unlock();
	return result;
}
1934
1935
/*
 *      Stop the sync daemon(s) for the given state.  Returns 0 on success,
 *      -ESRCH when no such daemon is running, -EINVAL for a bad state, or
 *      the first non-zero kthread_stop() result.
 */
int stop_sync_thread(struct netns_ipvs *ipvs, int state)
{
	struct ip_vs_sync_thread_data *ti, *tinfo;
	int id;
	int retc = -EINVAL;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));

	mutex_lock(&ipvs->sync_mutex);
	if (state == IP_VS_STATE_MASTER) {
		retc = -ESRCH;
		if (!ipvs->ms)
			goto err;
		ti = ipvs->master_tinfo;

		/*
		 * The lock synchronizes with sb_queue_tail(), so that we
		 * don't add sync buffers to the queue, when we are already
		 * in progress of stopping the master sync daemon.
		 */

		spin_lock_bh(&ipvs->sync_buff_lock);
		spin_lock(&ipvs->sync_lock);
		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
		spin_unlock(&ipvs->sync_lock);
		spin_unlock_bh(&ipvs->sync_buff_lock);

		retc = 0;
		for (id = ipvs->threads_mask; id >= 0; id--) {
			struct ipvs_master_sync_state *ms = &ipvs->ms[id];
			int ret;

			tinfo = &ti[id];
			pr_info("stopping master sync thread %d ...\n",
				task_pid_nr(tinfo->task));
			/* cancel the wakeup work before stopping the thread */
			cancel_delayed_work_sync(&ms->master_wakeup_work);
			ret = kthread_stop(tinfo->task);
			if (retc >= 0)
				retc = ret;	/* keep first failure */
		}
		kfree(ipvs->ms);
		ipvs->ms = NULL;
		ipvs->master_tinfo = NULL;
	} else if (state == IP_VS_STATE_BACKUP) {
		retc = -ESRCH;
		if (!ipvs->backup_tinfo)
			goto err;
		ti = ipvs->backup_tinfo;

		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
		retc = 0;
		for (id = ipvs->threads_mask; id >= 0; id--) {
			int ret;

			tinfo = &ti[id];
			pr_info("stopping backup sync thread %d ...\n",
				task_pid_nr(tinfo->task));
			ret = kthread_stop(tinfo->task);
			if (retc >= 0)
				retc = ret;	/* keep first failure */
		}
		ipvs->backup_tinfo = NULL;
	} else {
		goto err;
	}
	id = ipvs->threads_mask;
	mutex_unlock(&ipvs->sync_mutex);

	/* No more mutexes, release socks */
	for (tinfo = ti + id; tinfo >= ti; tinfo--) {
		if (tinfo->sock)
			sock_release(tinfo->sock);
		kfree(tinfo->buf);
	}
	kfree(ti);

	/* decrease the module use count */
	ip_vs_use_count_dec();
	return retc;

err:
	mutex_unlock(&ipvs->sync_mutex);
	return retc;
}
2020
2021
2022
2023
2024int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
2025{
2026 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
2027 spin_lock_init(&ipvs->sync_lock);
2028 spin_lock_init(&ipvs->sync_buff_lock);
2029 return 0;
2030}
2031
2032void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
2033{
2034 int retc;
2035
2036 retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
2037 if (retc && retc != -ESRCH)
2038 pr_err("Failed to stop Master Daemon\n");
2039
2040 retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
2041 if (retc && retc != -ESRCH)
2042 pr_err("Failed to stop Backup Daemon\n");
2043}
2044