1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * Message prefix for this file: pr_fmt() prepends KMSG_COMPONENT and ": "
 * to the format string it is given.
 */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19#include <linux/module.h>
20#include <linux/init.h>
21#include <linux/types.h>
22#include <linux/capability.h>
23#include <linux/fs.h>
24#include <linux/sysctl.h>
25#include <linux/proc_fs.h>
26#include <linux/workqueue.h>
27#include <linux/swap.h>
28#include <linux/seq_file.h>
29#include <linux/slab.h>
30
31#include <linux/netfilter.h>
32#include <linux/netfilter_ipv4.h>
33#include <linux/mutex.h>
34
35#include <net/net_namespace.h>
36#include <linux/nsproxy.h>
37#include <net/ip.h>
38#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
42#endif
43#include <net/route.h>
44#include <net/sock.h>
45#include <net/genetlink.h>
46
47#include <linux/uaccess.h>
48
49#include <net/ip_vs.h>
50
51
/*
 * File-wide mutex.  The code that takes it is not part of this section;
 * NOTE(review): presumably serialises the configuration paths - confirm
 * against the callers.
 */
static DEFINE_MUTEX(__ip_vs_mutex);
53
54
55
56#ifdef CONFIG_IP_VS_DEBUG
/* Current debug level; static storage, so it starts out as zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of the debug level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
63#endif
64
65
66
/* Forward declaration; the definition appears later in this file. */
static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
68
69
70#ifdef CONFIG_IP_VS_IPV6
71
72static bool __ip_vs_addr_is_local_v6(struct net *net,
73 const struct in6_addr *addr)
74{
75 struct flowi6 fl6 = {
76 .daddr = *addr,
77 };
78 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
79 bool is_local;
80
81 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
82
83 dst_release(dst);
84 return is_local;
85}
86#endif
87
88#ifdef CONFIG_SYSCTL
89
90
91
92
/*
 *	Recompute the three defense knobs (drop_entry, drop_packet and
 *	secure_tcp) of @ipvs from the amount of available memory.
 *
 *	For each knob the sysctl value selects the mode:
 *	  0 - off
 *	  1 - automatic, currently inactive (switches to 2 when memory is short)
 *	  2 - automatic, currently active (switches back to 1 otherwise)
 *	  3 - always on
 *	Each knob is updated under its own spinlock with bottom halves disabled.
 */
static void update_defense_level(struct netns_ipvs *ipvs)
{
	struct sysinfo i;
	/*
	 * NOTE(review): function-static, so this remembered value is shared
	 * by every netns_ipvs that is passed in - confirm that is intended.
	 */
	static int old_secure_tcp = 0;
	int availmem;
	int nomem;
	int to_change = -1;

	/* only free and buffer memory are counted as available */
	si_meminfo(&i);
	availmem = i.freeram + i.bufferram;

	/* memory is considered short below the amemthresh sysctl */
	nomem = (availmem < ipvs->sysctl_amemthresh);

	local_bh_disable();

	/* drop_entry */
	spin_lock(&ipvs->dropentry_lock);
	switch (ipvs->sysctl_drop_entry) {
	case 0:
		atomic_set(&ipvs->dropentry, 0);
		break;
	case 1:
		if (nomem) {
			atomic_set(&ipvs->dropentry, 1);
			ipvs->sysctl_drop_entry = 2;
		} else {
			atomic_set(&ipvs->dropentry, 0);
		}
		break;
	case 2:
		if (nomem) {
			atomic_set(&ipvs->dropentry, 1);
		} else {
			atomic_set(&ipvs->dropentry, 0);
			ipvs->sysctl_drop_entry = 1;
		}
		break;
	case 3:
		atomic_set(&ipvs->dropentry, 1);
		break;
	}
	spin_unlock(&ipvs->dropentry_lock);

	/* drop_packet */
	spin_lock(&ipvs->droppacket_lock);
	switch (ipvs->sysctl_drop_packet) {
	case 0:
		ipvs->drop_rate = 0;
		break;
	case 1:
		if (nomem) {
			/*
			 * nomem guarantees availmem < amemthresh, so the
			 * divisor below is strictly positive.
			 */
			ipvs->drop_rate = ipvs->drop_counter
				= ipvs->sysctl_amemthresh /
				(ipvs->sysctl_amemthresh-availmem);
			ipvs->sysctl_drop_packet = 2;
		} else {
			ipvs->drop_rate = 0;
		}
		break;
	case 2:
		if (nomem) {
			ipvs->drop_rate = ipvs->drop_counter
				= ipvs->sysctl_amemthresh /
				(ipvs->sysctl_amemthresh-availmem);
		} else {
			ipvs->drop_rate = 0;
			ipvs->sysctl_drop_packet = 1;
		}
		break;
	case 3:
		ipvs->drop_rate = ipvs->sysctl_am_droprate;
		break;
	}
	spin_unlock(&ipvs->droppacket_lock);

	/*
	 * secure_tcp: to_change stays -1 unless the knob crosses between
	 * the "below 2" and "2 or above" ranges since the previous run.
	 */
	spin_lock(&ipvs->securetcp_lock);
	switch (ipvs->sysctl_secure_tcp) {
	case 0:
		if (old_secure_tcp >= 2)
			to_change = 0;
		break;
	case 1:
		if (nomem) {
			if (old_secure_tcp < 2)
				to_change = 1;
			ipvs->sysctl_secure_tcp = 2;
		} else {
			if (old_secure_tcp >= 2)
				to_change = 0;
		}
		break;
	case 2:
		if (nomem) {
			if (old_secure_tcp < 2)
				to_change = 1;
		} else {
			if (old_secure_tcp >= 2)
				to_change = 0;
			ipvs->sysctl_secure_tcp = 1;
		}
		break;
	case 3:
		if (old_secure_tcp < 2)
			to_change = 1;
		break;
	}
	old_secure_tcp = ipvs->sysctl_secure_tcp;
	/* only notify the protocols when the state actually changed */
	if (to_change >= 0)
		ip_vs_protocol_timeout_change(ipvs,
					      ipvs->sysctl_secure_tcp > 1);
	spin_unlock(&ipvs->securetcp_lock);

	local_bh_enable();
}
213
214
215
216
217
/*
 * Delay between two runs of the defense work, expressed as a multiple of
 * HZ.  The expansion is parenthesised so the macro behaves as a single
 * value inside any larger expression (e.g. as a divisor).
 */
#define DEFENSE_TIMER_PERIOD	(1 * HZ)
219
220static void defense_work_handler(struct work_struct *work)
221{
222 struct netns_ipvs *ipvs =
223 container_of(work, struct netns_ipvs, defense_work.work);
224
225 update_defense_level(ipvs);
226 if (atomic_read(&ipvs->dropentry))
227 ip_vs_random_dropentry(ipvs);
228 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
229}
230#endif
231
/*
 * Try to take a reference on this module.
 * Returns the result of try_module_get(): non-zero on success, zero when
 * the reference could not be taken.
 */
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}
237
/* Drop one reference on this module (counterpart of ip_vs_use_count_inc). */
void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
243
244
245
246
247
/*
 * Geometry of the service hash tables: index width in bits, and the
 * number of buckets and index mask derived from it.
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* services without fwmark, hashed by ip_vs_svc_hashkey() */
static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* services with a fwmark, hashed by ip_vs_svc_fwm_hashkey() */
static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
256
257
258
259
260
261static inline unsigned int
262ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
263 const union nf_inet_addr *addr, __be16 port)
264{
265 register unsigned int porth = ntohs(port);
266 __be32 addr_fold = addr->ip;
267 __u32 ahash;
268
269#ifdef CONFIG_IP_VS_IPV6
270 if (af == AF_INET6)
271 addr_fold = addr->ip6[0]^addr->ip6[1]^
272 addr->ip6[2]^addr->ip6[3];
273#endif
274 ahash = ntohl(addr_fold);
275 ahash ^= ((size_t) ipvs >> 8);
276
277 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
278 IP_VS_SVC_TAB_MASK;
279}
280
281
282
283
284static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
285{
286 return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
287}
288
289
290
291
292
293
294static int ip_vs_svc_hash(struct ip_vs_service *svc)
295{
296 unsigned int hash;
297
298 if (svc->flags & IP_VS_SVC_F_HASHED) {
299 pr_err("%s(): request for already hashed, called from %pS\n",
300 __func__, __builtin_return_address(0));
301 return 0;
302 }
303
304 if (svc->fwmark == 0) {
305
306
307
308 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
309 &svc->addr, svc->port);
310 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
311 } else {
312
313
314
315 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
316 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
317 }
318
319 svc->flags |= IP_VS_SVC_F_HASHED;
320
321 atomic_inc(&svc->refcnt);
322 return 1;
323}
324
325
326
327
328
329
330static int ip_vs_svc_unhash(struct ip_vs_service *svc)
331{
332 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
333 pr_err("%s(): request for unhash flagged, called from %pS\n",
334 __func__, __builtin_return_address(0));
335 return 0;
336 }
337
338 if (svc->fwmark == 0) {
339
340 hlist_del_rcu(&svc->s_list);
341 } else {
342
343 hlist_del_rcu(&svc->f_list);
344 }
345
346 svc->flags &= ~IP_VS_SVC_F_HASHED;
347 atomic_dec(&svc->refcnt);
348 return 1;
349}
350
351
352
353
354
355static inline struct ip_vs_service *
356__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
357 const union nf_inet_addr *vaddr, __be16 vport)
358{
359 unsigned int hash;
360 struct ip_vs_service *svc;
361
362
363 hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
364
365 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
366 if ((svc->af == af)
367 && ip_vs_addr_equal(af, &svc->addr, vaddr)
368 && (svc->port == vport)
369 && (svc->protocol == protocol)
370 && (svc->ipvs == ipvs)) {
371
372 return svc;
373 }
374 }
375
376 return NULL;
377}
378
379
380
381
382
383static inline struct ip_vs_service *
384__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
385{
386 unsigned int hash;
387 struct ip_vs_service *svc;
388
389
390 hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
391
392 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
393 if (svc->fwmark == fwmark && svc->af == af
394 && (svc->ipvs == ipvs)) {
395
396 return svc;
397 }
398 }
399
400 return NULL;
401}
402
403
404struct ip_vs_service *
405ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
406 const union nf_inet_addr *vaddr, __be16 vport)
407{
408 struct ip_vs_service *svc;
409
410
411
412
413 if (fwmark) {
414 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
415 if (svc)
416 goto out;
417 }
418
419
420
421
422
423 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
424
425 if (!svc && protocol == IPPROTO_TCP &&
426 atomic_read(&ipvs->ftpsvc_counter) &&
427 (vport == FTPDATA || ntohs(vport) >= inet_prot_sock(ipvs->net))) {
428
429
430
431
432 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
433 }
434
435 if (svc == NULL
436 && atomic_read(&ipvs->nullsvc_counter)) {
437
438
439
440 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
441 }
442
443 out:
444 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
445 fwmark, ip_vs_proto_name(protocol),
446 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
447 svc ? "hit" : "not hit");
448
449 return svc;
450}
451
452
/*
 * Attach @dest to @svc: take a reference on the service first, then
 * publish the pointer in dest->svc.
 */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	rcu_assign_pointer(dest->svc, svc);
}
459
/* Release a service: its per-CPU statistics first, then the object itself. */
static void ip_vs_service_free(struct ip_vs_service *svc)
{
	free_percpu(svc->stats.cpustats);
	kfree(svc);
}
465
466static void ip_vs_service_rcu_free(struct rcu_head *head)
467{
468 struct ip_vs_service *svc;
469
470 svc = container_of(head, struct ip_vs_service, rcu_head);
471 ip_vs_service_free(svc);
472}
473
474static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
475{
476 if (atomic_dec_and_test(&svc->refcnt)) {
477 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
478 svc->fwmark,
479 IP_VS_DBG_ADDR(svc->af, &svc->addr),
480 ntohs(svc->port));
481 if (do_delay)
482 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
483 else
484 ip_vs_service_free(svc);
485 }
486}
487
488
489
490
491
492static inline unsigned int ip_vs_rs_hashkey(int af,
493 const union nf_inet_addr *addr,
494 __be16 port)
495{
496 register unsigned int porth = ntohs(port);
497 __be32 addr_fold = addr->ip;
498
499#ifdef CONFIG_IP_VS_IPV6
500 if (af == AF_INET6)
501 addr_fold = addr->ip6[0]^addr->ip6[1]^
502 addr->ip6[2]^addr->ip6[3];
503#endif
504
505 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506 & IP_VS_RTAB_MASK;
507}
508
509
510static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
511{
512 unsigned int hash;
513 __be16 port;
514
515 if (dest->in_rs_table)
516 return;
517
518 switch (IP_VS_DFWD_METHOD(dest)) {
519 case IP_VS_CONN_F_MASQ:
520 port = dest->port;
521 break;
522 case IP_VS_CONN_F_TUNNEL:
523 switch (dest->tun_type) {
524 case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
525 port = dest->tun_port;
526 break;
527 case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
528 case IP_VS_CONN_F_TUNNEL_TYPE_GRE:
529 port = 0;
530 break;
531 default:
532 return;
533 }
534 break;
535 default:
536 return;
537 }
538
539
540
541
542
543 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
544
545 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
546 dest->in_rs_table = 1;
547}
548
549
550static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
551{
552
553
554
555 if (dest->in_rs_table) {
556 hlist_del_rcu(&dest->d_list);
557 dest->in_rs_table = 0;
558 }
559}
560
561
562bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
563 const union nf_inet_addr *daddr, __be16 dport)
564{
565 unsigned int hash;
566 struct ip_vs_dest *dest;
567
568
569 hash = ip_vs_rs_hashkey(af, daddr, dport);
570
571 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
572 if (dest->port == dport &&
573 dest->af == af &&
574 ip_vs_addr_equal(af, &dest->addr, daddr) &&
575 (dest->protocol == protocol || dest->vfwmark) &&
576 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
577
578 return true;
579 }
580 }
581
582 return false;
583}
584
585
586
587
588
589
590
591struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
592 __u16 protocol,
593 const union nf_inet_addr *daddr,
594 __be16 dport)
595{
596 unsigned int hash;
597 struct ip_vs_dest *dest;
598
599
600 hash = ip_vs_rs_hashkey(af, daddr, dport);
601
602 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
603 if (dest->port == dport &&
604 dest->af == af &&
605 ip_vs_addr_equal(af, &dest->addr, daddr) &&
606 (dest->protocol == protocol || dest->vfwmark) &&
607 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
608
609 return dest;
610 }
611 }
612
613 return NULL;
614}
615
616
617
618
619
620
621
622struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
623 const union nf_inet_addr *daddr,
624 __be16 tun_port)
625{
626 struct ip_vs_dest *dest;
627 unsigned int hash;
628
629
630 hash = ip_vs_rs_hashkey(af, daddr, tun_port);
631
632 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
633 if (dest->tun_port == tun_port &&
634 dest->af == af &&
635 ip_vs_addr_equal(af, &dest->addr, daddr) &&
636 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
637
638 return dest;
639 }
640 }
641
642 return NULL;
643}
644
645
646
647
648static struct ip_vs_dest *
649ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
650 const union nf_inet_addr *daddr, __be16 dport)
651{
652 struct ip_vs_dest *dest;
653
654
655
656
657 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
658 if ((dest->af == dest_af) &&
659 ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
660 (dest->port == dport)) {
661
662 return dest;
663 }
664 }
665
666 return NULL;
667}
668
669
670
671
672
673
674
675
676
677struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
678 const union nf_inet_addr *daddr,
679 __be16 dport,
680 const union nf_inet_addr *vaddr,
681 __be16 vport, __u16 protocol, __u32 fwmark,
682 __u32 flags)
683{
684 struct ip_vs_dest *dest;
685 struct ip_vs_service *svc;
686 __be16 port = dport;
687
688 svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
689 if (!svc)
690 return NULL;
691 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
692 port = 0;
693 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
694 if (!dest)
695 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
696 return dest;
697}
698
699void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
700{
701 struct ip_vs_dest_dst *dest_dst = container_of(head,
702 struct ip_vs_dest_dst,
703 rcu_head);
704
705 dst_release(dest_dst->dst_cache);
706 kfree(dest_dst);
707}
708
709
710static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
711{
712 struct ip_vs_dest_dst *old;
713
714 old = rcu_dereference_protected(dest->dest_dst, 1);
715 if (old) {
716 RCU_INIT_POINTER(dest->dest_dst, NULL);
717 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
718 }
719}
720
721
722
723
724
725
726
727
728
729
730
731static struct ip_vs_dest *
732ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
733 const union nf_inet_addr *daddr, __be16 dport)
734{
735 struct ip_vs_dest *dest;
736 struct netns_ipvs *ipvs = svc->ipvs;
737
738
739
740
741 spin_lock_bh(&ipvs->dest_trash_lock);
742 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
743 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
744 "dest->refcnt=%d\n",
745 dest->vfwmark,
746 IP_VS_DBG_ADDR(dest->af, &dest->addr),
747 ntohs(dest->port),
748 refcount_read(&dest->refcnt));
749 if (dest->af == dest_af &&
750 ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
751 dest->port == dport &&
752 dest->vfwmark == svc->fwmark &&
753 dest->protocol == svc->protocol &&
754 (svc->fwmark ||
755 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
756 dest->vport == svc->port))) {
757
758 list_del(&dest->t_list);
759 goto out;
760 }
761 }
762
763 dest = NULL;
764
765out:
766 spin_unlock_bh(&ipvs->dest_trash_lock);
767
768 return dest;
769}
770
/*
 * Final teardown of a destination: drop its cached route, release the
 * reference it holds on its service (freeing the service immediately if
 * that was the last one), free the per-CPU statistics and finally put the
 * destination itself.
 */
static void ip_vs_dest_free(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);

	__ip_vs_dst_cache_reset(dest);
	__ip_vs_svc_put(svc, false);
	free_percpu(dest->stats.cpustats);
	ip_vs_dest_put_and_free(dest);
}
780
781
782
783
784
785
786
787
788
789
/*
 * Empty the destination trash of @ipvs: stop the trash timer (waiting for
 * a running handler to finish), then unlink and free every destination
 * still on the list.  The list is walked without taking dest_trash_lock.
 * NOTE(review): presumably only called when nothing else can touch the
 * trash any more - confirm against the callers.
 */
static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
{
	struct ip_vs_dest *dest, *nxt;

	del_timer_sync(&ipvs->dest_trash_timer);
	/* the _safe iterator is needed because entries are freed in the loop */
	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
		list_del(&dest->t_list);
		ip_vs_dest_free(dest);
	}
}
801
/*
 * Fill @dst with the counters of @src as seen since the last zeroing:
 * each counter is reported as kstats minus the kstats0 baseline.  The
 * estimator values are added by ip_vs_read_estimator().  Everything is
 * read under src->lock.
 */
static void
ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c

	spin_lock_bh(&src->lock);

	IP_VS_SHOW_STATS_COUNTER(conns);
	IP_VS_SHOW_STATS_COUNTER(inpkts);
	IP_VS_SHOW_STATS_COUNTER(outpkts);
	IP_VS_SHOW_STATS_COUNTER(inbytes);
	IP_VS_SHOW_STATS_COUNTER(outbytes);

	ip_vs_read_estimator(dst, src);

	spin_unlock_bh(&src->lock);
}
819
/*
 * Convert kernel statistics @src into the user-visible layout @dst.
 * Every field except inbytes and outbytes is narrowed with an explicit
 * (u32) cast; inbytes and outbytes are assigned without a cast.
 */
static void
ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
{
	dst->conns = (u32)src->conns;
	dst->inpkts = (u32)src->inpkts;
	dst->outpkts = (u32)src->outpkts;
	dst->inbytes = src->inbytes;
	dst->outbytes = src->outbytes;
	dst->cps = (u32)src->cps;
	dst->inpps = (u32)src->inpps;
	dst->outpps = (u32)src->outpps;
	dst->inbps = (u32)src->inbps;
	dst->outbps = (u32)src->outbps;
}
834
/*
 * "Zero" the statistics in @stats without touching the live counters:
 * the current kstats values are saved into kstats0 as the new baseline,
 * and the estimator is reset.  Done under stats->lock.
 */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	/* get current counters as zero point, rates are zeroed */

#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c

	IP_VS_ZERO_STATS_COUNTER(conns);
	IP_VS_ZERO_STATS_COUNTER(inpkts);
	IP_VS_ZERO_STATS_COUNTER(outpkts);
	IP_VS_ZERO_STATS_COUNTER(inbytes);
	IP_VS_ZERO_STATS_COUNTER(outbytes);

	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}
854
855
856
857
/*
 *	Apply the user-supplied settings @udest to @dest and (re)bind it to
 *	@svc.
 *
 *	@add is non-zero when the destination is being added to the service
 *	(a new one or one recovered from the trash) and zero when an existing
 *	destination is edited.  When adding, the destination is also linked
 *	into svc->destinations and the scheduler's add_dest hook is called;
 *	when editing, the upd_dest hook is called instead.
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
		    struct ip_vs_dest_user_kern *udest, int add)
{
	struct netns_ipvs *ipvs = svc->ipvs;
	struct ip_vs_service *old_svc;
	struct ip_vs_scheduler *sched;
	int conn_flags;

	/* an edit must not change the address family of the destination */
	BUG_ON(!add && udest->af != dest->af);

	if (add && udest->af != svc->af)
		ipvs->mixed_address_family_dests++;

	/* last_weight keeps the latest non-zero weight (or the initial one) */
	if (add || udest->weight != 0)
		atomic_set(&dest->last_weight, udest->weight);

	/* set the weight and the flags */
	atomic_set(&dest->weight, udest->weight);
	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
	conn_flags |= IP_VS_CONN_F_INACTIVE;

	/*
	 * The rs_table key depends on forwarding method and tunnel settings:
	 * unhash now if any of them changes, it is rehashed further down.
	 */
	if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
	    IP_VS_DFWD_METHOD(dest) ||
	    udest->tun_type != dest->tun_type ||
	    udest->tun_port != dest->tun_port)
		ip_vs_rs_unhash(dest);

	/* set the tunnel info */
	dest->tun_type = udest->tun_type;
	dest->tun_port = udest->tun_port;
	dest->tun_flags = udest->tun_flags;

	/* set IP_VS_CONN_F_NOOUTPUT for every method other than MASQ */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/* MASQ on the FTP port: register conntrack for the service */
		if (svc->port == FTPPORT)
			ip_vs_register_conntrack(svc);
	}
	atomic_set(&dest->conn_flags, conn_flags);
	/* put the real server in rs_table if it is not there (yet/any more) */
	ip_vs_rs_hash(ipvs, dest);

	/* bind the service, replacing a different previous one if needed */
	old_svc = rcu_dereference_protected(dest->svc, 1);
	if (!old_svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (old_svc != svc) {
			ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
			__ip_vs_svc_put(old_svc, true);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;

	/* a removed or raised upper threshold clears the overload state */
	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
	dest->u_threshold = udest->u_threshold;
	dest->l_threshold = udest->l_threshold;

	dest->af = udest->af;

	/* drop any cached route so it is looked up again */
	spin_lock_bh(&dest->dst_lock);
	__ip_vs_dst_cache_reset(dest);
	spin_unlock_bh(&dest->dst_lock);

	if (add) {
		ip_vs_start_estimator(svc->ipvs, &dest->stats);
		list_add_rcu(&dest->n_list, &svc->destinations);
		svc->num_dests++;
		sched = rcu_dereference_protected(svc->scheduler, 1);
		if (sched && sched->add_dest)
			sched->add_dest(svc, dest);
	} else {
		sched = rcu_dereference_protected(svc->scheduler, 1);
		if (sched && sched->upd_dest)
			sched->upd_dest(svc, dest);
	}
}
945
946
947
948
949
/*
 *	Create a destination for the given service
 *
 *	Validates the address in @udest, allocates and initialises a new
 *	destination, binds it to @svc through __ip_vs_update_dest() and
 *	stores it in *@dest_p.
 *
 *	Returns 0 on success, -EINVAL for an unacceptable address, -ENOMEM
 *	on allocation failure, or the error from nf_defrag_ipv6_enable().
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
	       struct ip_vs_dest **dest_p)
{
	struct ip_vs_dest *dest;
	unsigned int atype, i;

	EnterFunction(2);

#ifdef CONFIG_IP_VS_IPV6
	if (udest->af == AF_INET6) {
		int ret;

		/*
		 * Non-unicast and link-local addresses are only accepted
		 * when they turn out to be local to this netns.
		 */
		atype = ipv6_addr_type(&udest->addr.in6);
		if ((!(atype & IPV6_ADDR_UNICAST) ||
			atype & IPV6_ADDR_LINKLOCAL) &&
			!__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
			return -EINVAL;

		ret = nf_defrag_ipv6_enable(svc->ipvs->net);
		if (ret)
			return ret;
	} else
#endif
	{
		/* IPv4: only local and unicast addresses are accepted */
		atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
			return -EINVAL;
	}

	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
	if (dest == NULL)
		return -ENOMEM;

	dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!dest->stats.cpustats)
		goto err_alloc;

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *ip_vs_dest_stats;
		ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
		u64_stats_init(&ip_vs_dest_stats->syncp);
	}

	/* remember which service this destination was created for */
	dest->af = udest->af;
	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
	dest->port = udest->port;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	atomic_set(&dest->persistconns, 0);
	refcount_set(&dest->refcnt, 1);

	INIT_HLIST_NODE(&dest->d_list);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->stats.lock);
	/* apply the remaining settings and link the dest into the service */
	__ip_vs_update_dest(svc, dest, udest, 1);

	*dest_p = dest;

	LeaveFunction(2);
	return 0;

err_alloc:
	kfree(dest);
	return -ENOMEM;
}
1021
1022
1023
1024
1025
1026static int
1027ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1028{
1029 struct ip_vs_dest *dest;
1030 union nf_inet_addr daddr;
1031 __be16 dport = udest->port;
1032 int ret;
1033
1034 EnterFunction(2);
1035
1036 if (udest->weight < 0) {
1037 pr_err("%s(): server weight less than zero\n", __func__);
1038 return -ERANGE;
1039 }
1040
1041 if (udest->l_threshold > udest->u_threshold) {
1042 pr_err("%s(): lower threshold is higher than upper threshold\n",
1043 __func__);
1044 return -ERANGE;
1045 }
1046
1047 if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
1048 if (udest->tun_port == 0) {
1049 pr_err("%s(): tunnel port is zero\n", __func__);
1050 return -EINVAL;
1051 }
1052 }
1053
1054 ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1055
1056
1057 rcu_read_lock();
1058 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1059 rcu_read_unlock();
1060
1061 if (dest != NULL) {
1062 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1063 return -EEXIST;
1064 }
1065
1066
1067
1068
1069
1070 dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
1071
1072 if (dest != NULL) {
1073 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
1074 "dest->refcnt=%d, service %u/%s:%u\n",
1075 IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
1076 refcount_read(&dest->refcnt),
1077 dest->vfwmark,
1078 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
1079 ntohs(dest->vport));
1080
1081 __ip_vs_update_dest(svc, dest, udest, 1);
1082 ret = 0;
1083 } else {
1084
1085
1086
1087 ret = ip_vs_new_dest(svc, udest, &dest);
1088 }
1089 LeaveFunction(2);
1090
1091 return ret;
1092}
1093
1094
1095
1096
1097
1098static int
1099ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1100{
1101 struct ip_vs_dest *dest;
1102 union nf_inet_addr daddr;
1103 __be16 dport = udest->port;
1104
1105 EnterFunction(2);
1106
1107 if (udest->weight < 0) {
1108 pr_err("%s(): server weight less than zero\n", __func__);
1109 return -ERANGE;
1110 }
1111
1112 if (udest->l_threshold > udest->u_threshold) {
1113 pr_err("%s(): lower threshold is higher than upper threshold\n",
1114 __func__);
1115 return -ERANGE;
1116 }
1117
1118 if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
1119 if (udest->tun_port == 0) {
1120 pr_err("%s(): tunnel port is zero\n", __func__);
1121 return -EINVAL;
1122 }
1123 }
1124
1125 ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1126
1127
1128 rcu_read_lock();
1129 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1130 rcu_read_unlock();
1131
1132 if (dest == NULL) {
1133 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1134 return -ENOENT;
1135 }
1136
1137 __ip_vs_update_dest(svc, dest, udest, 0);
1138 LeaveFunction(2);
1139
1140 return 0;
1141}
1142
1143
1144
1145
/*
 *	Retire a destination: stop its estimator, remove it from rs_table
 *	and park it on the trash list of @ipvs instead of freeing it.
 *
 *	When the trash list was empty and @cleanup is false, the trash timer
 *	is armed for half of IP_VS_DEST_TRASH_PERIOD from now.
 */
static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
			     bool cleanup)
{
	ip_vs_stop_estimator(ipvs, &dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	ip_vs_rs_unhash(dest);

	spin_lock_bh(&ipvs->dest_trash_lock);
	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
		      refcount_read(&dest->refcnt));
	if (list_empty(&ipvs->dest_trash) && !cleanup)
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	/* dest lives in trash with reference */
	list_add(&dest->t_list, &ipvs->dest_trash);
	/* idle_start == 0 marks "not seen idle yet" for the trash timer */
	dest->idle_start = 0;
	spin_unlock_bh(&ipvs->dest_trash_lock);
}
1168
1169
1170
1171
1172
/*
 *	Unlink a destination from the given service
 *
 *	Clears IP_VS_DEST_F_AVAILABLE, removes @dest from svc->destinations
 *	and fixes up the per-service and per-netns counters.  When @svcupd
 *	is non-zero the scheduler's del_dest hook is called as well.
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 *  Remove it from the d-linked destination list.
	 */
	list_del_rcu(&dest->n_list);
	svc->num_dests--;

	/* mirrors the increment done when a mixed-family dest is added */
	if (dest->af != svc->af)
		svc->ipvs->mixed_address_family_dests--;

	if (svcupd) {
		struct ip_vs_scheduler *sched;

		sched = rcu_dereference_protected(svc->scheduler, 1);
		if (sched && sched->del_dest)
			sched->del_dest(svc, dest);
	}
}
1196
1197
1198
1199
1200
/*
 *	Delete a destination server in the given service
 *
 *	Returns 0 on success or -ENOENT if @svc has no destination matching
 *	the address family, address and port in @udest.
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	__be16 dport = udest->port;

	EnterFunction(2);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
	rcu_read_unlock();

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	/*
	 *	Unlink dest from the service (and notify the scheduler)
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	/*
	 *	Move the destination into the trash
	 */
	__ip_vs_del_dest(svc->ipvs, dest, false);

	LeaveFunction(2);

	return 0;
}
1233
1234static void ip_vs_dest_trash_expire(struct timer_list *t)
1235{
1236 struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
1237 struct ip_vs_dest *dest, *next;
1238 unsigned long now = jiffies;
1239
1240 spin_lock(&ipvs->dest_trash_lock);
1241 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1242 if (refcount_read(&dest->refcnt) > 1)
1243 continue;
1244 if (dest->idle_start) {
1245 if (time_before(now, dest->idle_start +
1246 IP_VS_DEST_TRASH_PERIOD))
1247 continue;
1248 } else {
1249 dest->idle_start = max(1UL, now);
1250 continue;
1251 }
1252 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1253 dest->vfwmark,
1254 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1255 ntohs(dest->port));
1256 list_del(&dest->t_list);
1257 ip_vs_dest_free(dest);
1258 }
1259 if (!list_empty(&ipvs->dest_trash))
1260 mod_timer(&ipvs->dest_trash_timer,
1261 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1262 spin_unlock(&ipvs->dest_trash_lock);
1263}
1264
1265
1266
1267
1268static int
1269ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
1270 struct ip_vs_service **svc_p)
1271{
1272 int ret = 0, i;
1273 struct ip_vs_scheduler *sched = NULL;
1274 struct ip_vs_pe *pe = NULL;
1275 struct ip_vs_service *svc = NULL;
1276
1277
1278 ip_vs_use_count_inc();
1279
1280
1281 if (strcmp(u->sched_name, "none")) {
1282 sched = ip_vs_scheduler_get(u->sched_name);
1283 if (!sched) {
1284 pr_info("Scheduler module ip_vs_%s not found\n",
1285 u->sched_name);
1286 ret = -ENOENT;
1287 goto out_err;
1288 }
1289 }
1290
1291 if (u->pe_name && *u->pe_name) {
1292 pe = ip_vs_pe_getbyname(u->pe_name);
1293 if (pe == NULL) {
1294 pr_info("persistence engine module ip_vs_pe_%s "
1295 "not found\n", u->pe_name);
1296 ret = -ENOENT;
1297 goto out_err;
1298 }
1299 }
1300
1301#ifdef CONFIG_IP_VS_IPV6
1302 if (u->af == AF_INET6) {
1303 __u32 plen = (__force __u32) u->netmask;
1304
1305 if (plen < 1 || plen > 128) {
1306 ret = -EINVAL;
1307 goto out_err;
1308 }
1309
1310 ret = nf_defrag_ipv6_enable(ipvs->net);
1311 if (ret)
1312 goto out_err;
1313 }
1314#endif
1315
1316 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1317 if (svc == NULL) {
1318 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1319 ret = -ENOMEM;
1320 goto out_err;
1321 }
1322 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1323 if (!svc->stats.cpustats) {
1324 ret = -ENOMEM;
1325 goto out_err;
1326 }
1327
1328 for_each_possible_cpu(i) {
1329 struct ip_vs_cpu_stats *ip_vs_stats;
1330 ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
1331 u64_stats_init(&ip_vs_stats->syncp);
1332 }
1333
1334
1335
1336 atomic_set(&svc->refcnt, 0);
1337
1338 svc->af = u->af;
1339 svc->protocol = u->protocol;
1340 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1341 svc->port = u->port;
1342 svc->fwmark = u->fwmark;
1343 svc->flags = u->flags;
1344 svc->timeout = u->timeout * HZ;
1345 svc->netmask = u->netmask;
1346 svc->ipvs = ipvs;
1347
1348 INIT_LIST_HEAD(&svc->destinations);
1349 spin_lock_init(&svc->sched_lock);
1350 spin_lock_init(&svc->stats.lock);
1351
1352
1353 if (sched) {
1354 ret = ip_vs_bind_scheduler(svc, sched);
1355 if (ret)
1356 goto out_err;
1357 sched = NULL;
1358 }
1359
1360
1361 RCU_INIT_POINTER(svc->pe, pe);
1362 pe = NULL;
1363
1364
1365 if (svc->port == FTPPORT)
1366 atomic_inc(&ipvs->ftpsvc_counter);
1367 else if (svc->port == 0)
1368 atomic_inc(&ipvs->nullsvc_counter);
1369 if (svc->pe && svc->pe->conn_out)
1370 atomic_inc(&ipvs->conn_out_counter);
1371
1372 ip_vs_start_estimator(ipvs, &svc->stats);
1373
1374
1375 if (svc->af == AF_INET)
1376 ipvs->num_services++;
1377
1378
1379 ip_vs_svc_hash(svc);
1380
1381 *svc_p = svc;
1382
1383 ipvs->enable = 1;
1384 return 0;
1385
1386
1387 out_err:
1388 if (svc != NULL) {
1389 ip_vs_unbind_scheduler(svc, sched);
1390 ip_vs_service_free(svc);
1391 }
1392 ip_vs_scheduler_put(sched);
1393 ip_vs_pe_put(pe);
1394
1395
1396 ip_vs_use_count_dec();
1397
1398 return ret;
1399}
1400
1401
1402
1403
1404
/*
 *	Edit a service and bind it with a new scheduler
 *
 *	Replaces the scheduler, persistence engine, flags, timeout and
 *	netmask of @svc with the values from @u.
 *
 *	Reference handling: old_sched/old_pe always name the reference that
 *	has to be dropped at "out".  They start out as the freshly looked-up
 *	objects (so early errors release those) and are switched to the
 *	previously bound objects once the replacement begins.
 *
 *	Returns 0 on success, -ENOENT when the scheduler or persistence
 *	engine is not found, -EINVAL for an IPv6 prefix length outside
 *	1..128, or the error from ip_vs_bind_scheduler().
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched = NULL, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;
	bool new_pe_conn_out, old_pe_conn_out;

	/*
	 * Lookup the scheduler, by 'u->sched_name' ("none" means none)
	 */
	if (strcmp(u->sched_name, "none")) {
		sched = ip_vs_scheduler_get(u->sched_name);
		if (!sched) {
			pr_info("Scheduler module ip_vs_%s not found\n",
				u->sched_name);
			return -ENOENT;
		}
	}
	/* until the swap below, "out" must release the new scheduler */
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		/* likewise for the new persistence engine */
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6) {
		/* for IPv6 the netmask field carries a prefix length */
		__u32 plen = (__force __u32) u->netmask;

		if (plen < 1 || plen > 128) {
			ret = -EINVAL;
			goto out;
		}
	}
#endif

	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	if (sched != old_sched) {
		if (old_sched) {
			ip_vs_unbind_scheduler(svc, old_sched);
			RCU_INIT_POINTER(svc->scheduler, NULL);
			/* Wait all svc->sched_data users */
			synchronize_rcu();
		}
		/* Bind the new scheduler */
		if (sched) {
			ret = ip_vs_bind_scheduler(svc, sched);
			if (ret) {
				/*
				 * The new scheduler is released here; "out"
				 * then releases the old one, which has
				 * already been unbound above.
				 */
				ip_vs_scheduler_put(sched);
				goto out;
			}
		}
	}

	/*
	 * Set the flags and timeout value; the service stays hashed, so
	 * IP_VS_SVC_F_HASHED is kept regardless of u->flags.
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_pe = rcu_dereference_protected(svc->pe, 1);
	if (pe != old_pe) {
		rcu_assign_pointer(svc->pe, pe);
		/* keep conn_out_counter in step with the conn_out hooks */
		new_pe_conn_out = (pe && pe->conn_out) ? true : false;
		old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
		if (new_pe_conn_out && !old_pe_conn_out)
			atomic_inc(&svc->ipvs->conn_out_counter);
		if (old_pe_conn_out && !new_pe_conn_out)
			atomic_dec(&svc->ipvs->conn_out_counter);
	}

out:
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}
1490
1491
1492
1493
1494
1495
1496static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1497{
1498 struct ip_vs_dest *dest, *nxt;
1499 struct ip_vs_scheduler *old_sched;
1500 struct ip_vs_pe *old_pe;
1501 struct netns_ipvs *ipvs = svc->ipvs;
1502
1503
1504 if (svc->af == AF_INET)
1505 ipvs->num_services--;
1506
1507 ip_vs_stop_estimator(svc->ipvs, &svc->stats);
1508
1509
1510 old_sched = rcu_dereference_protected(svc->scheduler, 1);
1511 ip_vs_unbind_scheduler(svc, old_sched);
1512 ip_vs_scheduler_put(old_sched);
1513
1514
1515 old_pe = rcu_dereference_protected(svc->pe, 1);
1516 if (old_pe && old_pe->conn_out)
1517 atomic_dec(&ipvs->conn_out_counter);
1518 ip_vs_pe_put(old_pe);
1519
1520
1521
1522
1523 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1524 __ip_vs_unlink_dest(svc, dest, 0);
1525 __ip_vs_del_dest(svc->ipvs, dest, cleanup);
1526 }
1527
1528
1529
1530
1531 if (svc->port == FTPPORT)
1532 atomic_dec(&ipvs->ftpsvc_counter);
1533 else if (svc->port == 0)
1534 atomic_dec(&ipvs->nullsvc_counter);
1535
1536
1537
1538
1539 __ip_vs_svc_put(svc, true);
1540
1541
1542 ip_vs_use_count_dec();
1543}
1544
1545
1546
1547
1548static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
1549{
1550 ip_vs_unregister_conntrack(svc);
1551
1552 atomic_inc(&svc->refcnt);
1553
1554
1555
1556 ip_vs_svc_unhash(svc);
1557
1558 __ip_vs_del_service(svc, cleanup);
1559}
1560
1561
1562
1563
1564static int ip_vs_del_service(struct ip_vs_service *svc)
1565{
1566 if (svc == NULL)
1567 return -EEXIST;
1568 ip_vs_unlink_service(svc, false);
1569
1570 return 0;
1571}
1572
1573
1574
1575
1576
1577static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
1578{
1579 int idx;
1580 struct ip_vs_service *svc;
1581 struct hlist_node *n;
1582
1583
1584
1585
1586 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1587 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
1588 s_list) {
1589 if (svc->ipvs == ipvs)
1590 ip_vs_unlink_service(svc, cleanup);
1591 }
1592 }
1593
1594
1595
1596
1597 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1598 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
1599 f_list) {
1600 if (svc->ipvs == ipvs)
1601 ip_vs_unlink_service(svc, cleanup);
1602 }
1603 }
1604
1605 return 0;
1606}
1607
1608
1609
1610
1611
1612void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
1613{
1614 EnterFunction(2);
1615
1616 mutex_lock(&__ip_vs_mutex);
1617 ip_vs_flush(ipvs, true);
1618 mutex_unlock(&__ip_vs_mutex);
1619 LeaveFunction(2);
1620}
1621
1622
1623static inline void
1624ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1625{
1626 struct ip_vs_dest_dst *dest_dst;
1627
1628 spin_lock_bh(&dest->dst_lock);
1629 dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
1630 if (dest_dst && dest_dst->dst_cache->dev == dev) {
1631 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1632 dev->name,
1633 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1634 ntohs(dest->port),
1635 refcount_read(&dest->refcnt));
1636 __ip_vs_dst_cache_reset(dest);
1637 }
1638 spin_unlock_bh(&dest->dst_lock);
1639
1640}
1641
1642
1643
1644static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1645 void *ptr)
1646{
1647 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1648 struct net *net = dev_net(dev);
1649 struct netns_ipvs *ipvs = net_ipvs(net);
1650 struct ip_vs_service *svc;
1651 struct ip_vs_dest *dest;
1652 unsigned int idx;
1653
1654 if (event != NETDEV_DOWN || !ipvs)
1655 return NOTIFY_DONE;
1656 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1657 EnterFunction(2);
1658 mutex_lock(&__ip_vs_mutex);
1659 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1660 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1661 if (svc->ipvs == ipvs) {
1662 list_for_each_entry(dest, &svc->destinations,
1663 n_list) {
1664 ip_vs_forget_dev(dest, dev);
1665 }
1666 }
1667 }
1668
1669 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1670 if (svc->ipvs == ipvs) {
1671 list_for_each_entry(dest, &svc->destinations,
1672 n_list) {
1673 ip_vs_forget_dev(dest, dev);
1674 }
1675 }
1676
1677 }
1678 }
1679
1680 spin_lock_bh(&ipvs->dest_trash_lock);
1681 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1682 ip_vs_forget_dev(dest, dev);
1683 }
1684 spin_unlock_bh(&ipvs->dest_trash_lock);
1685 mutex_unlock(&__ip_vs_mutex);
1686 LeaveFunction(2);
1687 return NOTIFY_DONE;
1688}
1689
1690
1691
1692
1693static int ip_vs_zero_service(struct ip_vs_service *svc)
1694{
1695 struct ip_vs_dest *dest;
1696
1697 list_for_each_entry(dest, &svc->destinations, n_list) {
1698 ip_vs_zero_stats(&dest->stats);
1699 }
1700 ip_vs_zero_stats(&svc->stats);
1701 return 0;
1702}
1703
1704static int ip_vs_zero_all(struct netns_ipvs *ipvs)
1705{
1706 int idx;
1707 struct ip_vs_service *svc;
1708
1709 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1710 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1711 if (svc->ipvs == ipvs)
1712 ip_vs_zero_service(svc);
1713 }
1714 }
1715
1716 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1717 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1718 if (svc->ipvs == ipvs)
1719 ip_vs_zero_service(svc);
1720 }
1721 }
1722
1723 ip_vs_zero_stats(&ipvs->tot_stats);
1724 return 0;
1725}
1726
1727#ifdef CONFIG_SYSCTL
1728
/* Upper bound handed to proc_dointvec_minmax() through .extra2 */
static int three = 3;
1730
1731static int
1732proc_do_defense_mode(struct ctl_table *table, int write,
1733 void __user *buffer, size_t *lenp, loff_t *ppos)
1734{
1735 struct netns_ipvs *ipvs = table->extra2;
1736 int *valp = table->data;
1737 int val = *valp;
1738 int rc;
1739
1740 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1741 if (write && (*valp != val)) {
1742 if ((*valp < 0) || (*valp > 3)) {
1743
1744 *valp = val;
1745 } else {
1746 update_defense_level(ipvs);
1747 }
1748 }
1749 return rc;
1750}
1751
1752static int
1753proc_do_sync_threshold(struct ctl_table *table, int write,
1754 void __user *buffer, size_t *lenp, loff_t *ppos)
1755{
1756 int *valp = table->data;
1757 int val[2];
1758 int rc;
1759
1760
1761 memcpy(val, valp, sizeof(val));
1762
1763 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1764 if (write && (valp[0] < 0 || valp[1] < 0 ||
1765 (valp[0] >= valp[1] && valp[1]))) {
1766
1767 memcpy(valp, val, sizeof(val));
1768 }
1769 return rc;
1770}
1771
1772static int
1773proc_do_sync_mode(struct ctl_table *table, int write,
1774 void __user *buffer, size_t *lenp, loff_t *ppos)
1775{
1776 int *valp = table->data;
1777 int val = *valp;
1778 int rc;
1779
1780 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1781 if (write && (*valp != val)) {
1782 if ((*valp < 0) || (*valp > 1)) {
1783
1784 *valp = val;
1785 }
1786 }
1787 return rc;
1788}
1789
1790static int
1791proc_do_sync_ports(struct ctl_table *table, int write,
1792 void __user *buffer, size_t *lenp, loff_t *ppos)
1793{
1794 int *valp = table->data;
1795 int val = *valp;
1796 int rc;
1797
1798 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1799 if (write && (*valp != val)) {
1800 if (*valp < 1 || !is_power_of_2(*valp)) {
1801
1802 *valp = val;
1803 }
1804 }
1805 return rc;
1806}
1807
1808
1809
1810
1811
1812
1813
1814static struct ctl_table vs_vars[] = {
1815 {
1816 .procname = "amemthresh",
1817 .maxlen = sizeof(int),
1818 .mode = 0644,
1819 .proc_handler = proc_dointvec,
1820 },
1821 {
1822 .procname = "am_droprate",
1823 .maxlen = sizeof(int),
1824 .mode = 0644,
1825 .proc_handler = proc_dointvec,
1826 },
1827 {
1828 .procname = "drop_entry",
1829 .maxlen = sizeof(int),
1830 .mode = 0644,
1831 .proc_handler = proc_do_defense_mode,
1832 },
1833 {
1834 .procname = "drop_packet",
1835 .maxlen = sizeof(int),
1836 .mode = 0644,
1837 .proc_handler = proc_do_defense_mode,
1838 },
1839#ifdef CONFIG_IP_VS_NFCT
1840 {
1841 .procname = "conntrack",
1842 .maxlen = sizeof(int),
1843 .mode = 0644,
1844 .proc_handler = &proc_dointvec,
1845 },
1846#endif
1847 {
1848 .procname = "secure_tcp",
1849 .maxlen = sizeof(int),
1850 .mode = 0644,
1851 .proc_handler = proc_do_defense_mode,
1852 },
1853 {
1854 .procname = "snat_reroute",
1855 .maxlen = sizeof(int),
1856 .mode = 0644,
1857 .proc_handler = &proc_dointvec,
1858 },
1859 {
1860 .procname = "sync_version",
1861 .maxlen = sizeof(int),
1862 .mode = 0644,
1863 .proc_handler = proc_do_sync_mode,
1864 },
1865 {
1866 .procname = "sync_ports",
1867 .maxlen = sizeof(int),
1868 .mode = 0644,
1869 .proc_handler = proc_do_sync_ports,
1870 },
1871 {
1872 .procname = "sync_persist_mode",
1873 .maxlen = sizeof(int),
1874 .mode = 0644,
1875 .proc_handler = proc_dointvec,
1876 },
1877 {
1878 .procname = "sync_qlen_max",
1879 .maxlen = sizeof(unsigned long),
1880 .mode = 0644,
1881 .proc_handler = proc_doulongvec_minmax,
1882 },
1883 {
1884 .procname = "sync_sock_size",
1885 .maxlen = sizeof(int),
1886 .mode = 0644,
1887 .proc_handler = proc_dointvec,
1888 },
1889 {
1890 .procname = "cache_bypass",
1891 .maxlen = sizeof(int),
1892 .mode = 0644,
1893 .proc_handler = proc_dointvec,
1894 },
1895 {
1896 .procname = "expire_nodest_conn",
1897 .maxlen = sizeof(int),
1898 .mode = 0644,
1899 .proc_handler = proc_dointvec,
1900 },
1901 {
1902 .procname = "sloppy_tcp",
1903 .maxlen = sizeof(int),
1904 .mode = 0644,
1905 .proc_handler = proc_dointvec,
1906 },
1907 {
1908 .procname = "sloppy_sctp",
1909 .maxlen = sizeof(int),
1910 .mode = 0644,
1911 .proc_handler = proc_dointvec,
1912 },
1913 {
1914 .procname = "expire_quiescent_template",
1915 .maxlen = sizeof(int),
1916 .mode = 0644,
1917 .proc_handler = proc_dointvec,
1918 },
1919 {
1920 .procname = "sync_threshold",
1921 .maxlen =
1922 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1923 .mode = 0644,
1924 .proc_handler = proc_do_sync_threshold,
1925 },
1926 {
1927 .procname = "sync_refresh_period",
1928 .maxlen = sizeof(int),
1929 .mode = 0644,
1930 .proc_handler = proc_dointvec_jiffies,
1931 },
1932 {
1933 .procname = "sync_retries",
1934 .maxlen = sizeof(int),
1935 .mode = 0644,
1936 .proc_handler = proc_dointvec_minmax,
1937 .extra1 = SYSCTL_ZERO,
1938 .extra2 = &three,
1939 },
1940 {
1941 .procname = "nat_icmp_send",
1942 .maxlen = sizeof(int),
1943 .mode = 0644,
1944 .proc_handler = proc_dointvec,
1945 },
1946 {
1947 .procname = "pmtu_disc",
1948 .maxlen = sizeof(int),
1949 .mode = 0644,
1950 .proc_handler = proc_dointvec,
1951 },
1952 {
1953 .procname = "backup_only",
1954 .maxlen = sizeof(int),
1955 .mode = 0644,
1956 .proc_handler = proc_dointvec,
1957 },
1958 {
1959 .procname = "conn_reuse_mode",
1960 .maxlen = sizeof(int),
1961 .mode = 0644,
1962 .proc_handler = proc_dointvec,
1963 },
1964 {
1965 .procname = "schedule_icmp",
1966 .maxlen = sizeof(int),
1967 .mode = 0644,
1968 .proc_handler = proc_dointvec,
1969 },
1970 {
1971 .procname = "ignore_tunneled",
1972 .maxlen = sizeof(int),
1973 .mode = 0644,
1974 .proc_handler = proc_dointvec,
1975 },
1976#ifdef CONFIG_IP_VS_DEBUG
1977 {
1978 .procname = "debug_level",
1979 .data = &sysctl_ip_vs_debug_level,
1980 .maxlen = sizeof(int),
1981 .mode = 0644,
1982 .proc_handler = proc_dointvec,
1983 },
1984#endif
1985 { }
1986};
1987
1988#endif
1989
1990#ifdef CONFIG_PROC_FS
1991
1992struct ip_vs_iter {
1993 struct seq_net_private p;
1994 struct hlist_head *table;
1995 int bucket;
1996};
1997
1998
1999
2000
2001
2002static inline const char *ip_vs_fwd_name(unsigned int flags)
2003{
2004 switch (flags & IP_VS_CONN_F_FWD_MASK) {
2005 case IP_VS_CONN_F_LOCALNODE:
2006 return "Local";
2007 case IP_VS_CONN_F_TUNNEL:
2008 return "Tunnel";
2009 case IP_VS_CONN_F_DROUTE:
2010 return "Route";
2011 default:
2012 return "Masq";
2013 }
2014}
2015
2016
2017
2018static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
2019{
2020 struct net *net = seq_file_net(seq);
2021 struct netns_ipvs *ipvs = net_ipvs(net);
2022 struct ip_vs_iter *iter = seq->private;
2023 int idx;
2024 struct ip_vs_service *svc;
2025
2026
2027 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2028 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
2029 if ((svc->ipvs == ipvs) && pos-- == 0) {
2030 iter->table = ip_vs_svc_table;
2031 iter->bucket = idx;
2032 return svc;
2033 }
2034 }
2035 }
2036
2037
2038 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2039 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
2040 f_list) {
2041 if ((svc->ipvs == ipvs) && pos-- == 0) {
2042 iter->table = ip_vs_svc_fwm_table;
2043 iter->bucket = idx;
2044 return svc;
2045 }
2046 }
2047 }
2048
2049 return NULL;
2050}
2051
2052static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
2053 __acquires(RCU)
2054{
2055 rcu_read_lock();
2056 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
2057}
2058
2059
2060static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2061{
2062 struct hlist_node *e;
2063 struct ip_vs_iter *iter;
2064 struct ip_vs_service *svc;
2065
2066 ++*pos;
2067 if (v == SEQ_START_TOKEN)
2068 return ip_vs_info_array(seq,0);
2069
2070 svc = v;
2071 iter = seq->private;
2072
2073 if (iter->table == ip_vs_svc_table) {
2074
2075 e = rcu_dereference(hlist_next_rcu(&svc->s_list));
2076 if (e)
2077 return hlist_entry(e, struct ip_vs_service, s_list);
2078
2079 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2080 hlist_for_each_entry_rcu(svc,
2081 &ip_vs_svc_table[iter->bucket],
2082 s_list) {
2083 return svc;
2084 }
2085 }
2086
2087 iter->table = ip_vs_svc_fwm_table;
2088 iter->bucket = -1;
2089 goto scan_fwmark;
2090 }
2091
2092
2093 e = rcu_dereference(hlist_next_rcu(&svc->f_list));
2094 if (e)
2095 return hlist_entry(e, struct ip_vs_service, f_list);
2096
2097 scan_fwmark:
2098 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2099 hlist_for_each_entry_rcu(svc,
2100 &ip_vs_svc_fwm_table[iter->bucket],
2101 f_list)
2102 return svc;
2103 }
2104
2105 return NULL;
2106}
2107
2108static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2109 __releases(RCU)
2110{
2111 rcu_read_unlock();
2112}
2113
2114
2115static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2116{
2117 if (v == SEQ_START_TOKEN) {
2118 seq_printf(seq,
2119 "IP Virtual Server version %d.%d.%d (size=%d)\n",
2120 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2121 seq_puts(seq,
2122 "Prot LocalAddress:Port Scheduler Flags\n");
2123 seq_puts(seq,
2124 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2125 } else {
2126 struct net *net = seq_file_net(seq);
2127 struct netns_ipvs *ipvs = net_ipvs(net);
2128 const struct ip_vs_service *svc = v;
2129 const struct ip_vs_iter *iter = seq->private;
2130 const struct ip_vs_dest *dest;
2131 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2132 char *sched_name = sched ? sched->name : "none";
2133
2134 if (svc->ipvs != ipvs)
2135 return 0;
2136 if (iter->table == ip_vs_svc_table) {
2137#ifdef CONFIG_IP_VS_IPV6
2138 if (svc->af == AF_INET6)
2139 seq_printf(seq, "%s [%pI6]:%04X %s ",
2140 ip_vs_proto_name(svc->protocol),
2141 &svc->addr.in6,
2142 ntohs(svc->port),
2143 sched_name);
2144 else
2145#endif
2146 seq_printf(seq, "%s %08X:%04X %s %s ",
2147 ip_vs_proto_name(svc->protocol),
2148 ntohl(svc->addr.ip),
2149 ntohs(svc->port),
2150 sched_name,
2151 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2152 } else {
2153 seq_printf(seq, "FWM %08X %s %s",
2154 svc->fwmark, sched_name,
2155 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2156 }
2157
2158 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2159 seq_printf(seq, "persistent %d %08X\n",
2160 svc->timeout,
2161 ntohl(svc->netmask));
2162 else
2163 seq_putc(seq, '\n');
2164
2165 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
2166#ifdef CONFIG_IP_VS_IPV6
2167 if (dest->af == AF_INET6)
2168 seq_printf(seq,
2169 " -> [%pI6]:%04X"
2170 " %-7s %-6d %-10d %-10d\n",
2171 &dest->addr.in6,
2172 ntohs(dest->port),
2173 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2174 atomic_read(&dest->weight),
2175 atomic_read(&dest->activeconns),
2176 atomic_read(&dest->inactconns));
2177 else
2178#endif
2179 seq_printf(seq,
2180 " -> %08X:%04X "
2181 "%-7s %-6d %-10d %-10d\n",
2182 ntohl(dest->addr.ip),
2183 ntohs(dest->port),
2184 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2185 atomic_read(&dest->weight),
2186 atomic_read(&dest->activeconns),
2187 atomic_read(&dest->inactconns));
2188
2189 }
2190 }
2191 return 0;
2192}
2193
2194static const struct seq_operations ip_vs_info_seq_ops = {
2195 .start = ip_vs_info_seq_start,
2196 .next = ip_vs_info_seq_next,
2197 .stop = ip_vs_info_seq_stop,
2198 .show = ip_vs_info_seq_show,
2199};
2200
2201static int ip_vs_stats_show(struct seq_file *seq, void *v)
2202{
2203 struct net *net = seq_file_single_net(seq);
2204 struct ip_vs_kstats show;
2205
2206
2207 seq_puts(seq,
2208 " Total Incoming Outgoing Incoming Outgoing\n");
2209 seq_puts(seq,
2210 " Conns Packets Packets Bytes Bytes\n");
2211
2212 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2213 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
2214 (unsigned long long)show.conns,
2215 (unsigned long long)show.inpkts,
2216 (unsigned long long)show.outpkts,
2217 (unsigned long long)show.inbytes,
2218 (unsigned long long)show.outbytes);
2219
2220
2221 seq_puts(seq,
2222 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2223 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
2224 (unsigned long long)show.cps,
2225 (unsigned long long)show.inpps,
2226 (unsigned long long)show.outpps,
2227 (unsigned long long)show.inbps,
2228 (unsigned long long)show.outbps);
2229
2230 return 0;
2231}
2232
2233static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2234{
2235 struct net *net = seq_file_single_net(seq);
2236 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2237 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
2238 struct ip_vs_kstats kstats;
2239 int i;
2240
2241
2242 seq_puts(seq,
2243 " Total Incoming Outgoing Incoming Outgoing\n");
2244 seq_puts(seq,
2245 "CPU Conns Packets Packets Bytes Bytes\n");
2246
2247 for_each_possible_cpu(i) {
2248 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2249 unsigned int start;
2250 u64 conns, inpkts, outpkts, inbytes, outbytes;
2251
2252 do {
2253 start = u64_stats_fetch_begin_irq(&u->syncp);
2254 conns = u->cnt.conns;
2255 inpkts = u->cnt.inpkts;
2256 outpkts = u->cnt.outpkts;
2257 inbytes = u->cnt.inbytes;
2258 outbytes = u->cnt.outbytes;
2259 } while (u64_stats_fetch_retry_irq(&u->syncp, start));
2260
2261 seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
2262 i, (u64)conns, (u64)inpkts,
2263 (u64)outpkts, (u64)inbytes,
2264 (u64)outbytes);
2265 }
2266
2267 ip_vs_copy_stats(&kstats, tot_stats);
2268
2269 seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
2270 (unsigned long long)kstats.conns,
2271 (unsigned long long)kstats.inpkts,
2272 (unsigned long long)kstats.outpkts,
2273 (unsigned long long)kstats.inbytes,
2274 (unsigned long long)kstats.outbytes);
2275
2276
2277 seq_puts(seq,
2278 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2279 seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
2280 kstats.cps,
2281 kstats.inpps,
2282 kstats.outpps,
2283 kstats.inbps,
2284 kstats.outbps);
2285
2286 return 0;
2287}
2288#endif
2289
2290
2291
2292
2293static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2294{
2295#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2296 struct ip_vs_proto_data *pd;
2297#endif
2298
2299 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2300 u->tcp_timeout,
2301 u->tcp_fin_timeout,
2302 u->udp_timeout);
2303
2304#ifdef CONFIG_IP_VS_PROTO_TCP
2305 if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
2306 u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
2307 return -EINVAL;
2308 }
2309#endif
2310
2311#ifdef CONFIG_IP_VS_PROTO_UDP
2312 if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
2313 return -EINVAL;
2314#endif
2315
2316#ifdef CONFIG_IP_VS_PROTO_TCP
2317 if (u->tcp_timeout) {
2318 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2319 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2320 = u->tcp_timeout * HZ;
2321 }
2322
2323 if (u->tcp_fin_timeout) {
2324 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2325 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2326 = u->tcp_fin_timeout * HZ;
2327 }
2328#endif
2329
2330#ifdef CONFIG_IP_VS_PROTO_UDP
2331 if (u->udp_timeout) {
2332 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2333 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2334 = u->udp_timeout * HZ;
2335 }
2336#endif
2337 return 0;
2338}
2339
/*
 * Index of a sockopt command relative to IP_VS_BASE_CTL.
 * The argument is parenthesized so that an expression such as
 * CMDID(a ? b : c) subtracts the base from the whole expression.
 */
#define CMDID(cmd)	((cmd) - IP_VS_BASE_CTL)
2341
2342struct ip_vs_svcdest_user {
2343 struct ip_vs_service_user s;
2344 struct ip_vs_dest_user d;
2345};
2346
2347static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
2348 [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user),
2349 [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user),
2350 [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user),
2351 [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user),
2352 [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user),
2353 [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user),
2354 [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
2355 [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
2356 [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user),
2357 [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user),
2358};
2359
2360union ip_vs_set_arglen {
2361 struct ip_vs_service_user field_IP_VS_SO_SET_ADD;
2362 struct ip_vs_service_user field_IP_VS_SO_SET_EDIT;
2363 struct ip_vs_service_user field_IP_VS_SO_SET_DEL;
2364 struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST;
2365 struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST;
2366 struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST;
2367 struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT;
2368 struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON;
2369 struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON;
2370 struct ip_vs_service_user field_IP_VS_SO_SET_ZERO;
2371};
2372
2373#define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen)
2374
2375static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2376 struct ip_vs_service_user *usvc_compat)
2377{
2378 memset(usvc, 0, sizeof(*usvc));
2379
2380 usvc->af = AF_INET;
2381 usvc->protocol = usvc_compat->protocol;
2382 usvc->addr.ip = usvc_compat->addr;
2383 usvc->port = usvc_compat->port;
2384 usvc->fwmark = usvc_compat->fwmark;
2385
2386
2387 usvc->sched_name = usvc_compat->sched_name;
2388
2389 usvc->flags = usvc_compat->flags;
2390 usvc->timeout = usvc_compat->timeout;
2391 usvc->netmask = usvc_compat->netmask;
2392}
2393
2394static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2395 struct ip_vs_dest_user *udest_compat)
2396{
2397 memset(udest, 0, sizeof(*udest));
2398
2399 udest->addr.ip = udest_compat->addr;
2400 udest->port = udest_compat->port;
2401 udest->conn_flags = udest_compat->conn_flags;
2402 udest->weight = udest_compat->weight;
2403 udest->u_threshold = udest_compat->u_threshold;
2404 udest->l_threshold = udest_compat->l_threshold;
2405 udest->af = AF_INET;
2406 udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
2407}
2408
2409static int
2410do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2411{
2412 struct net *net = sock_net(sk);
2413 int ret;
2414 unsigned char arg[MAX_SET_ARGLEN];
2415 struct ip_vs_service_user *usvc_compat;
2416 struct ip_vs_service_user_kern usvc;
2417 struct ip_vs_service *svc;
2418 struct ip_vs_dest_user *udest_compat;
2419 struct ip_vs_dest_user_kern udest;
2420 struct netns_ipvs *ipvs = net_ipvs(net);
2421
2422 BUILD_BUG_ON(sizeof(arg) > 255);
2423 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2424 return -EPERM;
2425
2426 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2427 return -EINVAL;
2428 if (len != set_arglen[CMDID(cmd)]) {
2429 IP_VS_DBG(1, "set_ctl: len %u != %u\n",
2430 len, set_arglen[CMDID(cmd)]);
2431 return -EINVAL;
2432 }
2433
2434 if (copy_from_user(arg, user, len) != 0)
2435 return -EFAULT;
2436
2437
2438 ip_vs_use_count_inc();
2439
2440
2441 if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2442 cmd == IP_VS_SO_SET_STOPDAEMON) {
2443 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2444
2445 if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2446 struct ipvs_sync_daemon_cfg cfg;
2447
2448 memset(&cfg, 0, sizeof(cfg));
2449 ret = -EINVAL;
2450 if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
2451 sizeof(cfg.mcast_ifn)) <= 0)
2452 goto out_dec;
2453 cfg.syncid = dm->syncid;
2454 ret = start_sync_thread(ipvs, &cfg, dm->state);
2455 } else {
2456 ret = stop_sync_thread(ipvs, dm->state);
2457 }
2458 goto out_dec;
2459 }
2460
2461 mutex_lock(&__ip_vs_mutex);
2462 if (cmd == IP_VS_SO_SET_FLUSH) {
2463
2464 ret = ip_vs_flush(ipvs, false);
2465 goto out_unlock;
2466 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2467
2468 ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
2469 goto out_unlock;
2470 }
2471
2472 usvc_compat = (struct ip_vs_service_user *)arg;
2473 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2474
2475
2476
2477 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2478 ip_vs_copy_udest_compat(&udest, udest_compat);
2479
2480 if (cmd == IP_VS_SO_SET_ZERO) {
2481
2482 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2483 ret = ip_vs_zero_all(ipvs);
2484 goto out_unlock;
2485 }
2486 }
2487
2488 if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
2489 strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
2490 IP_VS_SCHEDNAME_MAXLEN) {
2491 ret = -EINVAL;
2492 goto out_unlock;
2493 }
2494
2495
2496 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2497 usvc.protocol != IPPROTO_SCTP) {
2498 pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
2499 usvc.protocol, &usvc.addr.ip,
2500 ntohs(usvc.port));
2501 ret = -EFAULT;
2502 goto out_unlock;
2503 }
2504
2505
2506 rcu_read_lock();
2507 if (usvc.fwmark == 0)
2508 svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
2509 &usvc.addr, usvc.port);
2510 else
2511 svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
2512 rcu_read_unlock();
2513
2514 if (cmd != IP_VS_SO_SET_ADD
2515 && (svc == NULL || svc->protocol != usvc.protocol)) {
2516 ret = -ESRCH;
2517 goto out_unlock;
2518 }
2519
2520 switch (cmd) {
2521 case IP_VS_SO_SET_ADD:
2522 if (svc != NULL)
2523 ret = -EEXIST;
2524 else
2525 ret = ip_vs_add_service(ipvs, &usvc, &svc);
2526 break;
2527 case IP_VS_SO_SET_EDIT:
2528 ret = ip_vs_edit_service(svc, &usvc);
2529 break;
2530 case IP_VS_SO_SET_DEL:
2531 ret = ip_vs_del_service(svc);
2532 if (!ret)
2533 goto out_unlock;
2534 break;
2535 case IP_VS_SO_SET_ZERO:
2536 ret = ip_vs_zero_service(svc);
2537 break;
2538 case IP_VS_SO_SET_ADDDEST:
2539 ret = ip_vs_add_dest(svc, &udest);
2540 break;
2541 case IP_VS_SO_SET_EDITDEST:
2542 ret = ip_vs_edit_dest(svc, &udest);
2543 break;
2544 case IP_VS_SO_SET_DELDEST:
2545 ret = ip_vs_del_dest(svc, &udest);
2546 break;
2547 default:
2548 ret = -EINVAL;
2549 }
2550
2551 out_unlock:
2552 mutex_unlock(&__ip_vs_mutex);
2553 out_dec:
2554
2555 ip_vs_use_count_dec();
2556
2557 return ret;
2558}
2559
2560
2561static void
2562ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2563{
2564 struct ip_vs_scheduler *sched;
2565 struct ip_vs_kstats kstats;
2566 char *sched_name;
2567
2568 sched = rcu_dereference_protected(src->scheduler, 1);
2569 sched_name = sched ? sched->name : "none";
2570 dst->protocol = src->protocol;
2571 dst->addr = src->addr.ip;
2572 dst->port = src->port;
2573 dst->fwmark = src->fwmark;
2574 strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
2575 dst->flags = src->flags;
2576 dst->timeout = src->timeout / HZ;
2577 dst->netmask = src->netmask;
2578 dst->num_dests = src->num_dests;
2579 ip_vs_copy_stats(&kstats, &src->stats);
2580 ip_vs_export_stats_user(&dst->stats, &kstats);
2581}
2582
2583static inline int
2584__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
2585 const struct ip_vs_get_services *get,
2586 struct ip_vs_get_services __user *uptr)
2587{
2588 int idx, count=0;
2589 struct ip_vs_service *svc;
2590 struct ip_vs_service_entry entry;
2591 int ret = 0;
2592
2593 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2594 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2595
2596 if (svc->af != AF_INET || (svc->ipvs != ipvs))
2597 continue;
2598
2599 if (count >= get->num_services)
2600 goto out;
2601 memset(&entry, 0, sizeof(entry));
2602 ip_vs_copy_service(&entry, svc);
2603 if (copy_to_user(&uptr->entrytable[count],
2604 &entry, sizeof(entry))) {
2605 ret = -EFAULT;
2606 goto out;
2607 }
2608 count++;
2609 }
2610 }
2611
2612 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2613 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2614
2615 if (svc->af != AF_INET || (svc->ipvs != ipvs))
2616 continue;
2617
2618 if (count >= get->num_services)
2619 goto out;
2620 memset(&entry, 0, sizeof(entry));
2621 ip_vs_copy_service(&entry, svc);
2622 if (copy_to_user(&uptr->entrytable[count],
2623 &entry, sizeof(entry))) {
2624 ret = -EFAULT;
2625 goto out;
2626 }
2627 count++;
2628 }
2629 }
2630out:
2631 return ret;
2632}
2633
2634static inline int
2635__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
2636 struct ip_vs_get_dests __user *uptr)
2637{
2638 struct ip_vs_service *svc;
2639 union nf_inet_addr addr = { .ip = get->addr };
2640 int ret = 0;
2641
2642 rcu_read_lock();
2643 if (get->fwmark)
2644 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
2645 else
2646 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
2647 get->port);
2648 rcu_read_unlock();
2649
2650 if (svc) {
2651 int count = 0;
2652 struct ip_vs_dest *dest;
2653 struct ip_vs_dest_entry entry;
2654 struct ip_vs_kstats kstats;
2655
2656 memset(&entry, 0, sizeof(entry));
2657 list_for_each_entry(dest, &svc->destinations, n_list) {
2658 if (count >= get->num_dests)
2659 break;
2660
2661
2662
2663
2664 if (dest->af != svc->af)
2665 continue;
2666
2667 entry.addr = dest->addr.ip;
2668 entry.port = dest->port;
2669 entry.conn_flags = atomic_read(&dest->conn_flags);
2670 entry.weight = atomic_read(&dest->weight);
2671 entry.u_threshold = dest->u_threshold;
2672 entry.l_threshold = dest->l_threshold;
2673 entry.activeconns = atomic_read(&dest->activeconns);
2674 entry.inactconns = atomic_read(&dest->inactconns);
2675 entry.persistconns = atomic_read(&dest->persistconns);
2676 ip_vs_copy_stats(&kstats, &dest->stats);
2677 ip_vs_export_stats_user(&entry.stats, &kstats);
2678 if (copy_to_user(&uptr->entrytable[count],
2679 &entry, sizeof(entry))) {
2680 ret = -EFAULT;
2681 break;
2682 }
2683 count++;
2684 }
2685 } else
2686 ret = -ESRCH;
2687 return ret;
2688}
2689
2690static inline void
2691__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2692{
2693#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2694 struct ip_vs_proto_data *pd;
2695#endif
2696
2697 memset(u, 0, sizeof (*u));
2698
2699#ifdef CONFIG_IP_VS_PROTO_TCP
2700 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2701 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2702 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2703#endif
2704#ifdef CONFIG_IP_VS_PROTO_UDP
2705 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2706 u->udp_timeout =
2707 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2708#endif
2709}
2710
2711static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
2712 [CMDID(IP_VS_SO_GET_VERSION)] = 64,
2713 [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo),
2714 [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
2715 [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry),
2716 [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests),
2717 [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
2718 [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user),
2719};
2720
2721union ip_vs_get_arglen {
2722 char field_IP_VS_SO_GET_VERSION[64];
2723 struct ip_vs_getinfo field_IP_VS_SO_GET_INFO;
2724 struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES;
2725 struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE;
2726 struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS;
2727 struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT;
2728 struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2];
2729};
2730
2731#define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen)
2732
2733static int
2734do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2735{
2736 unsigned char arg[MAX_GET_ARGLEN];
2737 int ret = 0;
2738 unsigned int copylen;
2739 struct net *net = sock_net(sk);
2740 struct netns_ipvs *ipvs = net_ipvs(net);
2741
2742 BUG_ON(!net);
2743 BUILD_BUG_ON(sizeof(arg) > 255);
2744 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2745 return -EPERM;
2746
2747 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2748 return -EINVAL;
2749
2750 copylen = get_arglen[CMDID(cmd)];
2751 if (*len < (int) copylen) {
2752 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
2753 return -EINVAL;
2754 }
2755
2756 if (copy_from_user(arg, user, copylen) != 0)
2757 return -EFAULT;
2758
2759
2760
2761 if (cmd == IP_VS_SO_GET_DAEMON) {
2762 struct ip_vs_daemon_user d[2];
2763
2764 memset(&d, 0, sizeof(d));
2765 mutex_lock(&ipvs->sync_mutex);
2766 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2767 d[0].state = IP_VS_STATE_MASTER;
2768 strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
2769 sizeof(d[0].mcast_ifn));
2770 d[0].syncid = ipvs->mcfg.syncid;
2771 }
2772 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2773 d[1].state = IP_VS_STATE_BACKUP;
2774 strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
2775 sizeof(d[1].mcast_ifn));
2776 d[1].syncid = ipvs->bcfg.syncid;
2777 }
2778 if (copy_to_user(user, &d, sizeof(d)) != 0)
2779 ret = -EFAULT;
2780 mutex_unlock(&ipvs->sync_mutex);
2781 return ret;
2782 }
2783
2784 mutex_lock(&__ip_vs_mutex);
2785 switch (cmd) {
2786 case IP_VS_SO_GET_VERSION:
2787 {
2788 char buf[64];
2789
2790 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2791 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2792 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2793 ret = -EFAULT;
2794 goto out;
2795 }
2796 *len = strlen(buf)+1;
2797 }
2798 break;
2799
2800 case IP_VS_SO_GET_INFO:
2801 {
2802 struct ip_vs_getinfo info;
2803 info.version = IP_VS_VERSION_CODE;
2804 info.size = ip_vs_conn_tab_size;
2805 info.num_services = ipvs->num_services;
2806 if (copy_to_user(user, &info, sizeof(info)) != 0)
2807 ret = -EFAULT;
2808 }
2809 break;
2810
2811 case IP_VS_SO_GET_SERVICES:
2812 {
2813 struct ip_vs_get_services *get;
2814 int size;
2815
2816 get = (struct ip_vs_get_services *)arg;
2817 size = struct_size(get, entrytable, get->num_services);
2818 if (*len != size) {
2819 pr_err("length: %u != %u\n", *len, size);
2820 ret = -EINVAL;
2821 goto out;
2822 }
2823 ret = __ip_vs_get_service_entries(ipvs, get, user);
2824 }
2825 break;
2826
2827 case IP_VS_SO_GET_SERVICE:
2828 {
2829 struct ip_vs_service_entry *entry;
2830 struct ip_vs_service *svc;
2831 union nf_inet_addr addr;
2832
2833 entry = (struct ip_vs_service_entry *)arg;
2834 addr.ip = entry->addr;
2835 rcu_read_lock();
2836 if (entry->fwmark)
2837 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
2838 else
2839 svc = __ip_vs_service_find(ipvs, AF_INET,
2840 entry->protocol, &addr,
2841 entry->port);
2842 rcu_read_unlock();
2843 if (svc) {
2844 ip_vs_copy_service(entry, svc);
2845 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2846 ret = -EFAULT;
2847 } else
2848 ret = -ESRCH;
2849 }
2850 break;
2851
2852 case IP_VS_SO_GET_DESTS:
2853 {
2854 struct ip_vs_get_dests *get;
2855 int size;
2856
2857 get = (struct ip_vs_get_dests *)arg;
2858 size = struct_size(get, entrytable, get->num_dests);
2859 if (*len != size) {
2860 pr_err("length: %u != %u\n", *len, size);
2861 ret = -EINVAL;
2862 goto out;
2863 }
2864 ret = __ip_vs_get_dest_entries(ipvs, get, user);
2865 }
2866 break;
2867
2868 case IP_VS_SO_GET_TIMEOUT:
2869 {
2870 struct ip_vs_timeout_user t;
2871
2872 __ip_vs_get_timeouts(ipvs, &t);
2873 if (copy_to_user(user, &t, sizeof(t)) != 0)
2874 ret = -EFAULT;
2875 }
2876 break;
2877
2878 default:
2879 ret = -EINVAL;
2880 }
2881
2882out:
2883 mutex_unlock(&__ip_vs_mutex);
2884 return ret;
2885}
2886
2887
2888static struct nf_sockopt_ops ip_vs_sockopts = {
2889 .pf = PF_INET,
2890 .set_optmin = IP_VS_BASE_CTL,
2891 .set_optmax = IP_VS_SO_SET_MAX+1,
2892 .set = do_ip_vs_set_ctl,
2893 .get_optmin = IP_VS_BASE_CTL,
2894 .get_optmax = IP_VS_SO_GET_MAX+1,
2895 .get = do_ip_vs_get_ctl,
2896 .owner = THIS_MODULE,
2897};
2898
2899
2900
2901
2902
2903
2904static struct genl_family ip_vs_genl_family;
2905
2906
2907static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2908 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2909 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2910 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2911 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2912 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2913 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2914};
2915
2916
2917static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2918 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2919 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2920 .len = IP_VS_IFNAME_MAXLEN - 1 },
2921 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2922 [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
2923 [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
2924 [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
2925 [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 },
2926 [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 },
2927};
2928
2929
2930static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2931 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2932 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2933 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2934 .len = sizeof(union nf_inet_addr) },
2935 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2936 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2937 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2938 .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
2939 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2940 .len = IP_VS_PENAME_MAXLEN },
2941 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2942 .len = sizeof(struct ip_vs_flags) },
2943 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2944 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2945 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2946};
2947
2948
2949static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2950 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2951 .len = sizeof(union nf_inet_addr) },
2952 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2953 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2954 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2955 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2956 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2957 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2958 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2959 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2960 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2961 [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
2962 [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
2963 [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
2964 [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 },
2965};
2966
2967static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2968 struct ip_vs_kstats *kstats)
2969{
2970 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
2971
2972 if (!nl_stats)
2973 return -EMSGSIZE;
2974
2975 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
2976 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
2977 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
2978 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
2979 IPVS_STATS_ATTR_PAD) ||
2980 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
2981 IPVS_STATS_ATTR_PAD) ||
2982 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
2983 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
2984 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
2985 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
2986 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
2987 goto nla_put_failure;
2988 nla_nest_end(skb, nl_stats);
2989
2990 return 0;
2991
2992nla_put_failure:
2993 nla_nest_cancel(skb, nl_stats);
2994 return -EMSGSIZE;
2995}
2996
2997static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
2998 struct ip_vs_kstats *kstats)
2999{
3000 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
3001
3002 if (!nl_stats)
3003 return -EMSGSIZE;
3004
3005 if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns,
3006 IPVS_STATS_ATTR_PAD) ||
3007 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts,
3008 IPVS_STATS_ATTR_PAD) ||
3009 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts,
3010 IPVS_STATS_ATTR_PAD) ||
3011 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
3012 IPVS_STATS_ATTR_PAD) ||
3013 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
3014 IPVS_STATS_ATTR_PAD) ||
3015 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps,
3016 IPVS_STATS_ATTR_PAD) ||
3017 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps,
3018 IPVS_STATS_ATTR_PAD) ||
3019 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps,
3020 IPVS_STATS_ATTR_PAD) ||
3021 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps,
3022 IPVS_STATS_ATTR_PAD) ||
3023 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps,
3024 IPVS_STATS_ATTR_PAD))
3025 goto nla_put_failure;
3026 nla_nest_end(skb, nl_stats);
3027
3028 return 0;
3029
3030nla_put_failure:
3031 nla_nest_cancel(skb, nl_stats);
3032 return -EMSGSIZE;
3033}
3034
3035static int ip_vs_genl_fill_service(struct sk_buff *skb,
3036 struct ip_vs_service *svc)
3037{
3038 struct ip_vs_scheduler *sched;
3039 struct ip_vs_pe *pe;
3040 struct nlattr *nl_service;
3041 struct ip_vs_flags flags = { .flags = svc->flags,
3042 .mask = ~0 };
3043 struct ip_vs_kstats kstats;
3044 char *sched_name;
3045
3046 nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE);
3047 if (!nl_service)
3048 return -EMSGSIZE;
3049
3050 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
3051 goto nla_put_failure;
3052 if (svc->fwmark) {
3053 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
3054 goto nla_put_failure;
3055 } else {
3056 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
3057 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
3058 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
3059 goto nla_put_failure;
3060 }
3061
3062 sched = rcu_dereference_protected(svc->scheduler, 1);
3063 sched_name = sched ? sched->name : "none";
3064 pe = rcu_dereference_protected(svc->pe, 1);
3065 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
3066 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
3067 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
3068 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
3069 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
3070 goto nla_put_failure;
3071 ip_vs_copy_stats(&kstats, &svc->stats);
3072 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
3073 goto nla_put_failure;
3074 if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
3075 goto nla_put_failure;
3076
3077 nla_nest_end(skb, nl_service);
3078
3079 return 0;
3080
3081nla_put_failure:
3082 nla_nest_cancel(skb, nl_service);
3083 return -EMSGSIZE;
3084}
3085
3086static int ip_vs_genl_dump_service(struct sk_buff *skb,
3087 struct ip_vs_service *svc,
3088 struct netlink_callback *cb)
3089{
3090 void *hdr;
3091
3092 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3093 &ip_vs_genl_family, NLM_F_MULTI,
3094 IPVS_CMD_NEW_SERVICE);
3095 if (!hdr)
3096 return -EMSGSIZE;
3097
3098 if (ip_vs_genl_fill_service(skb, svc) < 0)
3099 goto nla_put_failure;
3100
3101 genlmsg_end(skb, hdr);
3102 return 0;
3103
3104nla_put_failure:
3105 genlmsg_cancel(skb, hdr);
3106 return -EMSGSIZE;
3107}
3108
3109static int ip_vs_genl_dump_services(struct sk_buff *skb,
3110 struct netlink_callback *cb)
3111{
3112 int idx = 0, i;
3113 int start = cb->args[0];
3114 struct ip_vs_service *svc;
3115 struct net *net = sock_net(skb->sk);
3116 struct netns_ipvs *ipvs = net_ipvs(net);
3117
3118 mutex_lock(&__ip_vs_mutex);
3119 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3120 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
3121 if (++idx <= start || (svc->ipvs != ipvs))
3122 continue;
3123 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3124 idx--;
3125 goto nla_put_failure;
3126 }
3127 }
3128 }
3129
3130 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3131 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
3132 if (++idx <= start || (svc->ipvs != ipvs))
3133 continue;
3134 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3135 idx--;
3136 goto nla_put_failure;
3137 }
3138 }
3139 }
3140
3141nla_put_failure:
3142 mutex_unlock(&__ip_vs_mutex);
3143 cb->args[0] = idx;
3144
3145 return skb->len;
3146}
3147
/*
 * Tell whether address family @af can be used.  AF_INET always can;
 * AF_INET6 only when CONFIG_IP_VS_IPV6 is set and ipv6_mod_enabled()
 * reports true.  Every other value is rejected.
 */
static bool ip_vs_is_af_valid(int af)
{
	switch (af) {
	case AF_INET:
		return true;
#ifdef CONFIG_IP_VS_IPV6
	case AF_INET6:
		return ipv6_mod_enabled();
#endif
	default:
		return false;
	}
}
3158
3159static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
3160 struct ip_vs_service_user_kern *usvc,
3161 struct nlattr *nla, bool full_entry,
3162 struct ip_vs_service **ret_svc)
3163{
3164 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3165 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3166 struct ip_vs_service *svc;
3167
3168
3169 if (nla == NULL ||
3170 nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL))
3171 return -EINVAL;
3172
3173 nla_af = attrs[IPVS_SVC_ATTR_AF];
3174 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
3175 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
3176 nla_port = attrs[IPVS_SVC_ATTR_PORT];
3177 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
3178
3179 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3180 return -EINVAL;
3181
3182 memset(usvc, 0, sizeof(*usvc));
3183
3184 usvc->af = nla_get_u16(nla_af);
3185 if (!ip_vs_is_af_valid(usvc->af))
3186 return -EAFNOSUPPORT;
3187
3188 if (nla_fwmark) {
3189 usvc->protocol = IPPROTO_TCP;
3190 usvc->fwmark = nla_get_u32(nla_fwmark);
3191 } else {
3192 usvc->protocol = nla_get_u16(nla_protocol);
3193 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3194 usvc->port = nla_get_be16(nla_port);
3195 usvc->fwmark = 0;
3196 }
3197
3198 rcu_read_lock();
3199 if (usvc->fwmark)
3200 svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
3201 else
3202 svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
3203 &usvc->addr, usvc->port);
3204 rcu_read_unlock();
3205 *ret_svc = svc;
3206
3207
3208 if (full_entry) {
3209 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3210 *nla_netmask;
3211 struct ip_vs_flags flags;
3212
3213 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3214 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3215 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3216 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3217 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3218
3219 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3220 return -EINVAL;
3221
3222 nla_memcpy(&flags, nla_flags, sizeof(flags));
3223
3224
3225 if (svc)
3226 usvc->flags = svc->flags;
3227
3228
3229 usvc->flags = (usvc->flags & ~flags.mask) |
3230 (flags.flags & flags.mask);
3231 usvc->sched_name = nla_data(nla_sched);
3232 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3233 usvc->timeout = nla_get_u32(nla_timeout);
3234 usvc->netmask = nla_get_be32(nla_netmask);
3235 }
3236
3237 return 0;
3238}
3239
3240static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
3241 struct nlattr *nla)
3242{
3243 struct ip_vs_service_user_kern usvc;
3244 struct ip_vs_service *svc;
3245 int ret;
3246
3247 ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
3248 return ret ? ERR_PTR(ret) : svc;
3249}
3250
3251static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3252{
3253 struct nlattr *nl_dest;
3254 struct ip_vs_kstats kstats;
3255
3256 nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST);
3257 if (!nl_dest)
3258 return -EMSGSIZE;
3259
3260 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3261 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3262 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3263 (atomic_read(&dest->conn_flags) &
3264 IP_VS_CONN_F_FWD_MASK)) ||
3265 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3266 atomic_read(&dest->weight)) ||
3267 nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
3268 dest->tun_type) ||
3269 nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
3270 dest->tun_port) ||
3271 nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
3272 dest->tun_flags) ||
3273 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3274 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3275 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3276 atomic_read(&dest->activeconns)) ||
3277 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3278 atomic_read(&dest->inactconns)) ||
3279 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3280 atomic_read(&dest->persistconns)) ||
3281 nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
3282 goto nla_put_failure;
3283 ip_vs_copy_stats(&kstats, &dest->stats);
3284 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
3285 goto nla_put_failure;
3286 if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
3287 goto nla_put_failure;
3288
3289 nla_nest_end(skb, nl_dest);
3290
3291 return 0;
3292
3293nla_put_failure:
3294 nla_nest_cancel(skb, nl_dest);
3295 return -EMSGSIZE;
3296}
3297
3298static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3299 struct netlink_callback *cb)
3300{
3301 void *hdr;
3302
3303 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3304 &ip_vs_genl_family, NLM_F_MULTI,
3305 IPVS_CMD_NEW_DEST);
3306 if (!hdr)
3307 return -EMSGSIZE;
3308
3309 if (ip_vs_genl_fill_dest(skb, dest) < 0)
3310 goto nla_put_failure;
3311
3312 genlmsg_end(skb, hdr);
3313 return 0;
3314
3315nla_put_failure:
3316 genlmsg_cancel(skb, hdr);
3317 return -EMSGSIZE;
3318}
3319
3320static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3321 struct netlink_callback *cb)
3322{
3323 int idx = 0;
3324 int start = cb->args[0];
3325 struct ip_vs_service *svc;
3326 struct ip_vs_dest *dest;
3327 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3328 struct net *net = sock_net(skb->sk);
3329 struct netns_ipvs *ipvs = net_ipvs(net);
3330
3331 mutex_lock(&__ip_vs_mutex);
3332
3333
3334 if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack))
3335 goto out_err;
3336
3337
3338 svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
3339 if (IS_ERR_OR_NULL(svc))
3340 goto out_err;
3341
3342
3343 list_for_each_entry(dest, &svc->destinations, n_list) {
3344 if (++idx <= start)
3345 continue;
3346 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3347 idx--;
3348 goto nla_put_failure;
3349 }
3350 }
3351
3352nla_put_failure:
3353 cb->args[0] = idx;
3354
3355out_err:
3356 mutex_unlock(&__ip_vs_mutex);
3357
3358 return skb->len;
3359}
3360
3361static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3362 struct nlattr *nla, bool full_entry)
3363{
3364 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3365 struct nlattr *nla_addr, *nla_port;
3366 struct nlattr *nla_addr_family;
3367
3368
3369 if (nla == NULL ||
3370 nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL))
3371 return -EINVAL;
3372
3373 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3374 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3375 nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
3376
3377 if (!(nla_addr && nla_port))
3378 return -EINVAL;
3379
3380 memset(udest, 0, sizeof(*udest));
3381
3382 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3383 udest->port = nla_get_be16(nla_port);
3384
3385 if (nla_addr_family)
3386 udest->af = nla_get_u16(nla_addr_family);
3387 else
3388 udest->af = 0;
3389
3390
3391 if (full_entry) {
3392 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3393 *nla_l_thresh, *nla_tun_type, *nla_tun_port,
3394 *nla_tun_flags;
3395
3396 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3397 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3398 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3399 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3400 nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
3401 nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
3402 nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS];
3403
3404 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3405 return -EINVAL;
3406
3407 udest->conn_flags = nla_get_u32(nla_fwd)
3408 & IP_VS_CONN_F_FWD_MASK;
3409 udest->weight = nla_get_u32(nla_weight);
3410 udest->u_threshold = nla_get_u32(nla_u_thresh);
3411 udest->l_threshold = nla_get_u32(nla_l_thresh);
3412
3413 if (nla_tun_type)
3414 udest->tun_type = nla_get_u8(nla_tun_type);
3415
3416 if (nla_tun_port)
3417 udest->tun_port = nla_get_be16(nla_tun_port);
3418
3419 if (nla_tun_flags)
3420 udest->tun_flags = nla_get_u16(nla_tun_flags);
3421 }
3422
3423 return 0;
3424}
3425
3426static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
3427 struct ipvs_sync_daemon_cfg *c)
3428{
3429 struct nlattr *nl_daemon;
3430
3431 nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON);
3432 if (!nl_daemon)
3433 return -EMSGSIZE;
3434
3435 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3436 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
3437 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
3438 nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
3439 nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
3440 nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
3441 goto nla_put_failure;
3442#ifdef CONFIG_IP_VS_IPV6
3443 if (c->mcast_af == AF_INET6) {
3444 if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
3445 &c->mcast_group.in6))
3446 goto nla_put_failure;
3447 } else
3448#endif
3449 if (c->mcast_af == AF_INET &&
3450 nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
3451 c->mcast_group.ip))
3452 goto nla_put_failure;
3453 nla_nest_end(skb, nl_daemon);
3454
3455 return 0;
3456
3457nla_put_failure:
3458 nla_nest_cancel(skb, nl_daemon);
3459 return -EMSGSIZE;
3460}
3461
3462static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
3463 struct ipvs_sync_daemon_cfg *c,
3464 struct netlink_callback *cb)
3465{
3466 void *hdr;
3467 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3468 &ip_vs_genl_family, NLM_F_MULTI,
3469 IPVS_CMD_NEW_DAEMON);
3470 if (!hdr)
3471 return -EMSGSIZE;
3472
3473 if (ip_vs_genl_fill_daemon(skb, state, c))
3474 goto nla_put_failure;
3475
3476 genlmsg_end(skb, hdr);
3477 return 0;
3478
3479nla_put_failure:
3480 genlmsg_cancel(skb, hdr);
3481 return -EMSGSIZE;
3482}
3483
3484static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3485 struct netlink_callback *cb)
3486{
3487 struct net *net = sock_net(skb->sk);
3488 struct netns_ipvs *ipvs = net_ipvs(net);
3489
3490 mutex_lock(&ipvs->sync_mutex);
3491 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3492 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3493 &ipvs->mcfg, cb) < 0)
3494 goto nla_put_failure;
3495
3496 cb->args[0] = 1;
3497 }
3498
3499 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3500 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3501 &ipvs->bcfg, cb) < 0)
3502 goto nla_put_failure;
3503
3504 cb->args[1] = 1;
3505 }
3506
3507nla_put_failure:
3508 mutex_unlock(&ipvs->sync_mutex);
3509
3510 return skb->len;
3511}
3512
3513static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3514{
3515 struct ipvs_sync_daemon_cfg c;
3516 struct nlattr *a;
3517 int ret;
3518
3519 memset(&c, 0, sizeof(c));
3520 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3521 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3522 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3523 return -EINVAL;
3524 strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3525 sizeof(c.mcast_ifn));
3526 c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
3527
3528 a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
3529 if (a)
3530 c.sync_maxlen = nla_get_u16(a);
3531
3532 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
3533 if (a) {
3534 c.mcast_af = AF_INET;
3535 c.mcast_group.ip = nla_get_in_addr(a);
3536 if (!ipv4_is_multicast(c.mcast_group.ip))
3537 return -EINVAL;
3538 } else {
3539 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
3540 if (a) {
3541#ifdef CONFIG_IP_VS_IPV6
3542 int addr_type;
3543
3544 c.mcast_af = AF_INET6;
3545 c.mcast_group.in6 = nla_get_in6_addr(a);
3546 addr_type = ipv6_addr_type(&c.mcast_group.in6);
3547 if (!(addr_type & IPV6_ADDR_MULTICAST))
3548 return -EINVAL;
3549#else
3550 return -EAFNOSUPPORT;
3551#endif
3552 }
3553 }
3554
3555 a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
3556 if (a)
3557 c.mcast_port = nla_get_u16(a);
3558
3559 a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
3560 if (a)
3561 c.mcast_ttl = nla_get_u8(a);
3562
3563
3564
3565
3566 if (ipvs->mixed_address_family_dests > 0)
3567 return -EINVAL;
3568
3569 ret = start_sync_thread(ipvs, &c,
3570 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3571 return ret;
3572}
3573
3574static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3575{
3576 int ret;
3577
3578 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3579 return -EINVAL;
3580
3581 ret = stop_sync_thread(ipvs,
3582 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3583 return ret;
3584}
3585
3586static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
3587{
3588 struct ip_vs_timeout_user t;
3589
3590 __ip_vs_get_timeouts(ipvs, &t);
3591
3592 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3593 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3594
3595 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3596 t.tcp_fin_timeout =
3597 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3598
3599 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3600 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3601
3602 return ip_vs_set_timeout(ipvs, &t);
3603}
3604
3605static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3606{
3607 int ret = -EINVAL, cmd;
3608 struct net *net = sock_net(skb->sk);
3609 struct netns_ipvs *ipvs = net_ipvs(net);
3610
3611 cmd = info->genlhdr->cmd;
3612
3613 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3614 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3615
3616 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3617 nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack))
3618 goto out;
3619
3620 if (cmd == IPVS_CMD_NEW_DAEMON)
3621 ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
3622 else
3623 ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
3624 }
3625
3626out:
3627 return ret;
3628}
3629
3630static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3631{
3632 bool need_full_svc = false, need_full_dest = false;
3633 struct ip_vs_service *svc = NULL;
3634 struct ip_vs_service_user_kern usvc;
3635 struct ip_vs_dest_user_kern udest;
3636 int ret = 0, cmd;
3637 struct net *net = sock_net(skb->sk);
3638 struct netns_ipvs *ipvs = net_ipvs(net);
3639
3640 cmd = info->genlhdr->cmd;
3641
3642 mutex_lock(&__ip_vs_mutex);
3643
3644 if (cmd == IPVS_CMD_FLUSH) {
3645 ret = ip_vs_flush(ipvs, false);
3646 goto out;
3647 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3648 ret = ip_vs_genl_set_config(ipvs, info->attrs);
3649 goto out;
3650 } else if (cmd == IPVS_CMD_ZERO &&
3651 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3652 ret = ip_vs_zero_all(ipvs);
3653 goto out;
3654 }
3655
3656
3657
3658
3659 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3660 need_full_svc = true;
3661
3662 ret = ip_vs_genl_parse_service(ipvs, &usvc,
3663 info->attrs[IPVS_CMD_ATTR_SERVICE],
3664 need_full_svc, &svc);
3665 if (ret)
3666 goto out;
3667
3668
3669 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3670 ret = -ESRCH;
3671 goto out;
3672 }
3673
3674
3675
3676
3677 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3678 cmd == IPVS_CMD_DEL_DEST) {
3679 if (cmd != IPVS_CMD_DEL_DEST)
3680 need_full_dest = true;
3681
3682 ret = ip_vs_genl_parse_dest(&udest,
3683 info->attrs[IPVS_CMD_ATTR_DEST],
3684 need_full_dest);
3685 if (ret)
3686 goto out;
3687
3688
3689
3690
3691
3692
3693
3694 if (udest.af == 0)
3695 udest.af = svc->af;
3696
3697 if (!ip_vs_is_af_valid(udest.af)) {
3698 ret = -EAFNOSUPPORT;
3699 goto out;
3700 }
3701
3702 if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
3703
3704
3705
3706 if (ipvs->sync_state) {
3707 ret = -EINVAL;
3708 goto out;
3709 }
3710
3711
3712 switch (udest.conn_flags) {
3713 case IP_VS_CONN_F_TUNNEL:
3714
3715 break;
3716 default:
3717 ret = -EINVAL;
3718 goto out;
3719 }
3720 }
3721 }
3722
3723 switch (cmd) {
3724 case IPVS_CMD_NEW_SERVICE:
3725 if (svc == NULL)
3726 ret = ip_vs_add_service(ipvs, &usvc, &svc);
3727 else
3728 ret = -EEXIST;
3729 break;
3730 case IPVS_CMD_SET_SERVICE:
3731 ret = ip_vs_edit_service(svc, &usvc);
3732 break;
3733 case IPVS_CMD_DEL_SERVICE:
3734 ret = ip_vs_del_service(svc);
3735
3736 break;
3737 case IPVS_CMD_NEW_DEST:
3738 ret = ip_vs_add_dest(svc, &udest);
3739 break;
3740 case IPVS_CMD_SET_DEST:
3741 ret = ip_vs_edit_dest(svc, &udest);
3742 break;
3743 case IPVS_CMD_DEL_DEST:
3744 ret = ip_vs_del_dest(svc, &udest);
3745 break;
3746 case IPVS_CMD_ZERO:
3747 ret = ip_vs_zero_service(svc);
3748 break;
3749 default:
3750 ret = -EINVAL;
3751 }
3752
3753out:
3754 mutex_unlock(&__ip_vs_mutex);
3755
3756 return ret;
3757}
3758
3759static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3760{
3761 struct sk_buff *msg;
3762 void *reply;
3763 int ret, cmd, reply_cmd;
3764 struct net *net = sock_net(skb->sk);
3765 struct netns_ipvs *ipvs = net_ipvs(net);
3766
3767 cmd = info->genlhdr->cmd;
3768
3769 if (cmd == IPVS_CMD_GET_SERVICE)
3770 reply_cmd = IPVS_CMD_NEW_SERVICE;
3771 else if (cmd == IPVS_CMD_GET_INFO)
3772 reply_cmd = IPVS_CMD_SET_INFO;
3773 else if (cmd == IPVS_CMD_GET_CONFIG)
3774 reply_cmd = IPVS_CMD_SET_CONFIG;
3775 else {
3776 pr_err("unknown Generic Netlink command\n");
3777 return -EINVAL;
3778 }
3779
3780 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3781 if (!msg)
3782 return -ENOMEM;
3783
3784 mutex_lock(&__ip_vs_mutex);
3785
3786 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3787 if (reply == NULL)
3788 goto nla_put_failure;
3789
3790 switch (cmd) {
3791 case IPVS_CMD_GET_SERVICE:
3792 {
3793 struct ip_vs_service *svc;
3794
3795 svc = ip_vs_genl_find_service(ipvs,
3796 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3797 if (IS_ERR(svc)) {
3798 ret = PTR_ERR(svc);
3799 goto out_err;
3800 } else if (svc) {
3801 ret = ip_vs_genl_fill_service(msg, svc);
3802 if (ret)
3803 goto nla_put_failure;
3804 } else {
3805 ret = -ESRCH;
3806 goto out_err;
3807 }
3808
3809 break;
3810 }
3811
3812 case IPVS_CMD_GET_CONFIG:
3813 {
3814 struct ip_vs_timeout_user t;
3815
3816 __ip_vs_get_timeouts(ipvs, &t);
3817#ifdef CONFIG_IP_VS_PROTO_TCP
3818 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3819 t.tcp_timeout) ||
3820 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3821 t.tcp_fin_timeout))
3822 goto nla_put_failure;
3823#endif
3824#ifdef CONFIG_IP_VS_PROTO_UDP
3825 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3826 goto nla_put_failure;
3827#endif
3828
3829 break;
3830 }
3831
3832 case IPVS_CMD_GET_INFO:
3833 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3834 IP_VS_VERSION_CODE) ||
3835 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3836 ip_vs_conn_tab_size))
3837 goto nla_put_failure;
3838 break;
3839 }
3840
3841 genlmsg_end(msg, reply);
3842 ret = genlmsg_reply(msg, info);
3843 goto out;
3844
3845nla_put_failure:
3846 pr_err("not enough space in Netlink message\n");
3847 ret = -EMSGSIZE;
3848
3849out_err:
3850 nlmsg_free(msg);
3851out:
3852 mutex_unlock(&__ip_vs_mutex);
3853
3854 return ret;
3855}
3856
3857
/*
 * Generic Netlink operation table for the IPVS family.
 *
 * Every entry is marked GENL_ADMIN_PERM and opts out of strict attribute
 * validation (GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP).
 *
 * Dispatch summary:
 *   - service/dest add, edit, delete, SET_CONFIG, ZERO and FLUSH go through
 *     ip_vs_genl_set_cmd();
 *   - NEW_DAEMON / DEL_DAEMON go through ip_vs_genl_set_daemon();
 *   - GET_SERVICE, GET_CONFIG and GET_INFO are answered by
 *     ip_vs_genl_get_cmd(); GET_SERVICE additionally supports dumps;
 *   - GET_DEST and GET_DAEMON are dump-only (no .doit handler).
 */
static const struct genl_ops ip_vs_genl_ops[] = {
	{
		.cmd = IPVS_CMD_NEW_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_SET_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_DEL_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		/* Single lookup via .doit, full listing via .dumpit. */
		.cmd = IPVS_CMD_GET_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
		.dumpit = ip_vs_genl_dump_services,
	},
	{
		.cmd = IPVS_CMD_NEW_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_SET_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_DEL_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		/* Dump-only: destinations are always returned as a list. */
		.cmd = IPVS_CMD_GET_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.dumpit = ip_vs_genl_dump_dests,
	},
	{
		.cmd = IPVS_CMD_NEW_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_daemon,
	},
	{
		.cmd = IPVS_CMD_DEL_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_daemon,
	},
	{
		/* Dump-only. */
		.cmd = IPVS_CMD_GET_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.dumpit = ip_vs_genl_dump_daemons,
	},
	{
		.cmd = IPVS_CMD_SET_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_GET_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
	},
	{
		.cmd = IPVS_CMD_GET_INFO,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
	},
	{
		.cmd = IPVS_CMD_ZERO,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_FLUSH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
};
3957
/*
 * Generic Netlink family descriptor for IPVS.
 *
 * Ties the family name/version to the attribute policy (ip_vs_cmd_policy)
 * and to the operation table (ip_vs_genl_ops). Marked __ro_after_init, so it
 * is not expected to be modified once initialisation has finished.
 */
static struct genl_family ip_vs_genl_family __ro_after_init = {
	.hdrsize = 0,				/* no family-specific header */
	.name = IPVS_GENL_NAME,
	.version = IPVS_GENL_VERSION,
	.maxattr = IPVS_CMD_ATTR_MAX,
	.policy = ip_vs_cmd_policy,
	.netnsok = true,			/* family is network-namespace aware */
	.module = THIS_MODULE,
	.ops = ip_vs_genl_ops,
	.n_ops = ARRAY_SIZE(ip_vs_genl_ops),
};
3969
/*
 * Register the IPVS Generic Netlink family.
 *
 * Returns whatever genl_register_family() returns.
 */
static int __init ip_vs_genl_register(void)
{
	return genl_register_family(&ip_vs_genl_family);
}
3974
/*
 * Unregister the IPVS Generic Netlink family; counterpart of
 * ip_vs_genl_register(). The result of genl_unregister_family() is ignored.
 */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
3979
3980
3981
3982
3983
3984
3985#ifdef CONFIG_SYSCTL
/*
 * Per-netns sysctl setup for IPVS.
 *
 * Initialises the defense-mode locks, obtains a sysctl table for this
 * namespace (vs_vars itself for init_net, a kmemdup()'ed copy otherwise),
 * points every table entry at the matching per-netns field, seeds the
 * non-zero defaults, registers the table under "net/ipv4/vs", starts the
 * total-stats estimator and schedules the periodic defense work.
 *
 * Returns 0 on success or -ENOMEM if the table copy or the sysctl
 * registration fails.
 */
static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;
	int idx;
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	if (!net_eq(net, &init_net)) {
		/* Non-initial namespaces get a private copy of the template. */
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;

		/*
		 * Namespace owned by a non-initial user namespace: blank the
		 * first entry's name. NOTE(review): presumably this makes the
		 * table appear empty so no sysctls are exported -- confirm.
		 */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	} else
		tbl = vs_vars;

	/* Defense-mode handlers receive this namespace through extra2. */
	for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
		if (tbl[idx].proc_handler == proc_do_defense_mode)
			tbl[idx].extra2 = ipvs;
	}

	/*
	 * Wire each table slot to its per-netns variable and set defaults.
	 * Slots are addressed purely by position (idx++), so the sequence
	 * below has to stay in step with the entry order of vs_vars,
	 * including the CONFIG_IP_VS_NFCT conditional slot.
	 */
	idx = 0;
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
#endif
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	ipvs->sysctl_sync_ports = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ports;
	tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
	ipvs->sysctl_sync_sock_size = 0;
	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
	tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	/* Two-element array: both .data and .maxlen are set for this slot. */
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
	tbl[idx++].data = &ipvs->sysctl_sync_retries;
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
	ipvs->sysctl_pmtu_disc = 1;
	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
	tbl[idx++].data = &ipvs->sysctl_backup_only;
	ipvs->sysctl_conn_reuse_mode = 1;
	tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
	tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
	tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;

	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
	if (ipvs->sysctl_hdr == NULL) {
		/* Only the duplicated table is ours to free. */
		if (!net_eq(net, &init_net))
			kfree(tbl);
		return -ENOMEM;
	}
	ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
	/* Remembered so the cleanup path can free the copy. */
	ipvs->sysctl_tbl = tbl;

	/* Kick off the periodic defense work. */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);

	return 0;
}
4070
4071static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
4072{
4073 struct net *net = ipvs->net;
4074
4075 cancel_delayed_work_sync(&ipvs->defense_work);
4076 cancel_work_sync(&ipvs->defense_work.work);
4077 unregister_net_sysctl_table(ipvs->sysctl_hdr);
4078 ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
4079
4080 if (!net_eq(net, &init_net))
4081 kfree(ipvs->sysctl_tbl);
4082}
4083
4084#else
4085
4086static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
4087static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
4088
4089#endif
4090
/*
 * Netdevice notifier block that routes device events to ip_vs_dst_event().
 * With IPv6 support built in, its priority is set to
 * ADDRCONF_NOTIFY_PRIORITY + 5; otherwise the default priority is used.
 */
static struct notifier_block ip_vs_dst_notifier = {
	.notifier_call = ip_vs_dst_event,
#ifdef CONFIG_IP_VS_IPV6
	.priority = ADDRCONF_NOTIFY_PRIORITY + 5,
#endif
};
4097
4098int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
4099{
4100 int i, idx;
4101
4102
4103 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
4104 INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
4105
4106 INIT_LIST_HEAD(&ipvs->dest_trash);
4107 spin_lock_init(&ipvs->dest_trash_lock);
4108 timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
4109 atomic_set(&ipvs->ftpsvc_counter, 0);
4110 atomic_set(&ipvs->nullsvc_counter, 0);
4111 atomic_set(&ipvs->conn_out_counter, 0);
4112
4113
4114 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
4115 if (!ipvs->tot_stats.cpustats)
4116 return -ENOMEM;
4117
4118 for_each_possible_cpu(i) {
4119 struct ip_vs_cpu_stats *ipvs_tot_stats;
4120 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
4121 u64_stats_init(&ipvs_tot_stats->syncp);
4122 }
4123
4124 spin_lock_init(&ipvs->tot_stats.lock);
4125
4126 proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops,
4127 sizeof(struct ip_vs_iter));
4128 proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
4129 ip_vs_stats_show, NULL);
4130 proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
4131 ip_vs_stats_percpu_show, NULL);
4132
4133 if (ip_vs_control_net_init_sysctl(ipvs))
4134 goto err;
4135
4136 return 0;
4137
4138err:
4139 free_percpu(ipvs->tot_stats.cpustats);
4140 return -ENOMEM;
4141}
4142
4143void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
4144{
4145 ip_vs_trash_cleanup(ipvs);
4146 ip_vs_control_net_cleanup_sysctl(ipvs);
4147 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
4148 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
4149 remove_proc_entry("ip_vs", ipvs->net->proc_net);
4150 free_percpu(ipvs->tot_stats.cpustats);
4151}
4152
4153int __init ip_vs_register_nl_ioctl(void)
4154{
4155 int ret;
4156
4157 ret = nf_register_sockopt(&ip_vs_sockopts);
4158 if (ret) {
4159 pr_err("cannot register sockopt.\n");
4160 goto err_sock;
4161 }
4162
4163 ret = ip_vs_genl_register();
4164 if (ret) {
4165 pr_err("cannot register Generic Netlink interface.\n");
4166 goto err_genl;
4167 }
4168 return 0;
4169
4170err_genl:
4171 nf_unregister_sockopt(&ip_vs_sockopts);
4172err_sock:
4173 return ret;
4174}
4175
/*
 * Unregister the user-space control interfaces in the reverse order of
 * ip_vs_register_nl_ioctl(): Generic Netlink first, then the sockopts.
 */
void ip_vs_unregister_nl_ioctl(void)
{
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
}
4181
4182int __init ip_vs_control_init(void)
4183{
4184 int idx;
4185 int ret;
4186
4187 EnterFunction(2);
4188
4189
4190 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4191 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
4192 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
4193 }
4194
4195 smp_wmb();
4196
4197 ret = register_netdevice_notifier(&ip_vs_dst_notifier);
4198 if (ret < 0)
4199 return ret;
4200
4201 LeaveFunction(2);
4202 return 0;
4203}
4204
4205
/*
 * Module-wide control cleanup: unregister the netdevice notifier that
 * ip_vs_control_init() registered. EnterFunction/LeaveFunction bracket the
 * call.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	unregister_netdevice_notifier(&ip_vs_dst_notifier);
	LeaveFunction(2);
}
4212