1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
27#include <linux/capability.h>
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
33#include <linux/seq_file.h>
34#include <linux/slab.h>
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
38#include <linux/mutex.h>
39
40#include <net/net_namespace.h>
41#include <linux/nsproxy.h>
42#include <net/ip.h>
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
47#endif
48#include <net/route.h>
49#include <net/sock.h>
50#include <net/genetlink.h>
51
52#include <linux/uaccess.h>
53
54#include <net/ip_vs.h>
55
56
57static DEFINE_MUTEX(__ip_vs_mutex);
58
59
60
61#ifdef CONFIG_IP_VS_DEBUG
62static int sysctl_ip_vs_debug_level = 0;
63
64int ip_vs_get_debug_level(void)
65{
66 return sysctl_ip_vs_debug_level;
67}
68#endif
69
70
71
72static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
73
74
75#ifdef CONFIG_IP_VS_IPV6
76
77static bool __ip_vs_addr_is_local_v6(struct net *net,
78 const struct in6_addr *addr)
79{
80 struct flowi6 fl6 = {
81 .daddr = *addr,
82 };
83 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
84 bool is_local;
85
86 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
87
88 dst_release(dst);
89 return is_local;
90}
91#endif
92
93#ifdef CONFIG_SYSCTL
94
95
96
97
98static void update_defense_level(struct netns_ipvs *ipvs)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109
110
111
112
113
114 nomem = (availmem < ipvs->sysctl_amemthresh);
115
116 local_bh_disable();
117
118
119 spin_lock(&ipvs->dropentry_lock);
120 switch (ipvs->sysctl_drop_entry) {
121 case 0:
122 atomic_set(&ipvs->dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ipvs->dropentry, 1);
127 ipvs->sysctl_drop_entry = 2;
128 } else {
129 atomic_set(&ipvs->dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ipvs->dropentry, 1);
135 } else {
136 atomic_set(&ipvs->dropentry, 0);
137 ipvs->sysctl_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ipvs->dropentry, 1);
142 break;
143 }
144 spin_unlock(&ipvs->dropentry_lock);
145
146
147 spin_lock(&ipvs->droppacket_lock);
148 switch (ipvs->sysctl_drop_packet) {
149 case 0:
150 ipvs->drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ipvs->drop_rate = ipvs->drop_counter
155 = ipvs->sysctl_amemthresh /
156 (ipvs->sysctl_amemthresh-availmem);
157 ipvs->sysctl_drop_packet = 2;
158 } else {
159 ipvs->drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ipvs->drop_rate = ipvs->drop_counter
165 = ipvs->sysctl_amemthresh /
166 (ipvs->sysctl_amemthresh-availmem);
167 } else {
168 ipvs->drop_rate = 0;
169 ipvs->sysctl_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ipvs->drop_rate = ipvs->sysctl_am_droprate;
174 break;
175 }
176 spin_unlock(&ipvs->droppacket_lock);
177
178
179 spin_lock(&ipvs->securetcp_lock);
180 switch (ipvs->sysctl_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 ipvs->sysctl_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 ipvs->sysctl_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = ipvs->sysctl_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(ipvs,
213 ipvs->sysctl_secure_tcp > 1);
214 spin_unlock(&ipvs->securetcp_lock);
215
216 local_bh_enable();
217}
218
219
220
221
222
223#define DEFENSE_TIMER_PERIOD 1*HZ
224
225static void defense_work_handler(struct work_struct *work)
226{
227 struct netns_ipvs *ipvs =
228 container_of(work, struct netns_ipvs, defense_work.work);
229
230 update_defense_level(ipvs);
231 if (atomic_read(&ipvs->dropentry))
232 ip_vs_random_dropentry(ipvs);
233 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
234}
235#endif
236
237int
238ip_vs_use_count_inc(void)
239{
240 return try_module_get(THIS_MODULE);
241}
242
243void
244ip_vs_use_count_dec(void)
245{
246 module_put(THIS_MODULE);
247}
248
249
250
251
252
253#define IP_VS_SVC_TAB_BITS 8
254#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
255#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
256
257
258static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
259
260static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
261
262
263
264
265
266static inline unsigned int
267ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
268 const union nf_inet_addr *addr, __be16 port)
269{
270 register unsigned int porth = ntohs(port);
271 __be32 addr_fold = addr->ip;
272 __u32 ahash;
273
274#ifdef CONFIG_IP_VS_IPV6
275 if (af == AF_INET6)
276 addr_fold = addr->ip6[0]^addr->ip6[1]^
277 addr->ip6[2]^addr->ip6[3];
278#endif
279 ahash = ntohl(addr_fold);
280 ahash ^= ((size_t) ipvs >> 8);
281
282 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
283 IP_VS_SVC_TAB_MASK;
284}
285
286
287
288
289static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
290{
291 return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292}
293
294
295
296
297
298
299static int ip_vs_svc_hash(struct ip_vs_service *svc)
300{
301 unsigned int hash;
302
303 if (svc->flags & IP_VS_SVC_F_HASHED) {
304 pr_err("%s(): request for already hashed, called from %pS\n",
305 __func__, __builtin_return_address(0));
306 return 0;
307 }
308
309 if (svc->fwmark == 0) {
310
311
312
313 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
314 &svc->addr, svc->port);
315 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
316 } else {
317
318
319
320 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
321 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322 }
323
324 svc->flags |= IP_VS_SVC_F_HASHED;
325
326 atomic_inc(&svc->refcnt);
327 return 1;
328}
329
330
331
332
333
334
335static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336{
337 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338 pr_err("%s(): request for unhash flagged, called from %pS\n",
339 __func__, __builtin_return_address(0));
340 return 0;
341 }
342
343 if (svc->fwmark == 0) {
344
345 hlist_del_rcu(&svc->s_list);
346 } else {
347
348 hlist_del_rcu(&svc->f_list);
349 }
350
351 svc->flags &= ~IP_VS_SVC_F_HASHED;
352 atomic_dec(&svc->refcnt);
353 return 1;
354}
355
356
357
358
359
360static inline struct ip_vs_service *
361__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
362 const union nf_inet_addr *vaddr, __be16 vport)
363{
364 unsigned int hash;
365 struct ip_vs_service *svc;
366
367
368 hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
369
370 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
371 if ((svc->af == af)
372 && ip_vs_addr_equal(af, &svc->addr, vaddr)
373 && (svc->port == vport)
374 && (svc->protocol == protocol)
375 && (svc->ipvs == ipvs)) {
376
377 return svc;
378 }
379 }
380
381 return NULL;
382}
383
384
385
386
387
388static inline struct ip_vs_service *
389__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
390{
391 unsigned int hash;
392 struct ip_vs_service *svc;
393
394
395 hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
396
397 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398 if (svc->fwmark == fwmark && svc->af == af
399 && (svc->ipvs == ipvs)) {
400
401 return svc;
402 }
403 }
404
405 return NULL;
406}
407
408
409struct ip_vs_service *
410ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
411 const union nf_inet_addr *vaddr, __be16 vport)
412{
413 struct ip_vs_service *svc;
414
415
416
417
418 if (fwmark) {
419 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
420 if (svc)
421 goto out;
422 }
423
424
425
426
427
428 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
429
430 if (!svc && protocol == IPPROTO_TCP &&
431 atomic_read(&ipvs->ftpsvc_counter) &&
432 (vport == FTPDATA || ntohs(vport) >= inet_prot_sock(ipvs->net))) {
433
434
435
436
437 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
438 }
439
440 if (svc == NULL
441 && atomic_read(&ipvs->nullsvc_counter)) {
442
443
444
445 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
446 }
447
448 out:
449 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
450 fwmark, ip_vs_proto_name(protocol),
451 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
452 svc ? "hit" : "not hit");
453
454 return svc;
455}
456
457
458static inline void
459__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
460{
461 atomic_inc(&svc->refcnt);
462 rcu_assign_pointer(dest->svc, svc);
463}
464
465static void ip_vs_service_free(struct ip_vs_service *svc)
466{
467 free_percpu(svc->stats.cpustats);
468 kfree(svc);
469}
470
471static void ip_vs_service_rcu_free(struct rcu_head *head)
472{
473 struct ip_vs_service *svc;
474
475 svc = container_of(head, struct ip_vs_service, rcu_head);
476 ip_vs_service_free(svc);
477}
478
479static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
480{
481 if (atomic_dec_and_test(&svc->refcnt)) {
482 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
483 svc->fwmark,
484 IP_VS_DBG_ADDR(svc->af, &svc->addr),
485 ntohs(svc->port));
486 if (do_delay)
487 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
488 else
489 ip_vs_service_free(svc);
490 }
491}
492
493
494
495
496
497static inline unsigned int ip_vs_rs_hashkey(int af,
498 const union nf_inet_addr *addr,
499 __be16 port)
500{
501 register unsigned int porth = ntohs(port);
502 __be32 addr_fold = addr->ip;
503
504#ifdef CONFIG_IP_VS_IPV6
505 if (af == AF_INET6)
506 addr_fold = addr->ip6[0]^addr->ip6[1]^
507 addr->ip6[2]^addr->ip6[3];
508#endif
509
510 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
511 & IP_VS_RTAB_MASK;
512}
513
514
515static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
516{
517 unsigned int hash;
518
519 if (dest->in_rs_table)
520 return;
521
522
523
524
525
526 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
527
528 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
529 dest->in_rs_table = 1;
530}
531
532
533static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
534{
535
536
537
538 if (dest->in_rs_table) {
539 hlist_del_rcu(&dest->d_list);
540 dest->in_rs_table = 0;
541 }
542}
543
544
545bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
546 const union nf_inet_addr *daddr, __be16 dport)
547{
548 unsigned int hash;
549 struct ip_vs_dest *dest;
550
551
552 hash = ip_vs_rs_hashkey(af, daddr, dport);
553
554 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
555 if (dest->port == dport &&
556 dest->af == af &&
557 ip_vs_addr_equal(af, &dest->addr, daddr) &&
558 (dest->protocol == protocol || dest->vfwmark)) {
559
560 return true;
561 }
562 }
563
564 return false;
565}
566
567
568
569
570
571
572
573struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
574 __u16 protocol,
575 const union nf_inet_addr *daddr,
576 __be16 dport)
577{
578 unsigned int hash;
579 struct ip_vs_dest *dest;
580
581
582 hash = ip_vs_rs_hashkey(af, daddr, dport);
583
584 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
585 if (dest->port == dport &&
586 dest->af == af &&
587 ip_vs_addr_equal(af, &dest->addr, daddr) &&
588 (dest->protocol == protocol || dest->vfwmark)) {
589
590 return dest;
591 }
592 }
593
594 return NULL;
595}
596
597
598
599
600static struct ip_vs_dest *
601ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
602 const union nf_inet_addr *daddr, __be16 dport)
603{
604 struct ip_vs_dest *dest;
605
606
607
608
609 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
610 if ((dest->af == dest_af) &&
611 ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
612 (dest->port == dport)) {
613
614 return dest;
615 }
616 }
617
618 return NULL;
619}
620
621
622
623
624
625
626
627
628
629struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
630 const union nf_inet_addr *daddr,
631 __be16 dport,
632 const union nf_inet_addr *vaddr,
633 __be16 vport, __u16 protocol, __u32 fwmark,
634 __u32 flags)
635{
636 struct ip_vs_dest *dest;
637 struct ip_vs_service *svc;
638 __be16 port = dport;
639
640 svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
641 if (!svc)
642 return NULL;
643 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
644 port = 0;
645 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
646 if (!dest)
647 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
648 return dest;
649}
650
651void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
652{
653 struct ip_vs_dest_dst *dest_dst = container_of(head,
654 struct ip_vs_dest_dst,
655 rcu_head);
656
657 dst_release(dest_dst->dst_cache);
658 kfree(dest_dst);
659}
660
661
662static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
663{
664 struct ip_vs_dest_dst *old;
665
666 old = rcu_dereference_protected(dest->dest_dst, 1);
667 if (old) {
668 RCU_INIT_POINTER(dest->dest_dst, NULL);
669 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
670 }
671}
672
673
674
675
676
677
678
679
680
681
682
683static struct ip_vs_dest *
684ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
685 const union nf_inet_addr *daddr, __be16 dport)
686{
687 struct ip_vs_dest *dest;
688 struct netns_ipvs *ipvs = svc->ipvs;
689
690
691
692
693 spin_lock_bh(&ipvs->dest_trash_lock);
694 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
695 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696 "dest->refcnt=%d\n",
697 dest->vfwmark,
698 IP_VS_DBG_ADDR(dest->af, &dest->addr),
699 ntohs(dest->port),
700 refcount_read(&dest->refcnt));
701 if (dest->af == dest_af &&
702 ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
703 dest->port == dport &&
704 dest->vfwmark == svc->fwmark &&
705 dest->protocol == svc->protocol &&
706 (svc->fwmark ||
707 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
708 dest->vport == svc->port))) {
709
710 list_del(&dest->t_list);
711 goto out;
712 }
713 }
714
715 dest = NULL;
716
717out:
718 spin_unlock_bh(&ipvs->dest_trash_lock);
719
720 return dest;
721}
722
723static void ip_vs_dest_free(struct ip_vs_dest *dest)
724{
725 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
726
727 __ip_vs_dst_cache_reset(dest);
728 __ip_vs_svc_put(svc, false);
729 free_percpu(dest->stats.cpustats);
730 ip_vs_dest_put_and_free(dest);
731}
732
733
734
735
736
737
738
739
740
741
742static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
743{
744 struct ip_vs_dest *dest, *nxt;
745
746 del_timer_sync(&ipvs->dest_trash_timer);
747
748 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
749 list_del(&dest->t_list);
750 ip_vs_dest_free(dest);
751 }
752}
753
754static void
755ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
756{
757#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
758
759 spin_lock_bh(&src->lock);
760
761 IP_VS_SHOW_STATS_COUNTER(conns);
762 IP_VS_SHOW_STATS_COUNTER(inpkts);
763 IP_VS_SHOW_STATS_COUNTER(outpkts);
764 IP_VS_SHOW_STATS_COUNTER(inbytes);
765 IP_VS_SHOW_STATS_COUNTER(outbytes);
766
767 ip_vs_read_estimator(dst, src);
768
769 spin_unlock_bh(&src->lock);
770}
771
772static void
773ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
774{
775 dst->conns = (u32)src->conns;
776 dst->inpkts = (u32)src->inpkts;
777 dst->outpkts = (u32)src->outpkts;
778 dst->inbytes = src->inbytes;
779 dst->outbytes = src->outbytes;
780 dst->cps = (u32)src->cps;
781 dst->inpps = (u32)src->inpps;
782 dst->outpps = (u32)src->outpps;
783 dst->inbps = (u32)src->inbps;
784 dst->outbps = (u32)src->outbps;
785}
786
787static void
788ip_vs_zero_stats(struct ip_vs_stats *stats)
789{
790 spin_lock_bh(&stats->lock);
791
792
793
794#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
795
796 IP_VS_ZERO_STATS_COUNTER(conns);
797 IP_VS_ZERO_STATS_COUNTER(inpkts);
798 IP_VS_ZERO_STATS_COUNTER(outpkts);
799 IP_VS_ZERO_STATS_COUNTER(inbytes);
800 IP_VS_ZERO_STATS_COUNTER(outbytes);
801
802 ip_vs_zero_estimator(stats);
803
804 spin_unlock_bh(&stats->lock);
805}
806
807
808
809
810static void
811__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
812 struct ip_vs_dest_user_kern *udest, int add)
813{
814 struct netns_ipvs *ipvs = svc->ipvs;
815 struct ip_vs_service *old_svc;
816 struct ip_vs_scheduler *sched;
817 int conn_flags;
818
819
820 BUG_ON(!add && udest->af != dest->af);
821
822 if (add && udest->af != svc->af)
823 ipvs->mixed_address_family_dests++;
824
825
826 if (add || udest->weight != 0)
827 atomic_set(&dest->last_weight, udest->weight);
828
829
830 atomic_set(&dest->weight, udest->weight);
831 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
832 conn_flags |= IP_VS_CONN_F_INACTIVE;
833
834
835 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
836 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
837 } else {
838
839
840
841
842 ip_vs_rs_hash(ipvs, dest);
843
844 if (svc->port == FTPPORT)
845 ip_vs_register_conntrack(svc);
846 }
847 atomic_set(&dest->conn_flags, conn_flags);
848
849
850 old_svc = rcu_dereference_protected(dest->svc, 1);
851 if (!old_svc) {
852 __ip_vs_bind_svc(dest, svc);
853 } else {
854 if (old_svc != svc) {
855 ip_vs_zero_stats(&dest->stats);
856 __ip_vs_bind_svc(dest, svc);
857 __ip_vs_svc_put(old_svc, true);
858 }
859 }
860
861
862 dest->flags |= IP_VS_DEST_F_AVAILABLE;
863
864 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
865 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
866 dest->u_threshold = udest->u_threshold;
867 dest->l_threshold = udest->l_threshold;
868
869 dest->af = udest->af;
870
871 spin_lock_bh(&dest->dst_lock);
872 __ip_vs_dst_cache_reset(dest);
873 spin_unlock_bh(&dest->dst_lock);
874
875 if (add) {
876 ip_vs_start_estimator(svc->ipvs, &dest->stats);
877 list_add_rcu(&dest->n_list, &svc->destinations);
878 svc->num_dests++;
879 sched = rcu_dereference_protected(svc->scheduler, 1);
880 if (sched && sched->add_dest)
881 sched->add_dest(svc, dest);
882 } else {
883 sched = rcu_dereference_protected(svc->scheduler, 1);
884 if (sched && sched->upd_dest)
885 sched->upd_dest(svc, dest);
886 }
887}
888
889
890
891
892
893static int
894ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
895 struct ip_vs_dest **dest_p)
896{
897 struct ip_vs_dest *dest;
898 unsigned int atype, i;
899 int ret = 0;
900
901 EnterFunction(2);
902
903#ifdef CONFIG_IP_VS_IPV6
904 if (udest->af == AF_INET6) {
905 atype = ipv6_addr_type(&udest->addr.in6);
906 if ((!(atype & IPV6_ADDR_UNICAST) ||
907 atype & IPV6_ADDR_LINKLOCAL) &&
908 !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
909 return -EINVAL;
910
911 ret = nf_defrag_ipv6_enable(svc->ipvs->net);
912 if (ret)
913 return ret;
914 } else
915#endif
916 {
917 atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
918 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
919 return -EINVAL;
920 }
921
922 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
923 if (dest == NULL)
924 return -ENOMEM;
925
926 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
927 if (!dest->stats.cpustats)
928 goto err_alloc;
929
930 for_each_possible_cpu(i) {
931 struct ip_vs_cpu_stats *ip_vs_dest_stats;
932 ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
933 u64_stats_init(&ip_vs_dest_stats->syncp);
934 }
935
936 dest->af = udest->af;
937 dest->protocol = svc->protocol;
938 dest->vaddr = svc->addr;
939 dest->vport = svc->port;
940 dest->vfwmark = svc->fwmark;
941 ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
942 dest->port = udest->port;
943
944 atomic_set(&dest->activeconns, 0);
945 atomic_set(&dest->inactconns, 0);
946 atomic_set(&dest->persistconns, 0);
947 refcount_set(&dest->refcnt, 1);
948
949 INIT_HLIST_NODE(&dest->d_list);
950 spin_lock_init(&dest->dst_lock);
951 spin_lock_init(&dest->stats.lock);
952 __ip_vs_update_dest(svc, dest, udest, 1);
953
954 *dest_p = dest;
955
956 LeaveFunction(2);
957 return 0;
958
959err_alloc:
960 kfree(dest);
961 return -ENOMEM;
962}
963
964
965
966
967
968static int
969ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
970{
971 struct ip_vs_dest *dest;
972 union nf_inet_addr daddr;
973 __be16 dport = udest->port;
974 int ret;
975
976 EnterFunction(2);
977
978 if (udest->weight < 0) {
979 pr_err("%s(): server weight less than zero\n", __func__);
980 return -ERANGE;
981 }
982
983 if (udest->l_threshold > udest->u_threshold) {
984 pr_err("%s(): lower threshold is higher than upper threshold\n",
985 __func__);
986 return -ERANGE;
987 }
988
989 ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
990
991
992 rcu_read_lock();
993 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
994 rcu_read_unlock();
995
996 if (dest != NULL) {
997 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
998 return -EEXIST;
999 }
1000
1001
1002
1003
1004
1005 dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
1006
1007 if (dest != NULL) {
1008 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
1009 "dest->refcnt=%d, service %u/%s:%u\n",
1010 IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
1011 refcount_read(&dest->refcnt),
1012 dest->vfwmark,
1013 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
1014 ntohs(dest->vport));
1015
1016 __ip_vs_update_dest(svc, dest, udest, 1);
1017 ret = 0;
1018 } else {
1019
1020
1021
1022 ret = ip_vs_new_dest(svc, udest, &dest);
1023 }
1024 LeaveFunction(2);
1025
1026 return ret;
1027}
1028
1029
1030
1031
1032
1033static int
1034ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1035{
1036 struct ip_vs_dest *dest;
1037 union nf_inet_addr daddr;
1038 __be16 dport = udest->port;
1039
1040 EnterFunction(2);
1041
1042 if (udest->weight < 0) {
1043 pr_err("%s(): server weight less than zero\n", __func__);
1044 return -ERANGE;
1045 }
1046
1047 if (udest->l_threshold > udest->u_threshold) {
1048 pr_err("%s(): lower threshold is higher than upper threshold\n",
1049 __func__);
1050 return -ERANGE;
1051 }
1052
1053 ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1054
1055
1056 rcu_read_lock();
1057 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1058 rcu_read_unlock();
1059
1060 if (dest == NULL) {
1061 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1062 return -ENOENT;
1063 }
1064
1065 __ip_vs_update_dest(svc, dest, udest, 0);
1066 LeaveFunction(2);
1067
1068 return 0;
1069}
1070
1071
1072
1073
1074static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
1075 bool cleanup)
1076{
1077 ip_vs_stop_estimator(ipvs, &dest->stats);
1078
1079
1080
1081
1082 ip_vs_rs_unhash(dest);
1083
1084 spin_lock_bh(&ipvs->dest_trash_lock);
1085 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1086 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1087 refcount_read(&dest->refcnt));
1088 if (list_empty(&ipvs->dest_trash) && !cleanup)
1089 mod_timer(&ipvs->dest_trash_timer,
1090 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1091
1092 list_add(&dest->t_list, &ipvs->dest_trash);
1093 dest->idle_start = 0;
1094 spin_unlock_bh(&ipvs->dest_trash_lock);
1095}
1096
1097
1098
1099
1100
1101static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1102 struct ip_vs_dest *dest,
1103 int svcupd)
1104{
1105 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1106
1107
1108
1109
1110 list_del_rcu(&dest->n_list);
1111 svc->num_dests--;
1112
1113 if (dest->af != svc->af)
1114 svc->ipvs->mixed_address_family_dests--;
1115
1116 if (svcupd) {
1117 struct ip_vs_scheduler *sched;
1118
1119 sched = rcu_dereference_protected(svc->scheduler, 1);
1120 if (sched && sched->del_dest)
1121 sched->del_dest(svc, dest);
1122 }
1123}
1124
1125
1126
1127
1128
1129static int
1130ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1131{
1132 struct ip_vs_dest *dest;
1133 __be16 dport = udest->port;
1134
1135 EnterFunction(2);
1136
1137
1138 rcu_read_lock();
1139 dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
1140 rcu_read_unlock();
1141
1142 if (dest == NULL) {
1143 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1144 return -ENOENT;
1145 }
1146
1147
1148
1149
1150 __ip_vs_unlink_dest(svc, dest, 1);
1151
1152
1153
1154
1155 __ip_vs_del_dest(svc->ipvs, dest, false);
1156
1157 LeaveFunction(2);
1158
1159 return 0;
1160}
1161
1162static void ip_vs_dest_trash_expire(struct timer_list *t)
1163{
1164 struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
1165 struct ip_vs_dest *dest, *next;
1166 unsigned long now = jiffies;
1167
1168 spin_lock(&ipvs->dest_trash_lock);
1169 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1170 if (refcount_read(&dest->refcnt) > 1)
1171 continue;
1172 if (dest->idle_start) {
1173 if (time_before(now, dest->idle_start +
1174 IP_VS_DEST_TRASH_PERIOD))
1175 continue;
1176 } else {
1177 dest->idle_start = max(1UL, now);
1178 continue;
1179 }
1180 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1181 dest->vfwmark,
1182 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1183 ntohs(dest->port));
1184 list_del(&dest->t_list);
1185 ip_vs_dest_free(dest);
1186 }
1187 if (!list_empty(&ipvs->dest_trash))
1188 mod_timer(&ipvs->dest_trash_timer,
1189 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1190 spin_unlock(&ipvs->dest_trash_lock);
1191}
1192
1193
1194
1195
1196static int
1197ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
1198 struct ip_vs_service **svc_p)
1199{
1200 int ret = 0, i;
1201 struct ip_vs_scheduler *sched = NULL;
1202 struct ip_vs_pe *pe = NULL;
1203 struct ip_vs_service *svc = NULL;
1204
1205
1206 ip_vs_use_count_inc();
1207
1208
1209 if (strcmp(u->sched_name, "none")) {
1210 sched = ip_vs_scheduler_get(u->sched_name);
1211 if (!sched) {
1212 pr_info("Scheduler module ip_vs_%s not found\n",
1213 u->sched_name);
1214 ret = -ENOENT;
1215 goto out_err;
1216 }
1217 }
1218
1219 if (u->pe_name && *u->pe_name) {
1220 pe = ip_vs_pe_getbyname(u->pe_name);
1221 if (pe == NULL) {
1222 pr_info("persistence engine module ip_vs_pe_%s "
1223 "not found\n", u->pe_name);
1224 ret = -ENOENT;
1225 goto out_err;
1226 }
1227 }
1228
1229#ifdef CONFIG_IP_VS_IPV6
1230 if (u->af == AF_INET6) {
1231 __u32 plen = (__force __u32) u->netmask;
1232
1233 if (plen < 1 || plen > 128) {
1234 ret = -EINVAL;
1235 goto out_err;
1236 }
1237
1238 ret = nf_defrag_ipv6_enable(ipvs->net);
1239 if (ret)
1240 goto out_err;
1241 }
1242#endif
1243
1244 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1245 if (svc == NULL) {
1246 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1247 ret = -ENOMEM;
1248 goto out_err;
1249 }
1250 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1251 if (!svc->stats.cpustats) {
1252 ret = -ENOMEM;
1253 goto out_err;
1254 }
1255
1256 for_each_possible_cpu(i) {
1257 struct ip_vs_cpu_stats *ip_vs_stats;
1258 ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
1259 u64_stats_init(&ip_vs_stats->syncp);
1260 }
1261
1262
1263
1264 atomic_set(&svc->refcnt, 0);
1265
1266 svc->af = u->af;
1267 svc->protocol = u->protocol;
1268 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1269 svc->port = u->port;
1270 svc->fwmark = u->fwmark;
1271 svc->flags = u->flags;
1272 svc->timeout = u->timeout * HZ;
1273 svc->netmask = u->netmask;
1274 svc->ipvs = ipvs;
1275
1276 INIT_LIST_HEAD(&svc->destinations);
1277 spin_lock_init(&svc->sched_lock);
1278 spin_lock_init(&svc->stats.lock);
1279
1280
1281 if (sched) {
1282 ret = ip_vs_bind_scheduler(svc, sched);
1283 if (ret)
1284 goto out_err;
1285 sched = NULL;
1286 }
1287
1288
1289 RCU_INIT_POINTER(svc->pe, pe);
1290 pe = NULL;
1291
1292
1293 if (svc->port == FTPPORT)
1294 atomic_inc(&ipvs->ftpsvc_counter);
1295 else if (svc->port == 0)
1296 atomic_inc(&ipvs->nullsvc_counter);
1297 if (svc->pe && svc->pe->conn_out)
1298 atomic_inc(&ipvs->conn_out_counter);
1299
1300 ip_vs_start_estimator(ipvs, &svc->stats);
1301
1302
1303 if (svc->af == AF_INET)
1304 ipvs->num_services++;
1305
1306
1307 ip_vs_svc_hash(svc);
1308
1309 *svc_p = svc;
1310
1311 ipvs->enable = 1;
1312 return 0;
1313
1314
1315 out_err:
1316 if (svc != NULL) {
1317 ip_vs_unbind_scheduler(svc, sched);
1318 ip_vs_service_free(svc);
1319 }
1320 ip_vs_scheduler_put(sched);
1321 ip_vs_pe_put(pe);
1322
1323
1324 ip_vs_use_count_dec();
1325
1326 return ret;
1327}
1328
1329
1330
1331
1332
1333static int
1334ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1335{
1336 struct ip_vs_scheduler *sched = NULL, *old_sched;
1337 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1338 int ret = 0;
1339 bool new_pe_conn_out, old_pe_conn_out;
1340
1341
1342
1343
1344 if (strcmp(u->sched_name, "none")) {
1345 sched = ip_vs_scheduler_get(u->sched_name);
1346 if (!sched) {
1347 pr_info("Scheduler module ip_vs_%s not found\n",
1348 u->sched_name);
1349 return -ENOENT;
1350 }
1351 }
1352 old_sched = sched;
1353
1354 if (u->pe_name && *u->pe_name) {
1355 pe = ip_vs_pe_getbyname(u->pe_name);
1356 if (pe == NULL) {
1357 pr_info("persistence engine module ip_vs_pe_%s "
1358 "not found\n", u->pe_name);
1359 ret = -ENOENT;
1360 goto out;
1361 }
1362 old_pe = pe;
1363 }
1364
1365#ifdef CONFIG_IP_VS_IPV6
1366 if (u->af == AF_INET6) {
1367 __u32 plen = (__force __u32) u->netmask;
1368
1369 if (plen < 1 || plen > 128) {
1370 ret = -EINVAL;
1371 goto out;
1372 }
1373 }
1374#endif
1375
1376 old_sched = rcu_dereference_protected(svc->scheduler, 1);
1377 if (sched != old_sched) {
1378 if (old_sched) {
1379 ip_vs_unbind_scheduler(svc, old_sched);
1380 RCU_INIT_POINTER(svc->scheduler, NULL);
1381
1382 synchronize_rcu();
1383 }
1384
1385 if (sched) {
1386 ret = ip_vs_bind_scheduler(svc, sched);
1387 if (ret) {
1388 ip_vs_scheduler_put(sched);
1389 goto out;
1390 }
1391 }
1392 }
1393
1394
1395
1396
1397 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1398 svc->timeout = u->timeout * HZ;
1399 svc->netmask = u->netmask;
1400
1401 old_pe = rcu_dereference_protected(svc->pe, 1);
1402 if (pe != old_pe) {
1403 rcu_assign_pointer(svc->pe, pe);
1404
1405 new_pe_conn_out = (pe && pe->conn_out) ? true : false;
1406 old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
1407 if (new_pe_conn_out && !old_pe_conn_out)
1408 atomic_inc(&svc->ipvs->conn_out_counter);
1409 if (old_pe_conn_out && !new_pe_conn_out)
1410 atomic_dec(&svc->ipvs->conn_out_counter);
1411 }
1412
1413out:
1414 ip_vs_scheduler_put(old_sched);
1415 ip_vs_pe_put(old_pe);
1416 return ret;
1417}
1418
1419
1420
1421
1422
1423
1424static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1425{
1426 struct ip_vs_dest *dest, *nxt;
1427 struct ip_vs_scheduler *old_sched;
1428 struct ip_vs_pe *old_pe;
1429 struct netns_ipvs *ipvs = svc->ipvs;
1430
1431
1432 if (svc->af == AF_INET)
1433 ipvs->num_services--;
1434
1435 ip_vs_stop_estimator(svc->ipvs, &svc->stats);
1436
1437
1438 old_sched = rcu_dereference_protected(svc->scheduler, 1);
1439 ip_vs_unbind_scheduler(svc, old_sched);
1440 ip_vs_scheduler_put(old_sched);
1441
1442
1443 old_pe = rcu_dereference_protected(svc->pe, 1);
1444 if (old_pe && old_pe->conn_out)
1445 atomic_dec(&ipvs->conn_out_counter);
1446 ip_vs_pe_put(old_pe);
1447
1448
1449
1450
1451 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1452 __ip_vs_unlink_dest(svc, dest, 0);
1453 __ip_vs_del_dest(svc->ipvs, dest, cleanup);
1454 }
1455
1456
1457
1458
1459 if (svc->port == FTPPORT)
1460 atomic_dec(&ipvs->ftpsvc_counter);
1461 else if (svc->port == 0)
1462 atomic_dec(&ipvs->nullsvc_counter);
1463
1464
1465
1466
1467 __ip_vs_svc_put(svc, true);
1468
1469
1470 ip_vs_use_count_dec();
1471}
1472
1473
1474
1475
1476static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
1477{
1478 ip_vs_unregister_conntrack(svc);
1479
1480 atomic_inc(&svc->refcnt);
1481
1482
1483
1484 ip_vs_svc_unhash(svc);
1485
1486 __ip_vs_del_service(svc, cleanup);
1487}
1488
1489
1490
1491
1492static int ip_vs_del_service(struct ip_vs_service *svc)
1493{
1494 if (svc == NULL)
1495 return -EEXIST;
1496 ip_vs_unlink_service(svc, false);
1497
1498 return 0;
1499}
1500
1501
1502
1503
1504
1505static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
1506{
1507 int idx;
1508 struct ip_vs_service *svc;
1509 struct hlist_node *n;
1510
1511
1512
1513
1514 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1515 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
1516 s_list) {
1517 if (svc->ipvs == ipvs)
1518 ip_vs_unlink_service(svc, cleanup);
1519 }
1520 }
1521
1522
1523
1524
1525 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1526 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
1527 f_list) {
1528 if (svc->ipvs == ipvs)
1529 ip_vs_unlink_service(svc, cleanup);
1530 }
1531 }
1532
1533 return 0;
1534}
1535
1536
1537
1538
1539
1540void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
1541{
1542 EnterFunction(2);
1543
1544 mutex_lock(&__ip_vs_mutex);
1545 ip_vs_flush(ipvs, true);
1546 mutex_unlock(&__ip_vs_mutex);
1547 LeaveFunction(2);
1548}
1549
1550
1551static inline void
1552ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1553{
1554 struct ip_vs_dest_dst *dest_dst;
1555
1556 spin_lock_bh(&dest->dst_lock);
1557 dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
1558 if (dest_dst && dest_dst->dst_cache->dev == dev) {
1559 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1560 dev->name,
1561 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1562 ntohs(dest->port),
1563 refcount_read(&dest->refcnt));
1564 __ip_vs_dst_cache_reset(dest);
1565 }
1566 spin_unlock_bh(&dest->dst_lock);
1567
1568}
1569
1570
1571
1572static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1573 void *ptr)
1574{
1575 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1576 struct net *net = dev_net(dev);
1577 struct netns_ipvs *ipvs = net_ipvs(net);
1578 struct ip_vs_service *svc;
1579 struct ip_vs_dest *dest;
1580 unsigned int idx;
1581
1582 if (event != NETDEV_DOWN || !ipvs)
1583 return NOTIFY_DONE;
1584 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1585 EnterFunction(2);
1586 mutex_lock(&__ip_vs_mutex);
1587 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1588 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1589 if (svc->ipvs == ipvs) {
1590 list_for_each_entry(dest, &svc->destinations,
1591 n_list) {
1592 ip_vs_forget_dev(dest, dev);
1593 }
1594 }
1595 }
1596
1597 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1598 if (svc->ipvs == ipvs) {
1599 list_for_each_entry(dest, &svc->destinations,
1600 n_list) {
1601 ip_vs_forget_dev(dest, dev);
1602 }
1603 }
1604
1605 }
1606 }
1607
1608 spin_lock_bh(&ipvs->dest_trash_lock);
1609 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1610 ip_vs_forget_dev(dest, dev);
1611 }
1612 spin_unlock_bh(&ipvs->dest_trash_lock);
1613 mutex_unlock(&__ip_vs_mutex);
1614 LeaveFunction(2);
1615 return NOTIFY_DONE;
1616}
1617
1618
1619
1620
1621static int ip_vs_zero_service(struct ip_vs_service *svc)
1622{
1623 struct ip_vs_dest *dest;
1624
1625 list_for_each_entry(dest, &svc->destinations, n_list) {
1626 ip_vs_zero_stats(&dest->stats);
1627 }
1628 ip_vs_zero_stats(&svc->stats);
1629 return 0;
1630}
1631
1632static int ip_vs_zero_all(struct netns_ipvs *ipvs)
1633{
1634 int idx;
1635 struct ip_vs_service *svc;
1636
1637 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1638 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1639 if (svc->ipvs == ipvs)
1640 ip_vs_zero_service(svc);
1641 }
1642 }
1643
1644 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1645 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1646 if (svc->ipvs == ipvs)
1647 ip_vs_zero_service(svc);
1648 }
1649 }
1650
1651 ip_vs_zero_stats(&ipvs->tot_stats);
1652 return 0;
1653}
1654
1655#ifdef CONFIG_SYSCTL
1656
/* Lower bound (.extra1) of the "sync_retries" entry in vs_vars[] */
static int zero;
/* Upper bound (.extra2) of the "sync_retries" entry in vs_vars[] */
static int three = 3;
1659
1660static int
1661proc_do_defense_mode(struct ctl_table *table, int write,
1662 void __user *buffer, size_t *lenp, loff_t *ppos)
1663{
1664 struct netns_ipvs *ipvs = table->extra2;
1665 int *valp = table->data;
1666 int val = *valp;
1667 int rc;
1668
1669 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1670 if (write && (*valp != val)) {
1671 if ((*valp < 0) || (*valp > 3)) {
1672
1673 *valp = val;
1674 } else {
1675 update_defense_level(ipvs);
1676 }
1677 }
1678 return rc;
1679}
1680
1681static int
1682proc_do_sync_threshold(struct ctl_table *table, int write,
1683 void __user *buffer, size_t *lenp, loff_t *ppos)
1684{
1685 int *valp = table->data;
1686 int val[2];
1687 int rc;
1688
1689
1690 memcpy(val, valp, sizeof(val));
1691
1692 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1693 if (write && (valp[0] < 0 || valp[1] < 0 ||
1694 (valp[0] >= valp[1] && valp[1]))) {
1695
1696 memcpy(valp, val, sizeof(val));
1697 }
1698 return rc;
1699}
1700
1701static int
1702proc_do_sync_mode(struct ctl_table *table, int write,
1703 void __user *buffer, size_t *lenp, loff_t *ppos)
1704{
1705 int *valp = table->data;
1706 int val = *valp;
1707 int rc;
1708
1709 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1710 if (write && (*valp != val)) {
1711 if ((*valp < 0) || (*valp > 1)) {
1712
1713 *valp = val;
1714 }
1715 }
1716 return rc;
1717}
1718
1719static int
1720proc_do_sync_ports(struct ctl_table *table, int write,
1721 void __user *buffer, size_t *lenp, loff_t *ppos)
1722{
1723 int *valp = table->data;
1724 int val = *valp;
1725 int rc;
1726
1727 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1728 if (write && (*valp != val)) {
1729 if (*valp < 1 || !is_power_of_2(*valp)) {
1730
1731 *valp = val;
1732 }
1733 }
1734 return rc;
1735}
1736
1737
1738
1739
1740
1741
1742
/*
 *	IPVS sysctl table template.
 *
 *	Apart from "debug_level", no entry sets .data here, and the entries
 *	handled by proc_do_defense_mode() do not set the .extra2 that the
 *	handler reads as its struct netns_ipvs.
 *	NOTE(review): those fields are presumably filled in where the table
 *	is registered for a namespace (not visible in this part of the file)
 *	- confirm that the fill-in order matches the entry order below.
 */
static struct ctl_table vs_vars[] = {
	{
		.procname = "amemthresh",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "am_droprate",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	/* Defense-mode switches: validated to 0..3 by their handler */
	{
		.procname = "drop_entry",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_defense_mode,
	},
	{
		.procname = "drop_packet",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_defense_mode,
	},
#ifdef CONFIG_IP_VS_NFCT
	{
		.procname = "conntrack",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = &proc_dointvec,
	},
#endif
	{
		.procname = "secure_tcp",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_defense_mode,
	},
	{
		.procname = "snat_reroute",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = &proc_dointvec,
	},
	/* Validated to 0 or 1 by proc_do_sync_mode() */
	{
		.procname = "sync_version",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_sync_mode,
	},
	/* Validated to a positive power of two by proc_do_sync_ports() */
	{
		.procname = "sync_ports",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_sync_ports,
	},
	{
		.procname = "sync_persist_mode",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	/* The only unsigned long entry in the table */
	{
		.procname = "sync_qlen_max",
		.maxlen = sizeof(unsigned long),
		.mode = 0644,
		.proc_handler = proc_doulongvec_minmax,
	},
	{
		.procname = "sync_sock_size",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "cache_bypass",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "expire_nodest_conn",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "sloppy_tcp",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "sloppy_sctp",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "expire_quiescent_template",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	/* Sized from the netns field; proc_do_sync_threshold() handles two ints */
	{
		.procname = "sync_threshold",
		.maxlen =
			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
		.mode = 0644,
		.proc_handler = proc_do_sync_threshold,
	},
	{
		.procname = "sync_refresh_period",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	/* Bounded to [zero, three] through .extra1/.extra2 */
	{
		.procname = "sync_retries",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &three,
	},
	{
		.procname = "nat_icmp_send",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "pmtu_disc",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "backup_only",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "conn_reuse_mode",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "schedule_icmp",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "ignore_tunneled",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	/* Backed directly by the file-scope sysctl_ip_vs_debug_level */
	{
		.procname = "debug_level",
		.data = &sysctl_ip_vs_debug_level,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
#endif
	/* Empty terminating entry */
	{ }
};
1916
1917#endif
1918
1919#ifdef CONFIG_PROC_FS
1920
/*
 *	Per-open iterator state for the service listing seq_file, reached
 *	through seq->private.
 */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct hlist_head *table;  /* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;                /* bucket index of the current entry */
};
1926
1927
1928
1929
1930
1931static inline const char *ip_vs_fwd_name(unsigned int flags)
1932{
1933 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1934 case IP_VS_CONN_F_LOCALNODE:
1935 return "Local";
1936 case IP_VS_CONN_F_TUNNEL:
1937 return "Tunnel";
1938 case IP_VS_CONN_F_DROUTE:
1939 return "Route";
1940 default:
1941 return "Masq";
1942 }
1943}
1944
1945
1946
1947static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1948{
1949 struct net *net = seq_file_net(seq);
1950 struct netns_ipvs *ipvs = net_ipvs(net);
1951 struct ip_vs_iter *iter = seq->private;
1952 int idx;
1953 struct ip_vs_service *svc;
1954
1955
1956 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1957 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
1958 if ((svc->ipvs == ipvs) && pos-- == 0) {
1959 iter->table = ip_vs_svc_table;
1960 iter->bucket = idx;
1961 return svc;
1962 }
1963 }
1964 }
1965
1966
1967 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1968 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
1969 f_list) {
1970 if ((svc->ipvs == ipvs) && pos-- == 0) {
1971 iter->table = ip_vs_svc_fwm_table;
1972 iter->bucket = idx;
1973 return svc;
1974 }
1975 }
1976 }
1977
1978 return NULL;
1979}
1980
1981static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1982 __acquires(RCU)
1983{
1984 rcu_read_lock();
1985 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1986}
1987
1988
/*
 *	seq_file .next: advance to the service following @v.
 *
 *	Increments *pos, then walks in this order: the rest of the current
 *	hash chain, the remaining buckets of the current table and, once
 *	ip_vs_svc_table is exhausted, ip_vs_svc_fwm_table from bucket 0.
 *	The current table and bucket are kept in the struct ip_vs_iter at
 *	seq->private.  Returns the next service, or NULL at the end.
 *
 *	No namespace filtering is done here; ip_vs_info_seq_show() skips
 *	services that belong to another namespace.
 */
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct hlist_node *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	/* Coming from the header line: start with the very first service */
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		e = rcu_dereference(hlist_next_rcu(&svc->s_list));
		if (e)
			return hlist_entry(e, struct ip_vs_service, s_list);

		/* Chain exhausted: first entry of the next non-empty bucket */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			hlist_for_each_entry_rcu(svc,
						 &ip_vs_svc_table[iter->bucket],
						 s_list) {
				return svc;
			}
		}

		/*
		 * Table exhausted: switch to the fwmark table.  bucket is
		 * set to -1 so the pre-increment below starts at bucket 0.
		 */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	e = rcu_dereference(hlist_next_rcu(&svc->f_list));
	if (e)
		return hlist_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	/* First entry of the next non-empty fwmark bucket, if any */
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		hlist_for_each_entry_rcu(svc,
					 &ip_vs_svc_fwm_table[iter->bucket],
					 f_list)
			return svc;
	}

	return NULL;
}
2036
2037static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2038 __releases(RCU)
2039{
2040 rcu_read_unlock();
2041}
2042
2043
2044static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2045{
2046 if (v == SEQ_START_TOKEN) {
2047 seq_printf(seq,
2048 "IP Virtual Server version %d.%d.%d (size=%d)\n",
2049 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2050 seq_puts(seq,
2051 "Prot LocalAddress:Port Scheduler Flags\n");
2052 seq_puts(seq,
2053 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2054 } else {
2055 struct net *net = seq_file_net(seq);
2056 struct netns_ipvs *ipvs = net_ipvs(net);
2057 const struct ip_vs_service *svc = v;
2058 const struct ip_vs_iter *iter = seq->private;
2059 const struct ip_vs_dest *dest;
2060 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2061 char *sched_name = sched ? sched->name : "none";
2062
2063 if (svc->ipvs != ipvs)
2064 return 0;
2065 if (iter->table == ip_vs_svc_table) {
2066#ifdef CONFIG_IP_VS_IPV6
2067 if (svc->af == AF_INET6)
2068 seq_printf(seq, "%s [%pI6]:%04X %s ",
2069 ip_vs_proto_name(svc->protocol),
2070 &svc->addr.in6,
2071 ntohs(svc->port),
2072 sched_name);
2073 else
2074#endif
2075 seq_printf(seq, "%s %08X:%04X %s %s ",
2076 ip_vs_proto_name(svc->protocol),
2077 ntohl(svc->addr.ip),
2078 ntohs(svc->port),
2079 sched_name,
2080 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2081 } else {
2082 seq_printf(seq, "FWM %08X %s %s",
2083 svc->fwmark, sched_name,
2084 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2085 }
2086
2087 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2088 seq_printf(seq, "persistent %d %08X\n",
2089 svc->timeout,
2090 ntohl(svc->netmask));
2091 else
2092 seq_putc(seq, '\n');
2093
2094 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
2095#ifdef CONFIG_IP_VS_IPV6
2096 if (dest->af == AF_INET6)
2097 seq_printf(seq,
2098 " -> [%pI6]:%04X"
2099 " %-7s %-6d %-10d %-10d\n",
2100 &dest->addr.in6,
2101 ntohs(dest->port),
2102 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2103 atomic_read(&dest->weight),
2104 atomic_read(&dest->activeconns),
2105 atomic_read(&dest->inactconns));
2106 else
2107#endif
2108 seq_printf(seq,
2109 " -> %08X:%04X "
2110 "%-7s %-6d %-10d %-10d\n",
2111 ntohl(dest->addr.ip),
2112 ntohs(dest->port),
2113 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2114 atomic_read(&dest->weight),
2115 atomic_read(&dest->activeconns),
2116 atomic_read(&dest->inactconns));
2117
2118 }
2119 }
2120 return 0;
2121}
2122
/*
 *	seq_file operations for the service listing.  start/stop bracket an
 *	RCU read section; next walks both service tables; show prints the
 *	header or one service with its destinations.
 */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next = ip_vs_info_seq_next,
	.stop = ip_vs_info_seq_stop,
	.show = ip_vs_info_seq_show,
};
2129
2130static int ip_vs_stats_show(struct seq_file *seq, void *v)
2131{
2132 struct net *net = seq_file_single_net(seq);
2133 struct ip_vs_kstats show;
2134
2135
2136 seq_puts(seq,
2137 " Total Incoming Outgoing Incoming Outgoing\n");
2138 seq_puts(seq,
2139 " Conns Packets Packets Bytes Bytes\n");
2140
2141 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2142 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
2143 (unsigned long long)show.conns,
2144 (unsigned long long)show.inpkts,
2145 (unsigned long long)show.outpkts,
2146 (unsigned long long)show.inbytes,
2147 (unsigned long long)show.outbytes);
2148
2149
2150 seq_puts(seq,
2151 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2152 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
2153 (unsigned long long)show.cps,
2154 (unsigned long long)show.inpps,
2155 (unsigned long long)show.outpps,
2156 (unsigned long long)show.inbps,
2157 (unsigned long long)show.outbps);
2158
2159 return 0;
2160}
2161
2162static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2163{
2164 struct net *net = seq_file_single_net(seq);
2165 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2166 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
2167 struct ip_vs_kstats kstats;
2168 int i;
2169
2170
2171 seq_puts(seq,
2172 " Total Incoming Outgoing Incoming Outgoing\n");
2173 seq_puts(seq,
2174 "CPU Conns Packets Packets Bytes Bytes\n");
2175
2176 for_each_possible_cpu(i) {
2177 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2178 unsigned int start;
2179 u64 conns, inpkts, outpkts, inbytes, outbytes;
2180
2181 do {
2182 start = u64_stats_fetch_begin_irq(&u->syncp);
2183 conns = u->cnt.conns;
2184 inpkts = u->cnt.inpkts;
2185 outpkts = u->cnt.outpkts;
2186 inbytes = u->cnt.inbytes;
2187 outbytes = u->cnt.outbytes;
2188 } while (u64_stats_fetch_retry_irq(&u->syncp, start));
2189
2190 seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
2191 i, (u64)conns, (u64)inpkts,
2192 (u64)outpkts, (u64)inbytes,
2193 (u64)outbytes);
2194 }
2195
2196 ip_vs_copy_stats(&kstats, tot_stats);
2197
2198 seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
2199 (unsigned long long)kstats.conns,
2200 (unsigned long long)kstats.inpkts,
2201 (unsigned long long)kstats.outpkts,
2202 (unsigned long long)kstats.inbytes,
2203 (unsigned long long)kstats.outbytes);
2204
2205
2206 seq_puts(seq,
2207 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2208 seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
2209 kstats.cps,
2210 kstats.inpps,
2211 kstats.outpps,
2212 kstats.inbps,
2213 kstats.outbps);
2214
2215 return 0;
2216}
2217#endif
2218
2219
2220
2221
2222static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2223{
2224#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2225 struct ip_vs_proto_data *pd;
2226#endif
2227
2228 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2229 u->tcp_timeout,
2230 u->tcp_fin_timeout,
2231 u->udp_timeout);
2232
2233#ifdef CONFIG_IP_VS_PROTO_TCP
2234 if (u->tcp_timeout) {
2235 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2236 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2237 = u->tcp_timeout * HZ;
2238 }
2239
2240 if (u->tcp_fin_timeout) {
2241 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2242 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2243 = u->tcp_fin_timeout * HZ;
2244 }
2245#endif
2246
2247#ifdef CONFIG_IP_VS_PROTO_UDP
2248 if (u->udp_timeout) {
2249 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2250 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2251 = u->udp_timeout * HZ;
2252 }
2253#endif
2254 return 0;
2255}
2256
/*
 * Index of a sockopt command relative to IP_VS_BASE_CTL.  The argument is
 * parenthesized so that any expression can be passed safely.
 */
#define CMDID(cmd)	((cmd) - IP_VS_BASE_CTL)
2258
/*
 * Argument layout of the destination set commands: a service description
 * immediately followed by a destination description.  do_ip_vs_set_ctl()
 * relies on this order when it locates the destination right behind the
 * service part of its argument buffer.
 */
struct ip_vs_svcdest_user {
	struct ip_vs_service_user s;	/* service the destination belongs to */
	struct ip_vs_dest_user d;	/* the destination itself */
};
2263
/*
 * Exact argument length required by each set command, indexed by
 * CMDID(cmd).  Commands without an initializer here get length 0.
 * The element type is unsigned char, so every length must fit in 255.
 */
static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
	[CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user),
};
2276
/*
 * Union of every set-command argument type.  It is never instantiated
 * for its members; its size gives the largest argument any set command
 * can carry, mirroring the entries of set_arglen[].
 */
union ip_vs_set_arglen {
	struct ip_vs_service_user field_IP_VS_SO_SET_ADD;
	struct ip_vs_service_user field_IP_VS_SO_SET_EDIT;
	struct ip_vs_service_user field_IP_VS_SO_SET_DEL;
	struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST;
	struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST;
	struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST;
	struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT;
	struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON;
	struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON;
	struct ip_vs_service_user field_IP_VS_SO_SET_ZERO;
};

/* Size of the on-stack argument buffer in do_ip_vs_set_ctl() */
#define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen)
2291
2292static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2293 struct ip_vs_service_user *usvc_compat)
2294{
2295 memset(usvc, 0, sizeof(*usvc));
2296
2297 usvc->af = AF_INET;
2298 usvc->protocol = usvc_compat->protocol;
2299 usvc->addr.ip = usvc_compat->addr;
2300 usvc->port = usvc_compat->port;
2301 usvc->fwmark = usvc_compat->fwmark;
2302
2303
2304 usvc->sched_name = usvc_compat->sched_name;
2305
2306 usvc->flags = usvc_compat->flags;
2307 usvc->timeout = usvc_compat->timeout;
2308 usvc->netmask = usvc_compat->netmask;
2309}
2310
2311static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2312 struct ip_vs_dest_user *udest_compat)
2313{
2314 memset(udest, 0, sizeof(*udest));
2315
2316 udest->addr.ip = udest_compat->addr;
2317 udest->port = udest_compat->port;
2318 udest->conn_flags = udest_compat->conn_flags;
2319 udest->weight = udest_compat->weight;
2320 udest->u_threshold = udest_compat->u_threshold;
2321 udest->l_threshold = udest_compat->l_threshold;
2322 udest->af = AF_INET;
2323}
2324
/*
 *	Handler for the IPVS "set" socket options.
 *
 *	@sk:   socket the option was issued on; selects the namespace
 *	@cmd:  IP_VS_SO_SET_* command
 *	@user: user-space argument buffer
 *	@len:  length of @user; must equal set_arglen[CMDID(cmd)]
 *
 *	Requires CAP_NET_ADMIN in the user namespace of the socket's network
 *	namespace.  Sync daemon start/stop are handled without
 *	__ip_vs_mutex; every other command runs with it held.  A use count
 *	is held for the duration of the command.
 *
 *	Returns 0 or a negative errno: -EPERM (no capability), -EINVAL (bad
 *	command, bad length, unterminated scheduler name, bad daemon
 *	interface name), -EFAULT (copy failure or invalid protocol), -ESRCH
 *	(service not found for a command other than ADD), -EEXIST (ADD of an
 *	existing service), or whatever the invoked operation returns.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	struct net *net = sock_net(sk);
	int ret;
	unsigned char arg[MAX_SET_ARGLEN];
	struct ip_vs_service_user *usvc_compat;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest_compat;
	struct ip_vs_dest_user_kern udest;
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* set_arglen[] stores lengths in an unsigned char */
	BUILD_BUG_ON(sizeof(arg) > 255);
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
		return -EINVAL;
	/* The length must match exactly, which also bounds the copy below */
	if (len != set_arglen[CMDID(cmd)]) {
		IP_VS_DBG(1, "set_ctl: len %u != %u\n",
			  len, set_arglen[CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Handle daemons since they have another lock */
	if (cmd == IP_VS_SO_SET_STARTDAEMON ||
	    cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;

		if (cmd == IP_VS_SO_SET_STARTDAEMON) {
			struct ipvs_sync_daemon_cfg cfg;

			memset(&cfg, 0, sizeof(cfg));
			ret = -EINVAL;
			/* Reject an empty or over-long interface name */
			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
				    sizeof(cfg.mcast_ifn)) <= 0)
				goto out_dec;
			cfg.syncid = dm->syncid;
			ret = start_sync_thread(ipvs, &cfg, dm->state);
		} else {
			mutex_lock(&ipvs->sync_mutex);
			ret = stop_sync_thread(ipvs, dm->state);
			mutex_unlock(&ipvs->sync_mutex);
		}
		goto out_dec;
	}

	mutex_lock(&__ip_vs_mutex);
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush(ipvs, false);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	}

	/* The destination part, if any, directly follows the service part */
	usvc_compat = (struct ip_vs_service_user *)arg;
	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);

	/* We only use the new structs internally, so copy userspace compat
	 * structs to extended internal versions */
	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
	ip_vs_copy_udest_compat(&udest, udest_compat);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
			ret = ip_vs_zero_all(ipvs);
			goto out_unlock;
		}
	}

	/* The scheduler name must be NUL-terminated within its buffer */
	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
	    IP_VS_SCHEDNAME_MAXLEN) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
	    usvc.protocol != IPPROTO_SCTP) {
		pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
		       usvc.protocol, &usvc.addr.ip,
		       ntohs(usvc.port));
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	rcu_read_lock();
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
					   &usvc.addr, usvc.port);
	else
		svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
	rcu_read_unlock();

	/* Every command except ADD needs an existing, matching service */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc.protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(ipvs, &usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	default:
		ret = -EINVAL;
	}

  out_unlock:
	mutex_unlock(&__ip_vs_mutex);
  out_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
2477
2478
2479static void
2480ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2481{
2482 struct ip_vs_scheduler *sched;
2483 struct ip_vs_kstats kstats;
2484 char *sched_name;
2485
2486 sched = rcu_dereference_protected(src->scheduler, 1);
2487 sched_name = sched ? sched->name : "none";
2488 dst->protocol = src->protocol;
2489 dst->addr = src->addr.ip;
2490 dst->port = src->port;
2491 dst->fwmark = src->fwmark;
2492 strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
2493 dst->flags = src->flags;
2494 dst->timeout = src->timeout / HZ;
2495 dst->netmask = src->netmask;
2496 dst->num_dests = src->num_dests;
2497 ip_vs_copy_stats(&kstats, &src->stats);
2498 ip_vs_export_stats_user(&dst->stats, &kstats);
2499}
2500
2501static inline int
2502__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
2503 const struct ip_vs_get_services *get,
2504 struct ip_vs_get_services __user *uptr)
2505{
2506 int idx, count=0;
2507 struct ip_vs_service *svc;
2508 struct ip_vs_service_entry entry;
2509 int ret = 0;
2510
2511 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2512 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2513
2514 if (svc->af != AF_INET || (svc->ipvs != ipvs))
2515 continue;
2516
2517 if (count >= get->num_services)
2518 goto out;
2519 memset(&entry, 0, sizeof(entry));
2520 ip_vs_copy_service(&entry, svc);
2521 if (copy_to_user(&uptr->entrytable[count],
2522 &entry, sizeof(entry))) {
2523 ret = -EFAULT;
2524 goto out;
2525 }
2526 count++;
2527 }
2528 }
2529
2530 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2531 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2532
2533 if (svc->af != AF_INET || (svc->ipvs != ipvs))
2534 continue;
2535
2536 if (count >= get->num_services)
2537 goto out;
2538 memset(&entry, 0, sizeof(entry));
2539 ip_vs_copy_service(&entry, svc);
2540 if (copy_to_user(&uptr->entrytable[count],
2541 &entry, sizeof(entry))) {
2542 ret = -EFAULT;
2543 goto out;
2544 }
2545 count++;
2546 }
2547 }
2548out:
2549 return ret;
2550}
2551
2552static inline int
2553__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
2554 struct ip_vs_get_dests __user *uptr)
2555{
2556 struct ip_vs_service *svc;
2557 union nf_inet_addr addr = { .ip = get->addr };
2558 int ret = 0;
2559
2560 rcu_read_lock();
2561 if (get->fwmark)
2562 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
2563 else
2564 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
2565 get->port);
2566 rcu_read_unlock();
2567
2568 if (svc) {
2569 int count = 0;
2570 struct ip_vs_dest *dest;
2571 struct ip_vs_dest_entry entry;
2572 struct ip_vs_kstats kstats;
2573
2574 memset(&entry, 0, sizeof(entry));
2575 list_for_each_entry(dest, &svc->destinations, n_list) {
2576 if (count >= get->num_dests)
2577 break;
2578
2579
2580
2581
2582 if (dest->af != svc->af)
2583 continue;
2584
2585 entry.addr = dest->addr.ip;
2586 entry.port = dest->port;
2587 entry.conn_flags = atomic_read(&dest->conn_flags);
2588 entry.weight = atomic_read(&dest->weight);
2589 entry.u_threshold = dest->u_threshold;
2590 entry.l_threshold = dest->l_threshold;
2591 entry.activeconns = atomic_read(&dest->activeconns);
2592 entry.inactconns = atomic_read(&dest->inactconns);
2593 entry.persistconns = atomic_read(&dest->persistconns);
2594 ip_vs_copy_stats(&kstats, &dest->stats);
2595 ip_vs_export_stats_user(&entry.stats, &kstats);
2596 if (copy_to_user(&uptr->entrytable[count],
2597 &entry, sizeof(entry))) {
2598 ret = -EFAULT;
2599 break;
2600 }
2601 count++;
2602 }
2603 } else
2604 ret = -ESRCH;
2605 return ret;
2606}
2607
2608static inline void
2609__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2610{
2611#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2612 struct ip_vs_proto_data *pd;
2613#endif
2614
2615 memset(u, 0, sizeof (*u));
2616
2617#ifdef CONFIG_IP_VS_PROTO_TCP
2618 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2619 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2620 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2621#endif
2622#ifdef CONFIG_IP_VS_PROTO_UDP
2623 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2624 u->udp_timeout =
2625 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2626#endif
2627}
2628
/*
 * Minimum argument length of each get command, indexed by CMDID(cmd).
 * do_ip_vs_get_ctl() copies exactly this many bytes from user space
 * before dispatching.  The element type is unsigned char, so every
 * length must fit in 255.
 */
static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
	[CMDID(IP_VS_SO_GET_VERSION)] = 64,
	[CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo),
	[CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
	[CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry),
	[CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests),
	[CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user),
};
2638
/*
 * Union of every get-command argument type.  It is never instantiated
 * for its members; its size gives the largest fixed-size argument any
 * get command can carry, mirroring the entries of get_arglen[].
 */
union ip_vs_get_arglen {
	char field_IP_VS_SO_GET_VERSION[64];
	struct ip_vs_getinfo field_IP_VS_SO_GET_INFO;
	struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES;
	struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE;
	struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS;
	struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT;
	struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2];
};

/* Size of the on-stack argument buffer in do_ip_vs_get_ctl() */
#define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen)
2650
2651static int
2652do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2653{
2654 unsigned char arg[MAX_GET_ARGLEN];
2655 int ret = 0;
2656 unsigned int copylen;
2657 struct net *net = sock_net(sk);
2658 struct netns_ipvs *ipvs = net_ipvs(net);
2659
2660 BUG_ON(!net);
2661 BUILD_BUG_ON(sizeof(arg) > 255);
2662 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2663 return -EPERM;
2664
2665 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2666 return -EINVAL;
2667
2668 copylen = get_arglen[CMDID(cmd)];
2669 if (*len < (int) copylen) {
2670 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
2671 return -EINVAL;
2672 }
2673
2674 if (copy_from_user(arg, user, copylen) != 0)
2675 return -EFAULT;
2676
2677
2678
2679 if (cmd == IP_VS_SO_GET_DAEMON) {
2680 struct ip_vs_daemon_user d[2];
2681
2682 memset(&d, 0, sizeof(d));
2683 mutex_lock(&ipvs->sync_mutex);
2684 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2685 d[0].state = IP_VS_STATE_MASTER;
2686 strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
2687 sizeof(d[0].mcast_ifn));
2688 d[0].syncid = ipvs->mcfg.syncid;
2689 }
2690 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2691 d[1].state = IP_VS_STATE_BACKUP;
2692 strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
2693 sizeof(d[1].mcast_ifn));
2694 d[1].syncid = ipvs->bcfg.syncid;
2695 }
2696 if (copy_to_user(user, &d, sizeof(d)) != 0)
2697 ret = -EFAULT;
2698 mutex_unlock(&ipvs->sync_mutex);
2699 return ret;
2700 }
2701
2702 mutex_lock(&__ip_vs_mutex);
2703 switch (cmd) {
2704 case IP_VS_SO_GET_VERSION:
2705 {
2706 char buf[64];
2707
2708 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2709 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2710 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2711 ret = -EFAULT;
2712 goto out;
2713 }
2714 *len = strlen(buf)+1;
2715 }
2716 break;
2717
2718 case IP_VS_SO_GET_INFO:
2719 {
2720 struct ip_vs_getinfo info;
2721 info.version = IP_VS_VERSION_CODE;
2722 info.size = ip_vs_conn_tab_size;
2723 info.num_services = ipvs->num_services;
2724 if (copy_to_user(user, &info, sizeof(info)) != 0)
2725 ret = -EFAULT;
2726 }
2727 break;
2728
2729 case IP_VS_SO_GET_SERVICES:
2730 {
2731 struct ip_vs_get_services *get;
2732 int size;
2733
2734 get = (struct ip_vs_get_services *)arg;
2735 size = sizeof(*get) +
2736 sizeof(struct ip_vs_service_entry) * get->num_services;
2737 if (*len != size) {
2738 pr_err("length: %u != %u\n", *len, size);
2739 ret = -EINVAL;
2740 goto out;
2741 }
2742 ret = __ip_vs_get_service_entries(ipvs, get, user);
2743 }
2744 break;
2745
2746 case IP_VS_SO_GET_SERVICE:
2747 {
2748 struct ip_vs_service_entry *entry;
2749 struct ip_vs_service *svc;
2750 union nf_inet_addr addr;
2751
2752 entry = (struct ip_vs_service_entry *)arg;
2753 addr.ip = entry->addr;
2754 rcu_read_lock();
2755 if (entry->fwmark)
2756 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
2757 else
2758 svc = __ip_vs_service_find(ipvs, AF_INET,
2759 entry->protocol, &addr,
2760 entry->port);
2761 rcu_read_unlock();
2762 if (svc) {
2763 ip_vs_copy_service(entry, svc);
2764 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2765 ret = -EFAULT;
2766 } else
2767 ret = -ESRCH;
2768 }
2769 break;
2770
2771 case IP_VS_SO_GET_DESTS:
2772 {
2773 struct ip_vs_get_dests *get;
2774 int size;
2775
2776 get = (struct ip_vs_get_dests *)arg;
2777 size = sizeof(*get) +
2778 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2779 if (*len != size) {
2780 pr_err("length: %u != %u\n", *len, size);
2781 ret = -EINVAL;
2782 goto out;
2783 }
2784 ret = __ip_vs_get_dest_entries(ipvs, get, user);
2785 }
2786 break;
2787
2788 case IP_VS_SO_GET_TIMEOUT:
2789 {
2790 struct ip_vs_timeout_user t;
2791
2792 __ip_vs_get_timeouts(ipvs, &t);
2793 if (copy_to_user(user, &t, sizeof(t)) != 0)
2794 ret = -EFAULT;
2795 }
2796 break;
2797
2798 default:
2799 ret = -EINVAL;
2800 }
2801
2802out:
2803 mutex_unlock(&__ip_vs_mutex);
2804 return ret;
2805}
2806
2807
2808static struct nf_sockopt_ops ip_vs_sockopts = {
2809 .pf = PF_INET,
2810 .set_optmin = IP_VS_BASE_CTL,
2811 .set_optmax = IP_VS_SO_SET_MAX+1,
2812 .set = do_ip_vs_set_ctl,
2813 .get_optmin = IP_VS_BASE_CTL,
2814 .get_optmax = IP_VS_SO_GET_MAX+1,
2815 .get = do_ip_vs_get_ctl,
2816 .owner = THIS_MODULE,
2817};
2818
2819
2820
2821
2822
2823
/*
 * Forward declaration: the message builders below reference the family
 * before its initialised definition further down in this file.
 */
static struct genl_family ip_vs_genl_family;
2825
2826
2827static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2828 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2829 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2830 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2831 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2832 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2833 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2834};
2835
2836
2837static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2838 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2839 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2840 .len = IP_VS_IFNAME_MAXLEN - 1 },
2841 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2842 [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
2843 [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
2844 [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
2845 [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 },
2846 [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 },
2847};
2848
2849
2850static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2851 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2852 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2853 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2854 .len = sizeof(union nf_inet_addr) },
2855 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2856 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2857 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2858 .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
2859 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2860 .len = IP_VS_PENAME_MAXLEN },
2861 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2862 .len = sizeof(struct ip_vs_flags) },
2863 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2864 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2865 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2866};
2867
2868
2869static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2870 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2871 .len = sizeof(union nf_inet_addr) },
2872 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2873 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2874 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2875 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2876 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2877 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2878 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2879 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2880 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2881 [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
2882};
2883
2884static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2885 struct ip_vs_kstats *kstats)
2886{
2887 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2888
2889 if (!nl_stats)
2890 return -EMSGSIZE;
2891
2892 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
2893 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
2894 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
2895 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
2896 IPVS_STATS_ATTR_PAD) ||
2897 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
2898 IPVS_STATS_ATTR_PAD) ||
2899 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
2900 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
2901 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
2902 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
2903 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
2904 goto nla_put_failure;
2905 nla_nest_end(skb, nl_stats);
2906
2907 return 0;
2908
2909nla_put_failure:
2910 nla_nest_cancel(skb, nl_stats);
2911 return -EMSGSIZE;
2912}
2913
2914static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
2915 struct ip_vs_kstats *kstats)
2916{
2917 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2918
2919 if (!nl_stats)
2920 return -EMSGSIZE;
2921
2922 if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns,
2923 IPVS_STATS_ATTR_PAD) ||
2924 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts,
2925 IPVS_STATS_ATTR_PAD) ||
2926 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts,
2927 IPVS_STATS_ATTR_PAD) ||
2928 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
2929 IPVS_STATS_ATTR_PAD) ||
2930 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
2931 IPVS_STATS_ATTR_PAD) ||
2932 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps,
2933 IPVS_STATS_ATTR_PAD) ||
2934 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps,
2935 IPVS_STATS_ATTR_PAD) ||
2936 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps,
2937 IPVS_STATS_ATTR_PAD) ||
2938 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps,
2939 IPVS_STATS_ATTR_PAD) ||
2940 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps,
2941 IPVS_STATS_ATTR_PAD))
2942 goto nla_put_failure;
2943 nla_nest_end(skb, nl_stats);
2944
2945 return 0;
2946
2947nla_put_failure:
2948 nla_nest_cancel(skb, nl_stats);
2949 return -EMSGSIZE;
2950}
2951
2952static int ip_vs_genl_fill_service(struct sk_buff *skb,
2953 struct ip_vs_service *svc)
2954{
2955 struct ip_vs_scheduler *sched;
2956 struct ip_vs_pe *pe;
2957 struct nlattr *nl_service;
2958 struct ip_vs_flags flags = { .flags = svc->flags,
2959 .mask = ~0 };
2960 struct ip_vs_kstats kstats;
2961 char *sched_name;
2962
2963 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2964 if (!nl_service)
2965 return -EMSGSIZE;
2966
2967 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2968 goto nla_put_failure;
2969 if (svc->fwmark) {
2970 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2971 goto nla_put_failure;
2972 } else {
2973 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2974 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2975 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2976 goto nla_put_failure;
2977 }
2978
2979 sched = rcu_dereference_protected(svc->scheduler, 1);
2980 sched_name = sched ? sched->name : "none";
2981 pe = rcu_dereference_protected(svc->pe, 1);
2982 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
2983 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
2984 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2985 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2986 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2987 goto nla_put_failure;
2988 ip_vs_copy_stats(&kstats, &svc->stats);
2989 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
2990 goto nla_put_failure;
2991 if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
2992 goto nla_put_failure;
2993
2994 nla_nest_end(skb, nl_service);
2995
2996 return 0;
2997
2998nla_put_failure:
2999 nla_nest_cancel(skb, nl_service);
3000 return -EMSGSIZE;
3001}
3002
3003static int ip_vs_genl_dump_service(struct sk_buff *skb,
3004 struct ip_vs_service *svc,
3005 struct netlink_callback *cb)
3006{
3007 void *hdr;
3008
3009 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3010 &ip_vs_genl_family, NLM_F_MULTI,
3011 IPVS_CMD_NEW_SERVICE);
3012 if (!hdr)
3013 return -EMSGSIZE;
3014
3015 if (ip_vs_genl_fill_service(skb, svc) < 0)
3016 goto nla_put_failure;
3017
3018 genlmsg_end(skb, hdr);
3019 return 0;
3020
3021nla_put_failure:
3022 genlmsg_cancel(skb, hdr);
3023 return -EMSGSIZE;
3024}
3025
3026static int ip_vs_genl_dump_services(struct sk_buff *skb,
3027 struct netlink_callback *cb)
3028{
3029 int idx = 0, i;
3030 int start = cb->args[0];
3031 struct ip_vs_service *svc;
3032 struct net *net = sock_net(skb->sk);
3033 struct netns_ipvs *ipvs = net_ipvs(net);
3034
3035 mutex_lock(&__ip_vs_mutex);
3036 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3037 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
3038 if (++idx <= start || (svc->ipvs != ipvs))
3039 continue;
3040 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3041 idx--;
3042 goto nla_put_failure;
3043 }
3044 }
3045 }
3046
3047 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3048 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
3049 if (++idx <= start || (svc->ipvs != ipvs))
3050 continue;
3051 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3052 idx--;
3053 goto nla_put_failure;
3054 }
3055 }
3056 }
3057
3058nla_put_failure:
3059 mutex_unlock(&__ip_vs_mutex);
3060 cb->args[0] = idx;
3061
3062 return skb->len;
3063}
3064
/*
 * Report whether @af is an address family usable here: AF_INET always,
 * AF_INET6 only when built with CONFIG_IP_VS_IPV6 and ipv6_mod_enabled()
 * returns true.
 */
static bool ip_vs_is_af_valid(int af)
{
	switch (af) {
	case AF_INET:
		return true;
#ifdef CONFIG_IP_VS_IPV6
	case AF_INET6:
		return ipv6_mod_enabled();
#endif
	default:
		return false;
	}
}
3075
3076static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
3077 struct ip_vs_service_user_kern *usvc,
3078 struct nlattr *nla, int full_entry,
3079 struct ip_vs_service **ret_svc)
3080{
3081 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3082 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3083 struct ip_vs_service *svc;
3084
3085
3086 if (nla == NULL ||
3087 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla,
3088 ip_vs_svc_policy, NULL))
3089 return -EINVAL;
3090
3091 nla_af = attrs[IPVS_SVC_ATTR_AF];
3092 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
3093 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
3094 nla_port = attrs[IPVS_SVC_ATTR_PORT];
3095 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
3096
3097 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3098 return -EINVAL;
3099
3100 memset(usvc, 0, sizeof(*usvc));
3101
3102 usvc->af = nla_get_u16(nla_af);
3103 if (!ip_vs_is_af_valid(usvc->af))
3104 return -EAFNOSUPPORT;
3105
3106 if (nla_fwmark) {
3107 usvc->protocol = IPPROTO_TCP;
3108 usvc->fwmark = nla_get_u32(nla_fwmark);
3109 } else {
3110 usvc->protocol = nla_get_u16(nla_protocol);
3111 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3112 usvc->port = nla_get_be16(nla_port);
3113 usvc->fwmark = 0;
3114 }
3115
3116 rcu_read_lock();
3117 if (usvc->fwmark)
3118 svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
3119 else
3120 svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
3121 &usvc->addr, usvc->port);
3122 rcu_read_unlock();
3123 *ret_svc = svc;
3124
3125
3126 if (full_entry) {
3127 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3128 *nla_netmask;
3129 struct ip_vs_flags flags;
3130
3131 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3132 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3133 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3134 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3135 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3136
3137 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3138 return -EINVAL;
3139
3140 nla_memcpy(&flags, nla_flags, sizeof(flags));
3141
3142
3143 if (svc)
3144 usvc->flags = svc->flags;
3145
3146
3147 usvc->flags = (usvc->flags & ~flags.mask) |
3148 (flags.flags & flags.mask);
3149 usvc->sched_name = nla_data(nla_sched);
3150 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3151 usvc->timeout = nla_get_u32(nla_timeout);
3152 usvc->netmask = nla_get_be32(nla_netmask);
3153 }
3154
3155 return 0;
3156}
3157
3158static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
3159 struct nlattr *nla)
3160{
3161 struct ip_vs_service_user_kern usvc;
3162 struct ip_vs_service *svc;
3163 int ret;
3164
3165 ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
3166 return ret ? ERR_PTR(ret) : svc;
3167}
3168
3169static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3170{
3171 struct nlattr *nl_dest;
3172 struct ip_vs_kstats kstats;
3173
3174 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3175 if (!nl_dest)
3176 return -EMSGSIZE;
3177
3178 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3179 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3180 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3181 (atomic_read(&dest->conn_flags) &
3182 IP_VS_CONN_F_FWD_MASK)) ||
3183 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3184 atomic_read(&dest->weight)) ||
3185 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3186 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3187 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3188 atomic_read(&dest->activeconns)) ||
3189 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3190 atomic_read(&dest->inactconns)) ||
3191 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3192 atomic_read(&dest->persistconns)) ||
3193 nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
3194 goto nla_put_failure;
3195 ip_vs_copy_stats(&kstats, &dest->stats);
3196 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
3197 goto nla_put_failure;
3198 if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
3199 goto nla_put_failure;
3200
3201 nla_nest_end(skb, nl_dest);
3202
3203 return 0;
3204
3205nla_put_failure:
3206 nla_nest_cancel(skb, nl_dest);
3207 return -EMSGSIZE;
3208}
3209
3210static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3211 struct netlink_callback *cb)
3212{
3213 void *hdr;
3214
3215 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3216 &ip_vs_genl_family, NLM_F_MULTI,
3217 IPVS_CMD_NEW_DEST);
3218 if (!hdr)
3219 return -EMSGSIZE;
3220
3221 if (ip_vs_genl_fill_dest(skb, dest) < 0)
3222 goto nla_put_failure;
3223
3224 genlmsg_end(skb, hdr);
3225 return 0;
3226
3227nla_put_failure:
3228 genlmsg_cancel(skb, hdr);
3229 return -EMSGSIZE;
3230}
3231
3232static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3233 struct netlink_callback *cb)
3234{
3235 int idx = 0;
3236 int start = cb->args[0];
3237 struct ip_vs_service *svc;
3238 struct ip_vs_dest *dest;
3239 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3240 struct net *net = sock_net(skb->sk);
3241 struct netns_ipvs *ipvs = net_ipvs(net);
3242
3243 mutex_lock(&__ip_vs_mutex);
3244
3245
3246 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX,
3247 ip_vs_cmd_policy, NULL))
3248 goto out_err;
3249
3250
3251 svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
3252 if (IS_ERR_OR_NULL(svc))
3253 goto out_err;
3254
3255
3256 list_for_each_entry(dest, &svc->destinations, n_list) {
3257 if (++idx <= start)
3258 continue;
3259 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3260 idx--;
3261 goto nla_put_failure;
3262 }
3263 }
3264
3265nla_put_failure:
3266 cb->args[0] = idx;
3267
3268out_err:
3269 mutex_unlock(&__ip_vs_mutex);
3270
3271 return skb->len;
3272}
3273
3274static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3275 struct nlattr *nla, int full_entry)
3276{
3277 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3278 struct nlattr *nla_addr, *nla_port;
3279 struct nlattr *nla_addr_family;
3280
3281
3282 if (nla == NULL ||
3283 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla,
3284 ip_vs_dest_policy, NULL))
3285 return -EINVAL;
3286
3287 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3288 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3289 nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
3290
3291 if (!(nla_addr && nla_port))
3292 return -EINVAL;
3293
3294 memset(udest, 0, sizeof(*udest));
3295
3296 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3297 udest->port = nla_get_be16(nla_port);
3298
3299 if (nla_addr_family)
3300 udest->af = nla_get_u16(nla_addr_family);
3301 else
3302 udest->af = 0;
3303
3304
3305 if (full_entry) {
3306 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3307 *nla_l_thresh;
3308
3309 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3310 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3311 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3312 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3313
3314 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3315 return -EINVAL;
3316
3317 udest->conn_flags = nla_get_u32(nla_fwd)
3318 & IP_VS_CONN_F_FWD_MASK;
3319 udest->weight = nla_get_u32(nla_weight);
3320 udest->u_threshold = nla_get_u32(nla_u_thresh);
3321 udest->l_threshold = nla_get_u32(nla_l_thresh);
3322 }
3323
3324 return 0;
3325}
3326
3327static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
3328 struct ipvs_sync_daemon_cfg *c)
3329{
3330 struct nlattr *nl_daemon;
3331
3332 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3333 if (!nl_daemon)
3334 return -EMSGSIZE;
3335
3336 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3337 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
3338 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
3339 nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
3340 nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
3341 nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
3342 goto nla_put_failure;
3343#ifdef CONFIG_IP_VS_IPV6
3344 if (c->mcast_af == AF_INET6) {
3345 if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
3346 &c->mcast_group.in6))
3347 goto nla_put_failure;
3348 } else
3349#endif
3350 if (c->mcast_af == AF_INET &&
3351 nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
3352 c->mcast_group.ip))
3353 goto nla_put_failure;
3354 nla_nest_end(skb, nl_daemon);
3355
3356 return 0;
3357
3358nla_put_failure:
3359 nla_nest_cancel(skb, nl_daemon);
3360 return -EMSGSIZE;
3361}
3362
3363static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
3364 struct ipvs_sync_daemon_cfg *c,
3365 struct netlink_callback *cb)
3366{
3367 void *hdr;
3368 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3369 &ip_vs_genl_family, NLM_F_MULTI,
3370 IPVS_CMD_NEW_DAEMON);
3371 if (!hdr)
3372 return -EMSGSIZE;
3373
3374 if (ip_vs_genl_fill_daemon(skb, state, c))
3375 goto nla_put_failure;
3376
3377 genlmsg_end(skb, hdr);
3378 return 0;
3379
3380nla_put_failure:
3381 genlmsg_cancel(skb, hdr);
3382 return -EMSGSIZE;
3383}
3384
3385static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3386 struct netlink_callback *cb)
3387{
3388 struct net *net = sock_net(skb->sk);
3389 struct netns_ipvs *ipvs = net_ipvs(net);
3390
3391 mutex_lock(&ipvs->sync_mutex);
3392 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3393 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3394 &ipvs->mcfg, cb) < 0)
3395 goto nla_put_failure;
3396
3397 cb->args[0] = 1;
3398 }
3399
3400 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3401 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3402 &ipvs->bcfg, cb) < 0)
3403 goto nla_put_failure;
3404
3405 cb->args[1] = 1;
3406 }
3407
3408nla_put_failure:
3409 mutex_unlock(&ipvs->sync_mutex);
3410
3411 return skb->len;
3412}
3413
3414static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3415{
3416 struct ipvs_sync_daemon_cfg c;
3417 struct nlattr *a;
3418 int ret;
3419
3420 memset(&c, 0, sizeof(c));
3421 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3422 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3423 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3424 return -EINVAL;
3425 strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3426 sizeof(c.mcast_ifn));
3427 c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
3428
3429 a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
3430 if (a)
3431 c.sync_maxlen = nla_get_u16(a);
3432
3433 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
3434 if (a) {
3435 c.mcast_af = AF_INET;
3436 c.mcast_group.ip = nla_get_in_addr(a);
3437 if (!ipv4_is_multicast(c.mcast_group.ip))
3438 return -EINVAL;
3439 } else {
3440 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
3441 if (a) {
3442#ifdef CONFIG_IP_VS_IPV6
3443 int addr_type;
3444
3445 c.mcast_af = AF_INET6;
3446 c.mcast_group.in6 = nla_get_in6_addr(a);
3447 addr_type = ipv6_addr_type(&c.mcast_group.in6);
3448 if (!(addr_type & IPV6_ADDR_MULTICAST))
3449 return -EINVAL;
3450#else
3451 return -EAFNOSUPPORT;
3452#endif
3453 }
3454 }
3455
3456 a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
3457 if (a)
3458 c.mcast_port = nla_get_u16(a);
3459
3460 a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
3461 if (a)
3462 c.mcast_ttl = nla_get_u8(a);
3463
3464
3465
3466
3467 if (ipvs->mixed_address_family_dests > 0)
3468 return -EINVAL;
3469
3470 ret = start_sync_thread(ipvs, &c,
3471 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3472 return ret;
3473}
3474
3475static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3476{
3477 int ret;
3478
3479 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3480 return -EINVAL;
3481
3482 mutex_lock(&ipvs->sync_mutex);
3483 ret = stop_sync_thread(ipvs,
3484 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3485 mutex_unlock(&ipvs->sync_mutex);
3486 return ret;
3487}
3488
3489static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
3490{
3491 struct ip_vs_timeout_user t;
3492
3493 __ip_vs_get_timeouts(ipvs, &t);
3494
3495 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3496 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3497
3498 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3499 t.tcp_fin_timeout =
3500 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3501
3502 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3503 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3504
3505 return ip_vs_set_timeout(ipvs, &t);
3506}
3507
3508static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3509{
3510 int ret = -EINVAL, cmd;
3511 struct net *net = sock_net(skb->sk);
3512 struct netns_ipvs *ipvs = net_ipvs(net);
3513
3514 cmd = info->genlhdr->cmd;
3515
3516 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3517 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3518
3519 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3520 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3521 info->attrs[IPVS_CMD_ATTR_DAEMON],
3522 ip_vs_daemon_policy, info->extack))
3523 goto out;
3524
3525 if (cmd == IPVS_CMD_NEW_DAEMON)
3526 ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
3527 else
3528 ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
3529 }
3530
3531out:
3532 return ret;
3533}
3534
3535static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3536{
3537 struct ip_vs_service *svc = NULL;
3538 struct ip_vs_service_user_kern usvc;
3539 struct ip_vs_dest_user_kern udest;
3540 int ret = 0, cmd;
3541 int need_full_svc = 0, need_full_dest = 0;
3542 struct net *net = sock_net(skb->sk);
3543 struct netns_ipvs *ipvs = net_ipvs(net);
3544
3545 cmd = info->genlhdr->cmd;
3546
3547 mutex_lock(&__ip_vs_mutex);
3548
3549 if (cmd == IPVS_CMD_FLUSH) {
3550 ret = ip_vs_flush(ipvs, false);
3551 goto out;
3552 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3553 ret = ip_vs_genl_set_config(ipvs, info->attrs);
3554 goto out;
3555 } else if (cmd == IPVS_CMD_ZERO &&
3556 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3557 ret = ip_vs_zero_all(ipvs);
3558 goto out;
3559 }
3560
3561
3562
3563
3564 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3565 need_full_svc = 1;
3566
3567 ret = ip_vs_genl_parse_service(ipvs, &usvc,
3568 info->attrs[IPVS_CMD_ATTR_SERVICE],
3569 need_full_svc, &svc);
3570 if (ret)
3571 goto out;
3572
3573
3574 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3575 ret = -ESRCH;
3576 goto out;
3577 }
3578
3579
3580
3581
3582 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3583 cmd == IPVS_CMD_DEL_DEST) {
3584 if (cmd != IPVS_CMD_DEL_DEST)
3585 need_full_dest = 1;
3586
3587 ret = ip_vs_genl_parse_dest(&udest,
3588 info->attrs[IPVS_CMD_ATTR_DEST],
3589 need_full_dest);
3590 if (ret)
3591 goto out;
3592
3593
3594
3595
3596
3597
3598
3599 if (udest.af == 0)
3600 udest.af = svc->af;
3601
3602 if (!ip_vs_is_af_valid(udest.af)) {
3603 ret = -EAFNOSUPPORT;
3604 goto out;
3605 }
3606
3607 if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
3608
3609
3610
3611 if (ipvs->sync_state) {
3612 ret = -EINVAL;
3613 goto out;
3614 }
3615
3616
3617 switch (udest.conn_flags) {
3618 case IP_VS_CONN_F_TUNNEL:
3619
3620 break;
3621 default:
3622 ret = -EINVAL;
3623 goto out;
3624 }
3625 }
3626 }
3627
3628 switch (cmd) {
3629 case IPVS_CMD_NEW_SERVICE:
3630 if (svc == NULL)
3631 ret = ip_vs_add_service(ipvs, &usvc, &svc);
3632 else
3633 ret = -EEXIST;
3634 break;
3635 case IPVS_CMD_SET_SERVICE:
3636 ret = ip_vs_edit_service(svc, &usvc);
3637 break;
3638 case IPVS_CMD_DEL_SERVICE:
3639 ret = ip_vs_del_service(svc);
3640
3641 break;
3642 case IPVS_CMD_NEW_DEST:
3643 ret = ip_vs_add_dest(svc, &udest);
3644 break;
3645 case IPVS_CMD_SET_DEST:
3646 ret = ip_vs_edit_dest(svc, &udest);
3647 break;
3648 case IPVS_CMD_DEL_DEST:
3649 ret = ip_vs_del_dest(svc, &udest);
3650 break;
3651 case IPVS_CMD_ZERO:
3652 ret = ip_vs_zero_service(svc);
3653 break;
3654 default:
3655 ret = -EINVAL;
3656 }
3657
3658out:
3659 mutex_unlock(&__ip_vs_mutex);
3660
3661 return ret;
3662}
3663
3664static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3665{
3666 struct sk_buff *msg;
3667 void *reply;
3668 int ret, cmd, reply_cmd;
3669 struct net *net = sock_net(skb->sk);
3670 struct netns_ipvs *ipvs = net_ipvs(net);
3671
3672 cmd = info->genlhdr->cmd;
3673
3674 if (cmd == IPVS_CMD_GET_SERVICE)
3675 reply_cmd = IPVS_CMD_NEW_SERVICE;
3676 else if (cmd == IPVS_CMD_GET_INFO)
3677 reply_cmd = IPVS_CMD_SET_INFO;
3678 else if (cmd == IPVS_CMD_GET_CONFIG)
3679 reply_cmd = IPVS_CMD_SET_CONFIG;
3680 else {
3681 pr_err("unknown Generic Netlink command\n");
3682 return -EINVAL;
3683 }
3684
3685 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3686 if (!msg)
3687 return -ENOMEM;
3688
3689 mutex_lock(&__ip_vs_mutex);
3690
3691 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3692 if (reply == NULL)
3693 goto nla_put_failure;
3694
3695 switch (cmd) {
3696 case IPVS_CMD_GET_SERVICE:
3697 {
3698 struct ip_vs_service *svc;
3699
3700 svc = ip_vs_genl_find_service(ipvs,
3701 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3702 if (IS_ERR(svc)) {
3703 ret = PTR_ERR(svc);
3704 goto out_err;
3705 } else if (svc) {
3706 ret = ip_vs_genl_fill_service(msg, svc);
3707 if (ret)
3708 goto nla_put_failure;
3709 } else {
3710 ret = -ESRCH;
3711 goto out_err;
3712 }
3713
3714 break;
3715 }
3716
3717 case IPVS_CMD_GET_CONFIG:
3718 {
3719 struct ip_vs_timeout_user t;
3720
3721 __ip_vs_get_timeouts(ipvs, &t);
3722#ifdef CONFIG_IP_VS_PROTO_TCP
3723 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3724 t.tcp_timeout) ||
3725 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3726 t.tcp_fin_timeout))
3727 goto nla_put_failure;
3728#endif
3729#ifdef CONFIG_IP_VS_PROTO_UDP
3730 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3731 goto nla_put_failure;
3732#endif
3733
3734 break;
3735 }
3736
3737 case IPVS_CMD_GET_INFO:
3738 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3739 IP_VS_VERSION_CODE) ||
3740 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3741 ip_vs_conn_tab_size))
3742 goto nla_put_failure;
3743 break;
3744 }
3745
3746 genlmsg_end(msg, reply);
3747 ret = genlmsg_reply(msg, info);
3748 goto out;
3749
3750nla_put_failure:
3751 pr_err("not enough space in Netlink message\n");
3752 ret = -EMSGSIZE;
3753
3754out_err:
3755 nlmsg_free(msg);
3756out:
3757 mutex_unlock(&__ip_vs_mutex);
3758
3759 return ret;
3760}
3761
3762
3763static const struct genl_ops ip_vs_genl_ops[] = {
3764 {
3765 .cmd = IPVS_CMD_NEW_SERVICE,
3766 .flags = GENL_ADMIN_PERM,
3767 .policy = ip_vs_cmd_policy,
3768 .doit = ip_vs_genl_set_cmd,
3769 },
3770 {
3771 .cmd = IPVS_CMD_SET_SERVICE,
3772 .flags = GENL_ADMIN_PERM,
3773 .policy = ip_vs_cmd_policy,
3774 .doit = ip_vs_genl_set_cmd,
3775 },
3776 {
3777 .cmd = IPVS_CMD_DEL_SERVICE,
3778 .flags = GENL_ADMIN_PERM,
3779 .policy = ip_vs_cmd_policy,
3780 .doit = ip_vs_genl_set_cmd,
3781 },
3782 {
3783 .cmd = IPVS_CMD_GET_SERVICE,
3784 .flags = GENL_ADMIN_PERM,
3785 .doit = ip_vs_genl_get_cmd,
3786 .dumpit = ip_vs_genl_dump_services,
3787 .policy = ip_vs_cmd_policy,
3788 },
3789 {
3790 .cmd = IPVS_CMD_NEW_DEST,
3791 .flags = GENL_ADMIN_PERM,
3792 .policy = ip_vs_cmd_policy,
3793 .doit = ip_vs_genl_set_cmd,
3794 },
3795 {
3796 .cmd = IPVS_CMD_SET_DEST,
3797 .flags = GENL_ADMIN_PERM,
3798 .policy = ip_vs_cmd_policy,
3799 .doit = ip_vs_genl_set_cmd,
3800 },
3801 {
3802 .cmd = IPVS_CMD_DEL_DEST,
3803 .flags = GENL_ADMIN_PERM,
3804 .policy = ip_vs_cmd_policy,
3805 .doit = ip_vs_genl_set_cmd,
3806 },
3807 {
3808 .cmd = IPVS_CMD_GET_DEST,
3809 .flags = GENL_ADMIN_PERM,
3810 .policy = ip_vs_cmd_policy,
3811 .dumpit = ip_vs_genl_dump_dests,
3812 },
3813 {
3814 .cmd = IPVS_CMD_NEW_DAEMON,
3815 .flags = GENL_ADMIN_PERM,
3816 .policy = ip_vs_cmd_policy,
3817 .doit = ip_vs_genl_set_daemon,
3818 },
3819 {
3820 .cmd = IPVS_CMD_DEL_DAEMON,
3821 .flags = GENL_ADMIN_PERM,
3822 .policy = ip_vs_cmd_policy,
3823 .doit = ip_vs_genl_set_daemon,
3824 },
3825 {
3826 .cmd = IPVS_CMD_GET_DAEMON,
3827 .flags = GENL_ADMIN_PERM,
3828 .dumpit = ip_vs_genl_dump_daemons,
3829 },
3830 {
3831 .cmd = IPVS_CMD_SET_CONFIG,
3832 .flags = GENL_ADMIN_PERM,
3833 .policy = ip_vs_cmd_policy,
3834 .doit = ip_vs_genl_set_cmd,
3835 },
3836 {
3837 .cmd = IPVS_CMD_GET_CONFIG,
3838 .flags = GENL_ADMIN_PERM,
3839 .doit = ip_vs_genl_get_cmd,
3840 },
3841 {
3842 .cmd = IPVS_CMD_GET_INFO,
3843 .flags = GENL_ADMIN_PERM,
3844 .doit = ip_vs_genl_get_cmd,
3845 },
3846 {
3847 .cmd = IPVS_CMD_ZERO,
3848 .flags = GENL_ADMIN_PERM,
3849 .policy = ip_vs_cmd_policy,
3850 .doit = ip_vs_genl_set_cmd,
3851 },
3852 {
3853 .cmd = IPVS_CMD_FLUSH,
3854 .flags = GENL_ADMIN_PERM,
3855 .doit = ip_vs_genl_set_cmd,
3856 },
3857};
3858
3859static struct genl_family ip_vs_genl_family __ro_after_init = {
3860 .hdrsize = 0,
3861 .name = IPVS_GENL_NAME,
3862 .version = IPVS_GENL_VERSION,
3863 .maxattr = IPVS_CMD_ATTR_MAX,
3864 .netnsok = true,
3865 .module = THIS_MODULE,
3866 .ops = ip_vs_genl_ops,
3867 .n_ops = ARRAY_SIZE(ip_vs_genl_ops),
3868};
3869
3870static int __init ip_vs_genl_register(void)
3871{
3872 return genl_register_family(&ip_vs_genl_family);
3873}
3874
3875static void ip_vs_genl_unregister(void)
3876{
3877 genl_unregister_family(&ip_vs_genl_family);
3878}
3879
3880
3881
3882
3883
3884
3885#ifdef CONFIG_SYSCTL
3886static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
3887{
3888 struct net *net = ipvs->net;
3889 int idx;
3890 struct ctl_table *tbl;
3891
3892 atomic_set(&ipvs->dropentry, 0);
3893 spin_lock_init(&ipvs->dropentry_lock);
3894 spin_lock_init(&ipvs->droppacket_lock);
3895 spin_lock_init(&ipvs->securetcp_lock);
3896
3897 if (!net_eq(net, &init_net)) {
3898 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3899 if (tbl == NULL)
3900 return -ENOMEM;
3901
3902
3903 if (net->user_ns != &init_user_ns)
3904 tbl[0].procname = NULL;
3905 } else
3906 tbl = vs_vars;
3907
3908 for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
3909 if (tbl[idx].proc_handler == proc_do_defense_mode)
3910 tbl[idx].extra2 = ipvs;
3911 }
3912 idx = 0;
3913 ipvs->sysctl_amemthresh = 1024;
3914 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3915 ipvs->sysctl_am_droprate = 10;
3916 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3917 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3918 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3919#ifdef CONFIG_IP_VS_NFCT
3920 tbl[idx++].data = &ipvs->sysctl_conntrack;
3921#endif
3922 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3923 ipvs->sysctl_snat_reroute = 1;
3924 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3925 ipvs->sysctl_sync_ver = 1;
3926 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3927 ipvs->sysctl_sync_ports = 1;
3928 tbl[idx++].data = &ipvs->sysctl_sync_ports;
3929 tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
3930 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3931 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3932 ipvs->sysctl_sync_sock_size = 0;
3933 tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3934 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3935 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3936 tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
3937 tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
3938 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3939 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3940 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3941 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3942 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3943 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3944 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3945 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3946 tbl[idx++].data = &ipvs->sysctl_sync_retries;
3947 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3948 ipvs->sysctl_pmtu_disc = 1;
3949 tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3950 tbl[idx++].data = &ipvs->sysctl_backup_only;
3951 ipvs->sysctl_conn_reuse_mode = 1;
3952 tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
3953 tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
3954 tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
3955
3956 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3957 if (ipvs->sysctl_hdr == NULL) {
3958 if (!net_eq(net, &init_net))
3959 kfree(tbl);
3960 return -ENOMEM;
3961 }
3962 ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
3963 ipvs->sysctl_tbl = tbl;
3964
3965 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3966 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3967
3968 return 0;
3969}
3970
3971static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
3972{
3973 struct net *net = ipvs->net;
3974
3975 cancel_delayed_work_sync(&ipvs->defense_work);
3976 cancel_work_sync(&ipvs->defense_work.work);
3977 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3978 ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
3979
3980 if (!net_eq(net, &init_net))
3981 kfree(ipvs->sysctl_tbl);
3982}
3983
3984#else
3985
3986static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
3987static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
3988
3989#endif
3990
3991static struct notifier_block ip_vs_dst_notifier = {
3992 .notifier_call = ip_vs_dst_event,
3993#ifdef CONFIG_IP_VS_IPV6
3994 .priority = ADDRCONF_NOTIFY_PRIORITY + 5,
3995#endif
3996};
3997
3998int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3999{
4000 int i, idx;
4001
4002
4003 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
4004 INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
4005
4006 INIT_LIST_HEAD(&ipvs->dest_trash);
4007 spin_lock_init(&ipvs->dest_trash_lock);
4008 timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
4009 atomic_set(&ipvs->ftpsvc_counter, 0);
4010 atomic_set(&ipvs->nullsvc_counter, 0);
4011 atomic_set(&ipvs->conn_out_counter, 0);
4012
4013
4014 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
4015 if (!ipvs->tot_stats.cpustats)
4016 return -ENOMEM;
4017
4018 for_each_possible_cpu(i) {
4019 struct ip_vs_cpu_stats *ipvs_tot_stats;
4020 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
4021 u64_stats_init(&ipvs_tot_stats->syncp);
4022 }
4023
4024 spin_lock_init(&ipvs->tot_stats.lock);
4025
4026 proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops,
4027 sizeof(struct ip_vs_iter));
4028 proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
4029 ip_vs_stats_show, NULL);
4030 proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
4031 ip_vs_stats_percpu_show, NULL);
4032
4033 if (ip_vs_control_net_init_sysctl(ipvs))
4034 goto err;
4035
4036 return 0;
4037
4038err:
4039 free_percpu(ipvs->tot_stats.cpustats);
4040 return -ENOMEM;
4041}
4042
4043void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
4044{
4045 ip_vs_trash_cleanup(ipvs);
4046 ip_vs_control_net_cleanup_sysctl(ipvs);
4047 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
4048 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
4049 remove_proc_entry("ip_vs", ipvs->net->proc_net);
4050 free_percpu(ipvs->tot_stats.cpustats);
4051}
4052
4053int __init ip_vs_register_nl_ioctl(void)
4054{
4055 int ret;
4056
4057 ret = nf_register_sockopt(&ip_vs_sockopts);
4058 if (ret) {
4059 pr_err("cannot register sockopt.\n");
4060 goto err_sock;
4061 }
4062
4063 ret = ip_vs_genl_register();
4064 if (ret) {
4065 pr_err("cannot register Generic Netlink interface.\n");
4066 goto err_genl;
4067 }
4068 return 0;
4069
4070err_genl:
4071 nf_unregister_sockopt(&ip_vs_sockopts);
4072err_sock:
4073 return ret;
4074}
4075
4076void ip_vs_unregister_nl_ioctl(void)
4077{
4078 ip_vs_genl_unregister();
4079 nf_unregister_sockopt(&ip_vs_sockopts);
4080}
4081
4082int __init ip_vs_control_init(void)
4083{
4084 int idx;
4085 int ret;
4086
4087 EnterFunction(2);
4088
4089
4090 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4091 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
4092 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
4093 }
4094
4095 smp_wmb();
4096
4097 ret = register_netdevice_notifier(&ip_vs_dst_notifier);
4098 if (ret < 0)
4099 return ret;
4100
4101 LeaveFunction(2);
4102 return 0;
4103}
4104
4105
4106void ip_vs_control_cleanup(void)
4107{
4108 EnterFunction(2);
4109 unregister_netdevice_notifier(&ip_vs_dst_notifier);
4110 LeaveFunction(2);
4111}
4112