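/*
 * IPVS:	Source Hashing scheduling module
 *
 * The SH scheduler selects a real server by hashing the source IP
 * address (and, when IP_VS_SVC_F_SCHED_SH_PORT is set, the source port)
 * of the packet into a statically assigned table of destinations, so
 * that connections from the same client keep hitting the same server.
 *
 * Buckets are assigned to destinations in proportion to their weight;
 * destinations with weight <= 0 or with IP_VS_DEST_F_OVERLOAD set are
 * treated as unavailable.  With IP_VS_SVC_F_SCHED_SH_FALLBACK the
 * scheduler tries another bucket instead of failing when the hashed
 * destination is unavailable.
 */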
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/ip.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>

#include <net/ip_vs.h>

#include <net/tcp.h>
#include <linux/udp.h>
#include <linux/sctp.h>
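
/*
 *	IPVS SH bucket: holds one RCU-protected pointer to a real server.
 */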
struct ip_vs_sh_bucket {
	struct ip_vs_dest __rcu *dest;
};
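
/*
 *	IPVS SH hash table size (a power of 2, configurable via
 *	CONFIG_IP_VS_SH_TAB_BITS, 256 buckets by default).
 */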
#ifndef CONFIG_IP_VS_SH_TAB_BITS
#define CONFIG_IP_VS_SH_TAB_BITS 8
#endif
#define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS
#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)

struct ip_vs_sh_state {
	struct rcu_head rcu_head;
	struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
};
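
/* Helper function to determine if a server is unavailable */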
static inline bool is_unavailable(struct ip_vs_dest *dest)
{
	return atomic_read(&dest->weight) <= 0 ||
		dest->flags & IP_VS_DEST_F_OVERLOAD;
}
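
/*
 *	Returns the hash bucket index for the given address/port.
 */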
static inline unsigned int
ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
		 __be16 port, unsigned int offset)
{
	__be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		addr_fold = addr->ip6[0] ^ addr->ip6[1] ^
			    addr->ip6[2] ^ addr->ip6[3];
#endif
	return (offset + hash_32(ntohs(port) + ntohl(addr_fold),
				 IP_VS_SH_TAB_BITS)) &
		IP_VS_SH_TAB_MASK;
}
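
/*
 *	Get the destination cached for the hash of addr/port, or NULL if
 *	the bucket is empty or the destination is unavailable.
 */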
static inline struct ip_vs_dest *
ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
	     const union nf_inet_addr *addr, __be16 port)
{
	unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
	struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest);

	return (!dest || is_unavailable(dest)) ? NULL : dest;
}
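
/* As ip_vs_sh_get, but with a fallback when the selected server is
 * unavailable: rehash with increasing offsets until an available
 * destination is found, giving up at the first empty bucket.
 */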
static inline struct ip_vs_dest *
ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
		      const union nf_inet_addr *addr, __be16 port)
{
	unsigned int offset, roffset;
	unsigned int hash, ihash;
	struct ip_vs_dest *dest;
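
	/* first try the dest that the hash points to */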
	ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
	dest = rcu_dereference(s->buckets[ihash].dest);
	if (!dest)
		return NULL;
	if (!is_unavailable(dest))
		return dest;

	IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
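
	/* if the original dest is unavailable, loop around the table
	 * starting from ihash to find a new dest
	 */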
	for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
		roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE;
		hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset);
		dest = rcu_dereference(s->buckets[hash].dest);
		if (!dest)
			break;
		if (!is_unavailable(dest))
			return dest;
		IP_VS_DBG_BUF(6, "SH: selected unavailable "
			      "server %s:%d (offset %d), reselecting",
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port), roffset);
	}

	return NULL;
}
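
/*
 *	Assign all the hash buckets of the specified table with the service.
 */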
static int
ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
{
	int i;
	struct ip_vs_sh_bucket *b;
	struct list_head *p;
	struct ip_vs_dest *dest;
	int d_count;
	bool empty;

	b = &s->buckets[0];
	p = &svc->destinations;
	empty = list_empty(p);
	d_count = 0;
	for (i = 0; i < IP_VS_SH_TAB_SIZE; i++) {
		dest = rcu_dereference_protected(b->dest, 1);
		if (dest)
			ip_vs_dest_put(dest);
		if (empty)
			RCU_INIT_POINTER(b->dest, NULL);
		else {
			if (p == &svc->destinations)
				p = p->next;

			dest = list_entry(p, struct ip_vs_dest, n_list);
			ip_vs_dest_hold(dest);
			RCU_INIT_POINTER(b->dest, dest);

			IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
				      i, IP_VS_DBG_ADDR(dest->af, &dest->addr),
				      atomic_read(&dest->weight));
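
			/* Don't move to the next dest until its weight
			 * worth of buckets has been filled.
			 */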
			if (++d_count >= atomic_read(&dest->weight)) {
				p = p->next;
				d_count = 0;
			}
		}
		b++;
	}
	return 0;
}
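
/*
 *	Flush all the hash buckets of the specified table.
 */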
static void ip_vs_sh_flush(struct ip_vs_sh_state *s)
{
	int i;
	struct ip_vs_sh_bucket *b;
	struct ip_vs_dest *dest;

	b = &s->buckets[0];
	for (i = 0; i < IP_VS_SH_TAB_SIZE; i++) {
		dest = rcu_dereference_protected(b->dest, 1);
		if (dest) {
			ip_vs_dest_put(dest);
			RCU_INIT_POINTER(b->dest, NULL);
		}
		b++;
	}
}


static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
{
	struct ip_vs_sh_state *s;
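
	/* allocate the SH table for this service */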
	s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL);
	if (s == NULL)
		return -ENOMEM;

	svc->sched_data = s;
	IP_VS_DBG(6, "SH hash table (memory=%zdbytes) allocated for "
		     "current service\n",
		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
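
	/* assign the hash buckets with the current dests */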
	ip_vs_sh_reassign(s, svc);

	return 0;
}


static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_sh_state *s = svc->sched_data;
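
	/* drop the references held by the hash buckets */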
	ip_vs_sh_flush(s);
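
	/* release the table itself */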
	kfree_rcu(s, rcu_head);
	IP_VS_DBG(6, "SH hash table (memory=%zdbytes) released\n",
		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
}


static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
				 struct ip_vs_dest *dest)
{
	struct ip_vs_sh_state *s = svc->sched_data;
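
	/* re-assign the hash buckets with the updated destination set */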
	ip_vs_sh_reassign(s, svc);

	return 0;
}
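
/* Helper function to get the port used for hashing */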
static inline __be16
ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
{
	__be16 _ports[2], *ports;
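
	/* Only TCP, UDP and SCTP packets carry port numbers that we can
	 * hash on; for all other protocols hash on the address alone.
	 */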
	switch (iph->protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_SCTP:
		ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
					   &_ports);
		if (unlikely(!ports))
			return 0;

		if (likely(!ip_vs_iph_inverse(iph)))
			return ports[0];
		else
			return ports[1];
	default:
		return 0;
	}
}
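
/*
 *	Source Hashing scheduling
 */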
static struct ip_vs_dest *
ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		  struct ip_vs_iphdr *iph)
{
	struct ip_vs_dest *dest;
	struct ip_vs_sh_state *s;
	__be16 port = 0;
	const union nf_inet_addr *hash_addr;

	hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;

	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");

	if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
		port = ip_vs_sh_get_port(skb, iph);

	s = (struct ip_vs_sh_state *) svc->sched_data;

	if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
		dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port);
	else
		dest = ip_vs_sh_get(svc, s, hash_addr, port);

	if (!dest) {
		ip_vs_scheduler_err(svc, "no destination available");
		return NULL;
	}

	IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
		      IP_VS_DBG_ADDR(svc->af, hash_addr),
		      IP_VS_DBG_ADDR(dest->af, &dest->addr),
		      ntohs(dest->port));

	return dest;
}
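
/*
 *	IPVS SH Scheduler structure
 */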
static struct ip_vs_scheduler ip_vs_sh_scheduler =
{
	.name = "sh",
	.refcnt = ATOMIC_INIT(0),
	.module = THIS_MODULE,
	.n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
	.init_service = ip_vs_sh_init_svc,
	.done_service = ip_vs_sh_done_svc,
	.add_dest = ip_vs_sh_dest_changed,
	.del_dest = ip_vs_sh_dest_changed,
	.upd_dest = ip_vs_sh_dest_changed,
	.schedule = ip_vs_sh_schedule,
};


static int __init ip_vs_sh_init(void)
{
	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
}


static void __exit ip_vs_sh_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
	synchronize_rcu();
}


module_init(ip_vs_sh_init);
module_exit(ip_vs_sh_cleanup);
MODULE_LICENSE("GPL");