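/*
 * INETPEER - storage for long-lived, per-destination (peer) information
 * that has to outlive individual routes and sockets: ICMP rate-limiting
 * state, IP fragmentation ids and per-peer metrics.  Entries are kept in
 * per-base AVL trees keyed by the peer address and are reclaimed when the
 * pool grows past inet_peer_threshold or when they stay unused too long.
 */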
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/timer.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/net.h>
#include <linux/workqueue.h>
#include <net/ip.h>
#include <net/inetpeer.h>
#include <net/secure_seq.h>
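/*
 * Pool organisation:
 *
 *  - Nodes live in per-base AVL trees (base->root) keyed by the peer
 *    address; peer_fake_node acts as the empty leaf so height checks never
 *    dereference NULL.
 *  - Lookups run lock-free under RCU (lookup_rcu()); insertion, deletion
 *    and rebalancing are serialised by the base->lock seqlock, which
 *    lockless lookups also use to detect concurrent tree changes.
 *  - Nodes whose refcnt drops to zero are not freed at once: they are
 *    reaped by inet_peer_gc() after they have been idle long enough, and
 *    whole trees detached by inetpeer_invalidate_tree() are handed to the
 *    deferred gc worker.
 */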
static struct kmem_cache *peer_cachep __read_mostly;

/* State for the deferred worker that frees invalidated trees. */
static LIST_HEAD(gc_list);
static const int gc_delay = 60 * HZ;
static struct delayed_work gc_work;
static DEFINE_SPINLOCK(gc_lock);

#define node_height(x) x->avl_height

#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
static const struct inet_peer peer_fake_node = {
        .avl_left = peer_avl_empty_rcu,
        .avl_right = peer_avl_empty_rcu,
        .avl_height = 0
};

void inet_peer_base_init(struct inet_peer_base *bp)
{
        bp->root = peer_avl_empty_rcu;
        seqlock_init(&bp->lock);
        bp->total = 0;
}
EXPORT_SYMBOL_GPL(inet_peer_base_init);

#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */

/* Start reclaiming entries aggressively once the pool reaches this size;
 * inet_initpeers() lowers it on low-memory machines.
 */
int inet_peer_threshold __read_mostly = 65536 + 128;

/* Idle time before an unreferenced entry may be reclaimed: inet_peer_gc()
 * scales it from maxttl down towards minttl as the pool fills up.
 */
int inet_peer_minttl __read_mostly = 120 * HZ;          /* under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;      /* usual time to live: 10 min */
static void inetpeer_gc_worker(struct work_struct *work)
{
        struct inet_peer *p, *n, *c;
        struct list_head list;

        spin_lock_bh(&gc_lock);
        list_replace_init(&gc_list, &list);
        spin_unlock_bh(&gc_lock);

        if (list_empty(&list))
                return;

        list_for_each_entry_safe(p, n, &list, gc_list) {

                if (need_resched())
                        cond_resched();

                /* Queue the children so the whole detached subtree is visited. */
                c = rcu_dereference_protected(p->avl_left, 1);
                if (c != peer_avl_empty) {
                        list_add_tail(&c->gc_list, &list);
                        p->avl_left = peer_avl_empty_rcu;
                }

                c = rcu_dereference_protected(p->avl_right, 1);
                if (c != peer_avl_empty) {
                        list_add_tail(&c->gc_list, &list);
                        p->avl_right = peer_avl_empty_rcu;
                }

                n = list_entry(p->gc_list.next, struct inet_peer, gc_list);

                /* Free only entries nobody still holds a reference to. */
                if (!atomic_read(&p->refcnt)) {
                        list_del(&p->gc_list);
                        kmem_cache_free(peer_cachep, p);
                }
        }

        if (list_empty(&list))
                return;

        /* Some entries are still in use: put them back and try again later. */
        spin_lock_bh(&gc_lock);
        list_splice(&list, &gc_list);
        spin_unlock_bh(&gc_lock);

        schedule_delayed_work(&gc_work, gc_delay);
}
/* Called once during boot to size the pool and create the slab cache. */
void __init inet_initpeers(void)
{
        struct sysinfo si;

        si_meminfo(&si);

        /* Shrink the default threshold on machines with little memory
         * (<= 32MB, <= 16MB, <= 8MB of RAM).
         */
        if (si.totalram <= (32768*1024)/PAGE_SIZE)
                inet_peer_threshold >>= 1;
        if (si.totalram <= (16384*1024)/PAGE_SIZE)
                inet_peer_threshold >>= 1;
        if (si.totalram <= (8192*1024)/PAGE_SIZE)
                inet_peer_threshold >>= 2;

        peer_cachep = kmem_cache_create("inet_peer_cache",
                        sizeof(struct inet_peer),
                        0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                        NULL);

        INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
}
#define rcu_deref_locked(X, BASE)                               \
        rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))

/*
 * Walk the tree looking for _daddr, recording the path in _stack so callers
 * can later rebalance along it.  Evaluates to the matching node, or to
 * peer_avl_empty, in which case the last stack entry is the slot where a new
 * node should be linked (see link_to_pool()).  Must be called with
 * _base->lock held (see rcu_deref_locked()).
 */
#define lookup(_daddr, _stack, _base)                           \
({                                                              \
        struct inet_peer *u;                                    \
        struct inet_peer __rcu **v;                             \
                                                                \
        stackptr = _stack;                                      \
        *stackptr++ = &_base->root;                             \
        for (u = rcu_deref_locked(_base->root, _base);          \
             u != peer_avl_empty;) {                            \
                int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \
                if (cmp == 0)                                   \
                        break;                                  \
                if (cmp == -1)                                  \
                        v = &u->avl_left;                       \
                else                                            \
                        v = &u->avl_right;                      \
                *stackptr++ = v;                                \
                u = rcu_deref_locked(*v, _base);                \
        }                                                       \
        u;                                                      \
})

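/*
 * Lock-free lookup: walks the tree under rcu_read_lock() only.  A concurrent
 * writer may rebalance the tree underneath us, so the walk is bounded by
 * PEER_MAXDEPTH and callers recheck the base seqlock on failure.  On success
 * a reference is taken, unless the entry is already being deleted
 * (refcnt == -1).
 */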
static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
                                    struct inet_peer_base *base)
{
        struct inet_peer *u = rcu_dereference(base->root);
        int count = 0;

        while (u != peer_avl_empty) {
                int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
                if (cmp == 0) {
                        /* Before taking a reference, check if this entry is
                         * being deleted (refcnt == -1).
                         */
                        if (!atomic_add_unless(&u->refcnt, 1, -1))
                                u = NULL;
                        return u;
                }
                if (cmp == -1)
                        u = rcu_dereference(u->avl_left);
                else
                        u = rcu_dereference(u->avl_right);
                if (unlikely(++count == PEER_MAXDEPTH))
                        break;
        }
        return NULL;
}
/*
 * Descend into the left subtree of *start and keep going right: the result
 * is start's in-order predecessor (the node whose right child is empty).
 * The path is pushed onto *stackptr.  Called with the pool lock held.
 */
#define lookup_rightempty(start, base)                          \
({                                                              \
        struct inet_peer *u;                                    \
        struct inet_peer __rcu **v;                             \
        *stackptr++ = &start->avl_left;                         \
        v = &start->avl_left;                                   \
        for (u = rcu_deref_locked(*v, base);                    \
             u->avl_right != peer_avl_empty_rcu;) {             \
                v = &u->avl_right;                              \
                *stackptr++ = v;                                \
                u = rcu_deref_locked(*v, base);                 \
        }                                                       \
        u;                                                      \
})

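/*
 * Walk back up the path recorded in stack[] (stackend points one past its
 * top) and restore the AVL invariant with single or double rotations.
 * Child pointers are republished with RCU_INIT_POINTER(), so lock-free
 * readers always see a consistent, if possibly stale, tree.  Called with
 * the pool lock held.
 */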
static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
                               struct inet_peer __rcu ***stackend,
                               struct inet_peer_base *base)
{
        struct inet_peer __rcu **nodep;
        struct inet_peer *node, *l, *r;
        int lh, rh;

        while (stackend > stack) {
                nodep = *--stackend;
                node = rcu_deref_locked(*nodep, base);
                l = rcu_deref_locked(node->avl_left, base);
                r = rcu_deref_locked(node->avl_right, base);
                lh = node_height(l);
                rh = node_height(r);
                if (lh > rh + 1) { /* left subtree is too tall */
                        struct inet_peer *ll, *lr, *lrl, *lrr;
                        int lrh;
                        ll = rcu_deref_locked(l->avl_left, base);
                        lr = rcu_deref_locked(l->avl_right, base);
                        lrh = node_height(lr);
                        if (lrh <= node_height(ll)) {
                                /* Single rotation: l becomes the new subtree
                                 * root, node its right child.
                                 */
                                RCU_INIT_POINTER(node->avl_left, lr);
                                RCU_INIT_POINTER(node->avl_right, r);
                                node->avl_height = lrh + 1;
                                RCU_INIT_POINTER(l->avl_left, ll);
                                RCU_INIT_POINTER(l->avl_right, node);
                                l->avl_height = node->avl_height + 1;
                                RCU_INIT_POINTER(*nodep, l);
                        } else {
                                /* Double (left-right) rotation: lr is
                                 * promoted, with l and node as children.
                                 */
                                lrl = rcu_deref_locked(lr->avl_left, base);
                                lrr = rcu_deref_locked(lr->avl_right, base);
                                RCU_INIT_POINTER(node->avl_left, lrr);
                                RCU_INIT_POINTER(node->avl_right, r);
                                node->avl_height = rh + 1;
                                RCU_INIT_POINTER(l->avl_left, ll);
                                RCU_INIT_POINTER(l->avl_right, lrl);
                                l->avl_height = rh + 1;
                                RCU_INIT_POINTER(lr->avl_left, l);
                                RCU_INIT_POINTER(lr->avl_right, node);
                                lr->avl_height = rh + 2;
                                RCU_INIT_POINTER(*nodep, lr);
                        }
                } else if (rh > lh + 1) { /* right subtree is too tall */
                        struct inet_peer *rr, *rl, *rlr, *rll;
                        int rlh;
                        rr = rcu_deref_locked(r->avl_right, base);
                        rl = rcu_deref_locked(r->avl_left, base);
                        rlh = node_height(rl);
                        if (rlh <= node_height(rr)) {
                                /* Single rotation: r becomes the new subtree
                                 * root, node its left child.
                                 */
                                RCU_INIT_POINTER(node->avl_right, rl);
                                RCU_INIT_POINTER(node->avl_left, l);
                                node->avl_height = rlh + 1;
                                RCU_INIT_POINTER(r->avl_right, rr);
                                RCU_INIT_POINTER(r->avl_left, node);
                                r->avl_height = node->avl_height + 1;
                                RCU_INIT_POINTER(*nodep, r);
                        } else {
                                /* Double (right-left) rotation: rl is
                                 * promoted, with node and r as children.
                                 */
                                rlr = rcu_deref_locked(rl->avl_right, base);
                                rll = rcu_deref_locked(rl->avl_left, base);
                                RCU_INIT_POINTER(node->avl_right, rll);
                                RCU_INIT_POINTER(node->avl_left, l);
                                node->avl_height = lh + 1;
                                RCU_INIT_POINTER(r->avl_right, rr);
                                RCU_INIT_POINTER(r->avl_left, rlr);
                                r->avl_height = lh + 1;
                                RCU_INIT_POINTER(rl->avl_right, r);
                                RCU_INIT_POINTER(rl->avl_left, node);
                                rl->avl_height = lh + 2;
                                RCU_INIT_POINTER(*nodep, rl);
                        }
                } else { /* heights already balanced; just fix this node */
                        node->avl_height = (lh > rh ? lh : rh) + 1;
                }
        }
}

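/*
 * Insert the freshly initialised node n at the empty slot left on top of
 * the lookup stack and rebalance the path back to the root.  Called with
 * the pool lock held.
 */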
#define link_to_pool(n, base)                                   \
do {                                                            \
        n->avl_height = 1;                                      \
        n->avl_left = peer_avl_empty_rcu;                       \
        n->avl_right = peer_avl_empty_rcu;                      \
        /* lockless readers can see the new node from here on */ \
        rcu_assign_pointer(**--stackptr, n);                    \
        peer_avl_rebalance(stack, stackptr, base);              \
} while (0)
/* RCU callback that finally frees a node removed by unlink_from_pool(). */
static void inetpeer_free_rcu(struct rcu_head *head)
{
        kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
}

/* Remove p from its base tree; the actual free happens after an RCU grace
 * period so that lockless readers still walking through p stay safe.
 * Called with the pool lock held.
 */
static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
                             struct inet_peer __rcu **stack[PEER_MAXDEPTH])
{
        struct inet_peer __rcu ***stackptr, ***delp;

        if (lookup(&p->daddr, stack, base) != p)
                BUG();
        delp = stackptr - 1; /* *delp[0] == p */
        if (p->avl_left == peer_avl_empty_rcu) {
                *delp[0] = p->avl_right;
                --stackptr;
        } else {
                /* Look for a node to put in place of p. */
                struct inet_peer *t;
                t = lookup_rightempty(p, base);
                BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
                **--stackptr = t->avl_left;
                /* t is removed; t->daddr > x->daddr for any other x in
                 * p->avl_left subtree, so t can take p's old place.
                 */
                RCU_INIT_POINTER(*delp[0], t);
                t->avl_left = p->avl_left;
                t->avl_right = p->avl_right;
                t->avl_height = p->avl_height;
                BUG_ON(delp[1] != &p->avl_left);
                delp[1] = &t->avl_left; /* was &p->avl_left */
        }
        peer_avl_rebalance(stack, stackptr, base);
        base->total--;
        call_rcu(&p->rcu, inetpeer_free_rcu);
}

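/*
 * Reap unreferenced entries along the path recorded in stack[].  An entry
 * qualifies when its refcnt is zero and it has been idle (since dtime) for
 * at least ttl jiffies; ttl shrinks from inet_peer_maxttl towards
 * inet_peer_minttl as the pool approaches inet_peer_threshold, and drops to
 * zero above it.  Returns the number of entries freed.  Called with the
 * pool lock held.
 */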
static int inet_peer_gc(struct inet_peer_base *base,
                        struct inet_peer __rcu **stack[PEER_MAXDEPTH],
                        struct inet_peer __rcu ***stackptr)
{
        struct inet_peer *p, *gchead = NULL;
        __u32 delta, ttl;
        int cnt = 0;

        if (base->total >= inet_peer_threshold)
                ttl = 0; /* be aggressive */
        else
                ttl = inet_peer_maxttl
                                - (inet_peer_maxttl - inet_peer_minttl) / HZ *
                                        base->total / inet_peer_threshold * HZ;
        stackptr--; /* last stack slot is peer_avl_empty */
        while (stackptr > stack) {
                stackptr--;
                p = rcu_deref_locked(**stackptr, base);
                if (atomic_read(&p->refcnt) == 0) {
                        smp_rmb();
                        delta = (__u32)jiffies - p->dtime;
                        if (delta >= ttl &&
                            atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
                                p->gc_next = gchead;
                                gchead = p;
                        }
                }
        }
        while ((p = gchead) != NULL) {
                gchead = p->gc_next;
                cnt++;
                unlink_from_pool(p, base, stack);
        }
        return cnt;
}

struct inet_peer *inet_getpeer(struct inet_peer_base *base,
                               const struct inetpeer_addr *daddr,
                               int create)
{
        struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
        struct inet_peer *p;
        unsigned int sequence;
        int invalidated, gccnt = 0;

        /* Attempt a lockless lookup first.
         * Because of a concurrent writer, we might not find an existing entry.
         */
        rcu_read_lock();
        sequence = read_seqbegin(&base->lock);
        p = lookup_rcu(daddr, base);
        invalidated = read_seqretry(&base->lock, sequence);
        rcu_read_unlock();

        if (p)
                return p;

        /* If no writer did a change during our lookup, we can return early. */
        if (!create && !invalidated)
                return NULL;

        /* Retry an exact lookup, this time taking the lock; at least the
         * nodes should be hot in our cache.
         */
        write_seqlock_bh(&base->lock);
relookup:
        p = lookup(daddr, stack, base);
        if (p != peer_avl_empty) {
                atomic_inc(&p->refcnt);
                write_sequnlock_bh(&base->lock);
                return p;
        }
        if (!gccnt) {
                gccnt = inet_peer_gc(base, stack, stackptr);
                if (gccnt && create)
                        goto relookup;
        }
        p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
        if (p) {
                p->daddr = *daddr;
                atomic_set(&p->refcnt, 1);
                atomic_set(&p->rid, 0);
                p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
                p->rate_tokens = 0;
                /* 60*HZ is arbitrary, but chosen high enough so that the
                 * first calculation of tokens is at its maximum.
                 */
                p->rate_last = jiffies - 60*HZ;
                INIT_LIST_HEAD(&p->gc_list);

                /* Link the node. */
                link_to_pool(p, base);
                base->total++;
        }
        write_sequnlock_bh(&base->lock);

        return p;
}
EXPORT_SYMBOL_GPL(inet_getpeer);

void inet_putpeer(struct inet_peer *p)
{
        /* Record when the last reference went away; the barrier orders the
         * dtime store before the refcnt decrement (paired with the
         * smp_rmb() in inet_peer_gc()).
         */
        p->dtime = (__u32)jiffies;
        smp_mb__before_atomic();
        atomic_dec(&p->refcnt);
}
EXPORT_SYMBOL_GPL(inet_putpeer);
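/*
 * Typical usage (illustrative sketch only, not code from this file): a
 * caller that wants per-destination state looks the peer up, optionally
 * creating it, uses it, and drops the reference when done:
 *
 *      struct inet_peer *peer = inet_getpeer(base, &daddr, 1);
 *
 *      if (peer) {
 *              if (inet_peer_xrlim_allow(peer, timeout))
 *                      ;       // e.g. send the rate-limited reply
 *              inet_putpeer(peer);
 *      }
 *
 * Here "base", "daddr" and "timeout" stand for whatever the caller already
 * has; inet_getpeer() may return NULL even with create != 0 if the
 * GFP_ATOMIC allocation fails.
 */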
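/*
 * Transmit rate limiting, e.g. for ICMP replies: a simple token bucket kept
 * in the peer entry.  Tokens accrue at one per jiffy since rate_last, are
 * capped at XRLIM_BURST_FACTOR * timeout, and each allowed transmission
 * costs "timeout" tokens.  Returns true if the caller may send now.
 */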
#define XRLIM_BURST_FACTOR 6
bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
{
        unsigned long now, token;
        bool rc = false;

        if (!peer)
                return true;

        token = peer->rate_tokens;
        now = jiffies;
        token += now - peer->rate_last;
        peer->rate_last = now;
        if (token > XRLIM_BURST_FACTOR * timeout)
                token = XRLIM_BURST_FACTOR * timeout;
        if (token >= timeout) {
                token -= timeout;
                rc = true;
        }
        peer->rate_tokens = token;
        return rc;
}
EXPORT_SYMBOL(inet_peer_xrlim_allow);

/* Called at the end of the RCU grace period for a tree detached by
 * inetpeer_invalidate_tree(): hand the subtree root to the gc worker.
 */
static void inetpeer_inval_rcu(struct rcu_head *head)
{
        struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);

        spin_lock_bh(&gc_lock);
        list_add_tail(&p->gc_list, &gc_list);
        spin_unlock_bh(&gc_lock);

        schedule_delayed_work(&gc_work, gc_delay);
}

/* Detach the whole tree from its base; the entries are freed later by the
 * gc worker once readers are gone and references have been dropped.
 */
void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
        struct inet_peer *root;

        write_seqlock_bh(&base->lock);

        root = rcu_deref_locked(base->root, base);
        if (root != peer_avl_empty) {
                base->root = peer_avl_empty_rcu;
                base->total = 0;
                call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
        }

        write_sequnlock_bh(&base->lock);
}
EXPORT_SYMBOL(inetpeer_invalidate_tree);