/*
 *  INETPEER - A storage for permanent information about peers
 *
 *  This source is covered by the GNU GPL, the same as all kernel sources.
 *
 *  Authors:  Andrey V. Savochkin <saw@msu.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/timer.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/net.h>
#include <linux/workqueue.h>
#include <net/ip.h>
#include <net/inetpeer.h>
#include <net/secure_seq.h>

/*
 *  Theory of operations.
 *  We keep one entry for each peer IP address.  The nodes contain long-living
 *  information about the peer which doesn't depend on routes.
 *
 *  Nodes are removed only when their reference counter goes to 0.
 *  When that happens, the node may be removed once a sufficient amount of
 *  time has passed since its last use.  The less-recently-used entries can
 *  also be removed if the pool is overloaded, i.e. if the total number of
 *  entries is greater than or equal to the threshold.
 *
 *  The node pool is organised as an AVL tree.
 *  Such an implementation has been chosen not just for fun.  It is a way to
 *  prevent easy and efficient DoS attacks based on creating hash collisions.
 *  A huge number of long-living nodes in a single hash slot would
 *  significantly delay lookups performed with local BHs disabled.
 *
 *  Serialisation issues.
 *  1.  Nodes may appear in the tree only with the pool lock held.
 *  2.  Nodes may disappear from the tree only with the pool lock held
 *      AND their reference count being 0.
 *  3.  base->total is modified under the pool lock.
 *  4.  struct inet_peer field modification:
 *        avl_left, avl_right, avl_height: pool lock
 *        refcnt: atomically against modifications on other CPUs;
 *           usually under some other lock to prevent the node from
 *           disappearing
 *        daddr: unchangeable
 */
static struct kmem_cache *peer_cachep __read_mostly;

static LIST_HEAD(gc_list);
static const int gc_delay = 60 * HZ;
static struct delayed_work gc_work;
static DEFINE_SPINLOCK(gc_lock);

#define node_height(x) x->avl_height

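/* An always-empty sentinel node is used in place of NULL children, so the
 * AVL code can treat an empty subtree as height 0 and avoid NULL checks.
 */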
#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
static const struct inet_peer peer_fake_node = {
        .avl_left = peer_avl_empty_rcu,
        .avl_right = peer_avl_empty_rcu,
        .avl_height = 0
};

void inet_peer_base_init(struct inet_peer_base *bp)
{
        bp->root = peer_avl_empty_rcu;
        seqlock_init(&bp->lock);
        bp->total = 0;
}
EXPORT_SYMBOL_GPL(inet_peer_base_init);

#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */

/* Exported for sysctl_net_ipv4 */
int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
							 * aggressively at this stage */
int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */

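/* Worker that disposes of invalidated trees: each queued node has its
 * children spliced back onto the work list (flattening the tree),
 * unreferenced nodes are freed, and anything still referenced is requeued
 * for a later pass.
 */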
static void inetpeer_gc_worker(struct work_struct *work)
{
        struct inet_peer *p, *n, *c;
        struct list_head list;

        spin_lock_bh(&gc_lock);
        list_replace_init(&gc_list, &list);
        spin_unlock_bh(&gc_lock);

        if (list_empty(&list))
                return;

        list_for_each_entry_safe(p, n, &list, gc_list) {

                if (need_resched())
                        cond_resched();

                c = rcu_dereference_protected(p->avl_left, 1);
                if (c != peer_avl_empty) {
                        list_add_tail(&c->gc_list, &list);
                        p->avl_left = peer_avl_empty_rcu;
                }

                c = rcu_dereference_protected(p->avl_right, 1);
                if (c != peer_avl_empty) {
                        list_add_tail(&c->gc_list, &list);
                        p->avl_right = peer_avl_empty_rcu;
                }

                /* list_add_tail() may have grown the list behind our
                 * cursor; re-read the next entry.
                 */
                n = list_entry(p->gc_list.next, struct inet_peer, gc_list);

                if (!atomic_read(&p->refcnt)) {
                        list_del(&p->gc_list);
                        kmem_cache_free(peer_cachep, p);
                }
        }

        if (list_empty(&list))
                return;

        /* Some entries are still referenced; put them back and retry later. */
        spin_lock_bh(&gc_lock);
        list_splice(&list, &gc_list);
        spin_unlock_bh(&gc_lock);

        schedule_delayed_work(&gc_work, gc_delay);
}


/* Called from ip_output.c:ip_init() */
void __init inet_initpeers(void)
{
        struct sysinfo si;

        /* Use the straight interface to information about memory. */
        si_meminfo(&si);
        /* Scale the threshold down on machines with little memory so the
         * peer cache cannot claim a disproportionate share of RAM.
         */
        if (si.totalram <= (32768*1024)/PAGE_SIZE)
                inet_peer_threshold >>= 1; /* <= 32MB of RAM */
        if (si.totalram <= (16384*1024)/PAGE_SIZE)
                inet_peer_threshold >>= 1; /* <= 16MB */
        if (si.totalram <= (8192*1024)/PAGE_SIZE)
                inet_peer_threshold >>= 2; /* <= 8MB */

        peer_cachep = kmem_cache_create("inet_peer_cache",
                        sizeof(struct inet_peer),
                        0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                        NULL);

        INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
}

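/* Compare two peer addresses: one 32-bit word for IPv4 (a4 aliases a6[0]),
 * all four words of a6[] for IPv6.  Returns -1, 0 or 1, giving the total
 * order used by the AVL tree.
 */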
static int addr_compare(const struct inetpeer_addr *a,
                        const struct inetpeer_addr *b)
{
        int i, n = (a->family == AF_INET ? 1 : 4);

        for (i = 0; i < n; i++) {
                if (a->addr.a6[i] == b->addr.a6[i])
                        continue;
                if ((__force u32)a->addr.a6[i] < (__force u32)b->addr.a6[i])
                        return -1;
                return 1;
        }

        return 0;
}

#define rcu_deref_locked(X, BASE) \
        rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))

/*
 * Called with local BH disabled and the pool lock held.
 * The path down the tree is recorded in *stackptr so the caller can
 * rebalance or unlink along it afterwards.
 */
#define lookup(_daddr, _stack, _base) \
({ \
        struct inet_peer *u; \
        struct inet_peer __rcu **v; \
 \
        stackptr = _stack; \
        *stackptr++ = &_base->root; \
        for (u = rcu_deref_locked(_base->root, _base); \
             u != peer_avl_empty;) { \
                int cmp = addr_compare(_daddr, &u->daddr); \
                if (cmp == 0) \
                        break; \
                if (cmp == -1) \
                        v = &u->avl_left; \
                else \
                        v = &u->avl_right; \
                *stackptr++ = v; \
                u = rcu_deref_locked(*v, _base); \
        } \
        u; \
})

/*
 * Called with rcu_read_lock().
 * Because we hold no lock against a writer, it is quite possible we fall
 * in an endless loop, but every pointer we follow is guaranteed to be
 * valid thanks to RCU.  We exit from this function if the number of links
 * followed exceeds PEER_MAXDEPTH.
 */
static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
                                    struct inet_peer_base *base)
{
        struct inet_peer *u = rcu_dereference(base->root);
        int count = 0;

        while (u != peer_avl_empty) {
                int cmp = addr_compare(daddr, &u->daddr);
                if (cmp == 0) {
                        /* Before taking a reference, check if this entry
                         * was already deleted (refcnt == -1).
                         */
                        if (!atomic_add_unless(&u->refcnt, 1, -1))
                                u = NULL;
                        return u;
                }
                if (cmp == -1)
                        u = rcu_dereference(u->avl_left);
                else
                        u = rcu_dereference(u->avl_right);
                if (unlikely(++count == PEER_MAXDEPTH))
                        break;
        }
        return NULL;
}

/* Called with local BH disabled and the pool lock held.
 * Finds the in-order predecessor position: the rightmost node in the
 * left subtree of *start.
 */
#define lookup_rightempty(start, base) \
({ \
        struct inet_peer *u; \
        struct inet_peer __rcu **v; \
        *stackptr++ = &start->avl_left; \
        v = &start->avl_left; \
        for (u = rcu_deref_locked(*v, base); \
             u->avl_right != peer_avl_empty_rcu;) { \
                v = &u->avl_right; \
                *stackptr++ = v; \
                u = rcu_deref_locked(*v, base); \
        } \
        u; \
})

/* Called with local BH disabled and the pool lock held.
 * Walks back up the recorded lookup path, performing single or double
 * AVL rotations wherever a subtree has become unbalanced.
 */
static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
                               struct inet_peer __rcu ***stackend,
                               struct inet_peer_base *base)
{
        struct inet_peer __rcu **nodep;
        struct inet_peer *node, *l, *r;
        int lh, rh;

        while (stackend > stack) {
                nodep = *--stackend;
                node = rcu_deref_locked(*nodep, base);
                l = rcu_deref_locked(node->avl_left, base);
                r = rcu_deref_locked(node->avl_right, base);
                lh = node_height(l);
                rh = node_height(r);
                if (lh > rh + 1) { /* left subtree too tall */
                        struct inet_peer *ll, *lr, *lrl, *lrr;
                        int lrh;
                        ll = rcu_deref_locked(l->avl_left, base);
                        lr = rcu_deref_locked(l->avl_right, base);
                        lrh = node_height(lr);
                        if (lrh <= node_height(ll)) {
                                /* single rotation to the right */
                                RCU_INIT_POINTER(node->avl_left, lr);
                                RCU_INIT_POINTER(node->avl_right, r);
                                node->avl_height = lrh + 1;
                                RCU_INIT_POINTER(l->avl_left, ll);
                                RCU_INIT_POINTER(l->avl_right, node);
                                l->avl_height = node->avl_height + 1;
                                RCU_INIT_POINTER(*nodep, l);
                        } else {
                                /* double rotation: lr is promoted */
                                lrl = rcu_deref_locked(lr->avl_left, base);
                                lrr = rcu_deref_locked(lr->avl_right, base);
                                RCU_INIT_POINTER(node->avl_left, lrr);
                                RCU_INIT_POINTER(node->avl_right, r);
                                node->avl_height = rh + 1;
                                RCU_INIT_POINTER(l->avl_left, ll);
                                RCU_INIT_POINTER(l->avl_right, lrl);
                                l->avl_height = rh + 1;
                                RCU_INIT_POINTER(lr->avl_left, l);
                                RCU_INIT_POINTER(lr->avl_right, node);
                                lr->avl_height = rh + 2;
                                RCU_INIT_POINTER(*nodep, lr);
                        }
                } else if (rh > lh + 1) { /* right subtree too tall */
                        struct inet_peer *rr, *rl, *rlr, *rll;
                        int rlh;
                        rr = rcu_deref_locked(r->avl_right, base);
                        rl = rcu_deref_locked(r->avl_left, base);
                        rlh = node_height(rl);
                        if (rlh <= node_height(rr)) {
                                /* single rotation to the left */
                                RCU_INIT_POINTER(node->avl_right, rl);
                                RCU_INIT_POINTER(node->avl_left, l);
                                node->avl_height = rlh + 1;
                                RCU_INIT_POINTER(r->avl_right, rr);
                                RCU_INIT_POINTER(r->avl_left, node);
                                r->avl_height = node->avl_height + 1;
                                RCU_INIT_POINTER(*nodep, r);
                        } else {
                                /* double rotation: rl is promoted */
                                rlr = rcu_deref_locked(rl->avl_right, base);
                                rll = rcu_deref_locked(rl->avl_left, base);
                                RCU_INIT_POINTER(node->avl_right, rll);
                                RCU_INIT_POINTER(node->avl_left, l);
                                node->avl_height = lh + 1;
                                RCU_INIT_POINTER(r->avl_right, rr);
                                RCU_INIT_POINTER(r->avl_left, rlr);
                                r->avl_height = lh + 1;
                                RCU_INIT_POINTER(rl->avl_right, r);
                                RCU_INIT_POINTER(rl->avl_left, node);
                                rl->avl_height = lh + 2;
                                RCU_INIT_POINTER(*nodep, rl);
                        }
                } else { /* node is balanced; just fix its height */
                        node->avl_height = (lh > rh ? lh : rh) + 1;
                }
        }
}

/* Called with local BH disabled and the pool lock held. */
#define link_to_pool(n, base) \
do { \
        n->avl_height = 1; \
        n->avl_left = peer_avl_empty_rcu; \
        n->avl_right = peer_avl_empty_rcu; \
        /* lockless readers can catch us now, make sure struct fields are up to date */ \
        rcu_assign_pointer(**--stackptr, n); \
        peer_avl_rebalance(stack, stackptr, base); \
} while (0)

static void inetpeer_free_rcu(struct rcu_head *head)
{
        kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
}

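/* Remove p from the tree and free it after an RCU grace period.
 * Called with local BH disabled and the pool lock held; p's refcnt must
 * already have been set to -1, so lockless lookups cannot take a new
 * reference on it.
 */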
static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
                             struct inet_peer __rcu **stack[PEER_MAXDEPTH])
{
        struct inet_peer __rcu ***stackptr, ***delp;

        if (lookup(&p->daddr, stack, base) != p)
                BUG();
        delp = stackptr - 1; /* *delp[0] == p */
        if (p->avl_left == peer_avl_empty_rcu) {
                *delp[0] = p->avl_right;
                --stackptr;
        } else {
                /* look for a node to insert instead of p */
                struct inet_peer *t;
                t = lookup_rightempty(p, base);
                BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
                **--stackptr = t->avl_left;
                /* t is removed, t->daddr > x->daddr for any
                 * x in p->avl_left subtree.
                 * Put t in the old place of p.
                 */
                RCU_INIT_POINTER(*delp[0], t);
                t->avl_left = p->avl_left;
                t->avl_right = p->avl_right;
                t->avl_height = p->avl_height;
                BUG_ON(delp[1] != &p->avl_left);
                delp[1] = &t->avl_left; /* was &p->avl_left */
        }
        peer_avl_rebalance(stack, stackptr, base);
        base->total--;
        call_rcu(&p->rcu, inetpeer_free_rcu);
}

/* perform garbage collection on all items stacked during a lookup */
static int inet_peer_gc(struct inet_peer_base *base,
                        struct inet_peer __rcu **stack[PEER_MAXDEPTH],
                        struct inet_peer __rcu ***stackptr)
{
        struct inet_peer *p, *gchead = NULL;
        __u32 delta, ttl;
        int cnt = 0;

        /* The TTL shrinks linearly from inet_peer_maxttl on an empty pool
         * down to inet_peer_minttl as base->total approaches the threshold;
         * past the threshold, any unreferenced entry may go.
         */
        if (base->total >= inet_peer_threshold)
                ttl = 0; /* be aggressive */
        else
                ttl = inet_peer_maxttl
                                - (inet_peer_maxttl - inet_peer_minttl) / HZ *
                                        base->total / inet_peer_threshold * HZ;
        stackptr--; /* last stack slot is peer_avl_empty */
        while (stackptr > stack) {
                stackptr--;
                p = rcu_deref_locked(**stackptr, base);
                if (atomic_read(&p->refcnt) == 0) {
                        /* pairs with the barrier in inet_putpeer() */
                        smp_rmb();
                        delta = (__u32)jiffies - p->dtime;
                        if (delta >= ttl &&
                            atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
                                p->gc_next = gchead;
                                gchead = p;
                        }
                }
        }
        while ((p = gchead) != NULL) {
                gchead = p->gc_next;
                cnt++;
                unlink_from_pool(p, base, stack);
        }
        return cnt;
}

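/* Look up the peer entry for daddr, creating one if create is non-zero and
 * none exists.  A reference is taken on the entry returned; the caller is
 * expected to drop it with inet_putpeer().
 */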
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
                               const struct inetpeer_addr *daddr,
                               int create)
{
        struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
        struct inet_peer *p;
        unsigned int sequence;
        int invalidated, gccnt = 0;

        /* Attempt a lockless lookup first.
         * Because of a concurrent writer, we might not find an existing entry.
         */
        rcu_read_lock();
        sequence = read_seqbegin(&base->lock);
        p = lookup_rcu(daddr, base);
        invalidated = read_seqretry(&base->lock, sequence);
        rcu_read_unlock();

        if (p)
                return p;

        /* If no writer did a change during our lookup, we can return early. */
        if (!create && !invalidated)
                return NULL;

        /* retry an exact lookup, taking the lock before.
         * At least, nodes should be hot in our cache.
         */
        write_seqlock_bh(&base->lock);
relookup:
        p = lookup(daddr, stack, base);
        if (p != peer_avl_empty) {
                atomic_inc(&p->refcnt);
                write_sequnlock_bh(&base->lock);
                return p;
        }
        if (!gccnt) {
                gccnt = inet_peer_gc(base, stack, stackptr);
                if (gccnt && create)
                        goto relookup;
        }
        p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
        if (p) {
                p->daddr = *daddr;
                atomic_set(&p->refcnt, 1);
                atomic_set(&p->rid, 0);
                p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
                p->rate_tokens = 0;
                /* 60*HZ is arbitrary, but chosen high enough so that the
                 * first calculation of tokens is at its maximum.
                 */
                p->rate_last = jiffies - 60*HZ;
                INIT_LIST_HEAD(&p->gc_list);

                /* Link the node. */
                link_to_pool(p, base);
                base->total++;
        }
        write_sequnlock_bh(&base->lock);

        return p;
}
EXPORT_SYMBOL_GPL(inet_getpeer);

void inet_putpeer(struct inet_peer *p)
{
        p->dtime = (__u32)jiffies;
        /* Make sure the new dtime is visible before the refcnt drop;
         * pairs with the smp_rmb() in inet_peer_gc().
         */
        smp_mb__before_atomic();
        atomic_dec(&p->refcnt);
}
EXPORT_SYMBOL_GPL(inet_putpeer);

/*
 *  Check transmit rate limitation for a given message.
 *  The rate information is held in the inet_peer entries now.
 *
 *  RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
 *                    SHOULD allow setting of rate limits
 *
 *  Shared between ICMPv4 and ICMPv6.
 */
#define XRLIM_BURST_FACTOR 6
bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
{
        unsigned long now, token;
        bool rc = false;

        if (!peer)
                return true;

        /* Token bucket: one token accrues per jiffy, capped at
         * XRLIM_BURST_FACTOR * timeout; each transmission costs
         * timeout tokens.
         */
        token = peer->rate_tokens;
        now = jiffies;
        token += now - peer->rate_last;
        peer->rate_last = now;
        if (token > XRLIM_BURST_FACTOR * timeout)
                token = XRLIM_BURST_FACTOR * timeout;
        if (token >= timeout) {
                token -= timeout;
                rc = true;
        }
        peer->rate_tokens = token;
        return rc;
}
EXPORT_SYMBOL(inet_peer_xrlim_allow);

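/* RCU callback used by inetpeer_invalidate_tree(): once a grace period has
 * elapsed, hand the detached root over to the gc worker for disposal.
 */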
static void inetpeer_inval_rcu(struct rcu_head *head)
{
        struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);

        spin_lock_bh(&gc_lock);
        list_add_tail(&p->gc_list, &gc_list);
        spin_unlock_bh(&gc_lock);

        schedule_delayed_work(&gc_work, gc_delay);
}

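/* Detach the whole tree under the seqlock, so concurrent lockless lookups
 * notice the change and retry, then defer the actual freeing to
 * inetpeer_gc_worker() via an RCU grace period.
 */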
void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
        struct inet_peer *root;

        write_seqlock_bh(&base->lock);

        root = rcu_deref_locked(base->root, base);
        if (root != peer_avl_empty) {
                base->root = peer_avl_empty_rcu;
                base->total = 0;
                call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
        }

        write_sequnlock_bh(&base->lock);
}
EXPORT_SYMBOL(inetpeer_invalidate_tree);