1
2
3
4
5
6
7
8
9
10
11
12#include <linux/slab.h>
13#include <linux/vmalloc.h>
14#include <linux/sunrpc/addr.h>
15#include <linux/highmem.h>
16#include <linux/log2.h>
17#include <linux/hash.h>
18#include <net/checksum.h>
19
20#include "nfsd.h"
21#include "cache.h"
22
23#define NFSDDBG_FACILITY NFSDDBG_REPCACHE
24
25
26
27
28
29
30#define TARGET_BUCKET_SIZE 64
31
32struct nfsd_drc_bucket {
33 struct list_head lru_head;
34 spinlock_t cache_lock;
35};
36
37static struct nfsd_drc_bucket *drc_hashtbl;
38static struct kmem_cache *drc_slab;
39
40
41static unsigned int max_drc_entries;
42
43
44static unsigned int maskbits;
45static unsigned int drc_hashsize;
46
47
48
49
50
51
52
53static atomic_t num_drc_entries;
54
55
56static unsigned int payload_misses;
57
58
59static unsigned int drc_mem_usage;
60
61
62static unsigned int longest_chain;
63
64
65static unsigned int longest_chain_cachesize;
66
67static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
68static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
69 struct shrink_control *sc);
70static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
71 struct shrink_control *sc);
72
73static struct shrinker nfsd_reply_cache_shrinker = {
74 .scan_objects = nfsd_reply_cache_scan,
75 .count_objects = nfsd_reply_cache_count,
76 .seeks = 1,
77};
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97static unsigned int
98nfsd_cache_size_limit(void)
99{
100 unsigned int limit;
101 unsigned long low_pages = totalram_pages - totalhigh_pages;
102
103 limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10);
104 return min_t(unsigned int, limit, 256*1024);
105}
106
107
108
109
110
111static unsigned int
112nfsd_hashsize(unsigned int limit)
113{
114 return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
115}
116
117static u32
118nfsd_cache_hash(__be32 xid)
119{
120 return hash_32(be32_to_cpu(xid), maskbits);
121}
122
123static struct svc_cacherep *
124nfsd_reply_cache_alloc(void)
125{
126 struct svc_cacherep *rp;
127
128 rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
129 if (rp) {
130 rp->c_state = RC_UNUSED;
131 rp->c_type = RC_NOCACHE;
132 INIT_LIST_HEAD(&rp->c_lru);
133 }
134 return rp;
135}
136
137static void
138nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
139{
140 if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
141 drc_mem_usage -= rp->c_replvec.iov_len;
142 kfree(rp->c_replvec.iov_base);
143 }
144 list_del(&rp->c_lru);
145 atomic_dec(&num_drc_entries);
146 drc_mem_usage -= sizeof(*rp);
147 kmem_cache_free(drc_slab, rp);
148}
149
150static void
151nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
152{
153 spin_lock(&b->cache_lock);
154 nfsd_reply_cache_free_locked(rp);
155 spin_unlock(&b->cache_lock);
156}
157
158int nfsd_reply_cache_init(void)
159{
160 unsigned int hashsize;
161 unsigned int i;
162 int status = 0;
163
164 max_drc_entries = nfsd_cache_size_limit();
165 atomic_set(&num_drc_entries, 0);
166 hashsize = nfsd_hashsize(max_drc_entries);
167 maskbits = ilog2(hashsize);
168
169 status = register_shrinker(&nfsd_reply_cache_shrinker);
170 if (status)
171 return status;
172
173 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
174 0, 0, NULL);
175 if (!drc_slab)
176 goto out_nomem;
177
178 drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
179 if (!drc_hashtbl) {
180 drc_hashtbl = vzalloc(array_size(hashsize,
181 sizeof(*drc_hashtbl)));
182 if (!drc_hashtbl)
183 goto out_nomem;
184 }
185
186 for (i = 0; i < hashsize; i++) {
187 INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
188 spin_lock_init(&drc_hashtbl[i].cache_lock);
189 }
190 drc_hashsize = hashsize;
191
192 return 0;
193out_nomem:
194 printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
195 nfsd_reply_cache_shutdown();
196 return -ENOMEM;
197}
198
199void nfsd_reply_cache_shutdown(void)
200{
201 struct svc_cacherep *rp;
202 unsigned int i;
203
204 unregister_shrinker(&nfsd_reply_cache_shrinker);
205
206 for (i = 0; i < drc_hashsize; i++) {
207 struct list_head *head = &drc_hashtbl[i].lru_head;
208 while (!list_empty(head)) {
209 rp = list_first_entry(head, struct svc_cacherep, c_lru);
210 nfsd_reply_cache_free_locked(rp);
211 }
212 }
213
214 kvfree(drc_hashtbl);
215 drc_hashtbl = NULL;
216 drc_hashsize = 0;
217
218 kmem_cache_destroy(drc_slab);
219 drc_slab = NULL;
220}
221
222
223
224
225
226static void
227lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
228{
229 rp->c_timestamp = jiffies;
230 list_move_tail(&rp->c_lru, &b->lru_head);
231}
232
233static long
234prune_bucket(struct nfsd_drc_bucket *b)
235{
236 struct svc_cacherep *rp, *tmp;
237 long freed = 0;
238
239 list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
240
241
242
243
244 if (rp->c_state == RC_INPROG)
245 continue;
246 if (atomic_read(&num_drc_entries) <= max_drc_entries &&
247 time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
248 break;
249 nfsd_reply_cache_free_locked(rp);
250 freed++;
251 }
252 return freed;
253}
254
255
256
257
258
259static long
260prune_cache_entries(void)
261{
262 unsigned int i;
263 long freed = 0;
264
265 for (i = 0; i < drc_hashsize; i++) {
266 struct nfsd_drc_bucket *b = &drc_hashtbl[i];
267
268 if (list_empty(&b->lru_head))
269 continue;
270 spin_lock(&b->cache_lock);
271 freed += prune_bucket(b);
272 spin_unlock(&b->cache_lock);
273 }
274 return freed;
275}
276
277static unsigned long
278nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
279{
280 return atomic_read(&num_drc_entries);
281}
282
283static unsigned long
284nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
285{
286 return prune_cache_entries();
287}
288
289
290
291static __wsum
292nfsd_cache_csum(struct svc_rqst *rqstp)
293{
294 int idx;
295 unsigned int base;
296 __wsum csum;
297 struct xdr_buf *buf = &rqstp->rq_arg;
298 const unsigned char *p = buf->head[0].iov_base;
299 size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
300 RC_CSUMLEN);
301 size_t len = min(buf->head[0].iov_len, csum_len);
302
303
304 csum = csum_partial(p, len, 0);
305 csum_len -= len;
306
307
308 idx = buf->page_base / PAGE_SIZE;
309 base = buf->page_base & ~PAGE_MASK;
310 while (csum_len) {
311 p = page_address(buf->pages[idx]) + base;
312 len = min_t(size_t, PAGE_SIZE - base, csum_len);
313 csum = csum_partial(p, len, csum);
314 csum_len -= len;
315 base = 0;
316 ++idx;
317 }
318 return csum;
319}
320
321static bool
322nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
323{
324
325 if (rqstp->rq_xid != rp->c_xid)
326 return false;
327
328 if (csum != rp->c_csum) {
329 ++payload_misses;
330 return false;
331 }
332
333
334 if (rqstp->rq_proc != rp->c_proc ||
335 rqstp->rq_prot != rp->c_prot ||
336 rqstp->rq_vers != rp->c_vers ||
337 rqstp->rq_arg.len != rp->c_len ||
338 !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
339 rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
340 return false;
341
342 return true;
343}
344
345
346
347
348
349
350static struct svc_cacherep *
351nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp,
352 __wsum csum)
353{
354 struct svc_cacherep *rp, *ret = NULL;
355 struct list_head *rh = &b->lru_head;
356 unsigned int entries = 0;
357
358 list_for_each_entry(rp, rh, c_lru) {
359 ++entries;
360 if (nfsd_cache_match(rqstp, csum, rp)) {
361 ret = rp;
362 break;
363 }
364 }
365
366
367 if (entries > longest_chain) {
368 longest_chain = entries;
369 longest_chain_cachesize = atomic_read(&num_drc_entries);
370 } else if (entries == longest_chain) {
371
372 longest_chain_cachesize = min_t(unsigned int,
373 longest_chain_cachesize,
374 atomic_read(&num_drc_entries));
375 }
376
377 return ret;
378}
379
380
381
382
383
384
385
386
387int
388nfsd_cache_lookup(struct svc_rqst *rqstp)
389{
390 struct svc_cacherep *rp, *found;
391 __be32 xid = rqstp->rq_xid;
392 u32 proto = rqstp->rq_prot,
393 vers = rqstp->rq_vers,
394 proc = rqstp->rq_proc;
395 __wsum csum;
396 u32 hash = nfsd_cache_hash(xid);
397 struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
398 int type = rqstp->rq_cachetype;
399 int rtn = RC_DOIT;
400
401 rqstp->rq_cacherep = NULL;
402 if (type == RC_NOCACHE) {
403 nfsdstats.rcnocache++;
404 return rtn;
405 }
406
407 csum = nfsd_cache_csum(rqstp);
408
409
410
411
412
413 rp = nfsd_reply_cache_alloc();
414 spin_lock(&b->cache_lock);
415 if (likely(rp)) {
416 atomic_inc(&num_drc_entries);
417 drc_mem_usage += sizeof(*rp);
418 }
419
420
421 prune_bucket(b);
422
423 found = nfsd_cache_search(b, rqstp, csum);
424 if (found) {
425 if (likely(rp))
426 nfsd_reply_cache_free_locked(rp);
427 rp = found;
428 goto found_entry;
429 }
430
431 if (!rp) {
432 dprintk("nfsd: unable to allocate DRC entry!\n");
433 goto out;
434 }
435
436 nfsdstats.rcmisses++;
437 rqstp->rq_cacherep = rp;
438 rp->c_state = RC_INPROG;
439 rp->c_xid = xid;
440 rp->c_proc = proc;
441 rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp));
442 rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp)));
443 rp->c_prot = proto;
444 rp->c_vers = vers;
445 rp->c_len = rqstp->rq_arg.len;
446 rp->c_csum = csum;
447
448 lru_put_end(b, rp);
449
450
451 if (rp->c_type == RC_REPLBUFF) {
452 drc_mem_usage -= rp->c_replvec.iov_len;
453 kfree(rp->c_replvec.iov_base);
454 rp->c_replvec.iov_base = NULL;
455 }
456 rp->c_type = RC_NOCACHE;
457 out:
458 spin_unlock(&b->cache_lock);
459 return rtn;
460
461found_entry:
462 nfsdstats.rchits++;
463
464 lru_put_end(b, rp);
465
466 rtn = RC_DROPIT;
467
468 if (rp->c_state == RC_INPROG)
469 goto out;
470
471
472
473 rtn = RC_DOIT;
474 if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
475 goto out;
476
477
478 switch (rp->c_type) {
479 case RC_NOCACHE:
480 break;
481 case RC_REPLSTAT:
482 svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
483 rtn = RC_REPLY;
484 break;
485 case RC_REPLBUFF:
486 if (!nfsd_cache_append(rqstp, &rp->c_replvec))
487 goto out;
488 rtn = RC_REPLY;
489 break;
490 default:
491 printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
492 nfsd_reply_cache_free_locked(rp);
493 }
494
495 goto out;
496}
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514void
515nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
516{
517 struct svc_cacherep *rp = rqstp->rq_cacherep;
518 struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
519 u32 hash;
520 struct nfsd_drc_bucket *b;
521 int len;
522 size_t bufsize = 0;
523
524 if (!rp)
525 return;
526
527 hash = nfsd_cache_hash(rp->c_xid);
528 b = &drc_hashtbl[hash];
529
530 len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
531 len >>= 2;
532
533
534 if (!statp || len > (256 >> 2)) {
535 nfsd_reply_cache_free(b, rp);
536 return;
537 }
538
539 switch (cachetype) {
540 case RC_REPLSTAT:
541 if (len != 1)
542 printk("nfsd: RC_REPLSTAT/reply len %d!\n",len);
543 rp->c_replstat = *statp;
544 break;
545 case RC_REPLBUFF:
546 cachv = &rp->c_replvec;
547 bufsize = len << 2;
548 cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
549 if (!cachv->iov_base) {
550 nfsd_reply_cache_free(b, rp);
551 return;
552 }
553 cachv->iov_len = bufsize;
554 memcpy(cachv->iov_base, statp, bufsize);
555 break;
556 case RC_NOCACHE:
557 nfsd_reply_cache_free(b, rp);
558 return;
559 }
560 spin_lock(&b->cache_lock);
561 drc_mem_usage += bufsize;
562 lru_put_end(b, rp);
563 rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
564 rp->c_type = cachetype;
565 rp->c_state = RC_DONE;
566 spin_unlock(&b->cache_lock);
567 return;
568}
569
570
571
572
573
574
575static int
576nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
577{
578 struct kvec *vec = &rqstp->rq_res.head[0];
579
580 if (vec->iov_len + data->iov_len > PAGE_SIZE) {
581 printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
582 data->iov_len);
583 return 0;
584 }
585 memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
586 vec->iov_len += data->iov_len;
587 return 1;
588}
589
590
591
592
593
594
595static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
596{
597 seq_printf(m, "max entries: %u\n", max_drc_entries);
598 seq_printf(m, "num entries: %u\n",
599 atomic_read(&num_drc_entries));
600 seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
601 seq_printf(m, "mem usage: %u\n", drc_mem_usage);
602 seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
603 seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses);
604 seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache);
605 seq_printf(m, "payload misses: %u\n", payload_misses);
606 seq_printf(m, "longest chain len: %u\n", longest_chain);
607 seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
608 return 0;
609}
610
611int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
612{
613 return single_open(file, nfsd_reply_cache_stats_show, NULL);
614}
615