/*
   lru_cache.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/string.h> /* for memset */
#include <linux/seq_file.h> /* for seq_printf */
#include <linux/lru_cache.h>

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
              "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
MODULE_LICENSE("GPL");

/* This is a developer's aid only.
 * It catches concurrent access (lack of locking on the user's part). */
#define PARANOIA_ENTRY() do {           \
        BUG_ON(!lc);                    \
        BUG_ON(!lc->nr_elements);       \
        BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
} while (0)

#define RETURN(x...)     do { \
        clear_bit(__LC_PARANOIA, &lc->flags); \
        smp_mb__after_clear_bit(); return x; } while (0)

/* BUG() if e is not one of the elements tracked by lc */
#define PARANOIA_LC_ELEMENT(lc, e) do { \
        struct lru_cache *lc_ = (lc);   \
        struct lc_element *e_ = (e);    \
        unsigned i = e_->lc_index;      \
        BUG_ON(i >= lc_->nr_elements);  \
        BUG_ON(lc_->lc_element[i] != e_); } while (0)

/**
 * lc_create - prepares to track objects in an active set
 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
 * @cache: the &struct kmem_cache from which the tracked objects are allocated
 * @e_count: number of elements allowed to be active simultaneously
 * @e_size: size of the tracked objects
 * @e_off: offset to the &struct lc_element member in a tracked object
 *
 * Returns a pointer to a newly initialized struct lru_cache on success,
 * or NULL on (allocation) failure.
 */
struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
                unsigned e_count, size_t e_size, size_t e_off)
{
        struct hlist_head *slot = NULL;
        struct lc_element **element = NULL;
        struct lru_cache *lc;
        struct lc_element *e;
        unsigned cache_obj_size = kmem_cache_size(cache);
        unsigned i;

        WARN_ON(cache_obj_size < e_size);
        if (cache_obj_size < e_size)
                return NULL;

        /* e_count too big; would probably fail the allocation below anyway.
         * for typical use cases, e_count should be a few thousand at most. */
        if (e_count > LC_MAX_ACTIVE)
                return NULL;

        slot = kzalloc(e_count * sizeof(struct hlist_head), GFP_KERNEL);
        if (!slot)
                goto out_fail;
        element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
        if (!element)
                goto out_fail;

        lc = kzalloc(sizeof(*lc), GFP_KERNEL);
        if (!lc)
                goto out_fail;

        INIT_LIST_HEAD(&lc->in_use);
        INIT_LIST_HEAD(&lc->lru);
        INIT_LIST_HEAD(&lc->free);

        lc->name = name;
        lc->element_size = e_size;
        lc->element_off = e_off;
        lc->nr_elements = e_count;
        lc->new_number = LC_FREE;
        lc->lc_cache = cache;
        lc->lc_element = element;
        lc->lc_slot = slot;

        /* preallocate all objects */
        for (i = 0; i < e_count; i++) {
                void *p = kmem_cache_alloc(cache, GFP_KERNEL);
                if (!p)
                        break;
                memset(p, 0, lc->element_size);
                e = p + e_off;
                e->lc_index = i;
                e->lc_number = LC_FREE;
                list_add(&e->list, &lc->free);
                element[i] = e;
        }
        if (i == e_count)
                return lc;

        /* else: could not allocate all elements, give up */
        while (i) {
                void *p = element[--i];
                kmem_cache_free(cache, p - e_off);
        }
        kfree(lc);
out_fail:
        kfree(element);
        kfree(slot);
        return NULL;
}
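
/*
 * Usage sketch for lc_create()/lc_destroy(). The struct my_extent
 * container, its "lce" member, the cache name and the element count
 * below are made up for illustration:
 *
 *	struct my_extent {
 *		unsigned long flags;		// user payload
 *		struct lc_element lce;		// embedded tracking member
 *	};
 *
 *	struct kmem_cache *my_cache;
 *	struct lru_cache *lc;
 *
 *	my_cache = kmem_cache_create("my_extents",
 *			sizeof(struct my_extent), 0, 0, NULL);
 *	if (my_cache) {
 *		lc = lc_create("my_set", my_cache, 61,
 *				sizeof(struct my_extent),
 *				offsetof(struct my_extent, lce));
 *		...
 *		lc_destroy(lc);			// NULL-safe
 *		kmem_cache_destroy(my_cache);
 *	}
 */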

static void lc_free_by_index(struct lru_cache *lc, unsigned i)
{
        void *p = lc->lc_element[i];
        WARN_ON(!p);
        if (p) {
                p -= lc->element_off;
                kmem_cache_free(lc->lc_cache, p);
        }
}

/**
 * lc_destroy - frees memory allocated by lc_create()
 * @lc: the lru cache to destroy
 */
void lc_destroy(struct lru_cache *lc)
{
        unsigned i;
        if (!lc)
                return;
        for (i = 0; i < lc->nr_elements; i++)
                lc_free_by_index(lc, i);
        kfree(lc->lc_element);
        kfree(lc->lc_slot);
        kfree(lc);
}

/**
 * lc_reset - does a full reset for @lc and the hash table slots.
 * @lc: the lru cache to operate on
 *
 * It is roughly the equivalent of re-allocating a fresh lru_cache object,
 * basically a shortcut for lc_destroy(lc); lc = lc_create(...);
 */
void lc_reset(struct lru_cache *lc)
{
        unsigned i;

        INIT_LIST_HEAD(&lc->in_use);
        INIT_LIST_HEAD(&lc->lru);
        INIT_LIST_HEAD(&lc->free);
        lc->used = 0;
        lc->hits = 0;
        lc->misses = 0;
        lc->starving = 0;
        lc->dirty = 0;
        lc->changed = 0;
        lc->flags = 0;
        lc->changing_element = NULL;
        lc->new_number = LC_FREE;
        memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);

        for (i = 0; i < lc->nr_elements; i++) {
                struct lc_element *e = lc->lc_element[i];
                void *p = e;
                p -= lc->element_off;
                memset(p, 0, lc->element_size);
                /* re-init it */
                e->lc_index = i;
                e->lc_number = LC_FREE;
                list_add(&e->list, &lc->free);
        }
}

/**
 * lc_seq_printf_stats - print stats about @lc into @seq
 * @seq: the seq_file to print into
 * @lc: the lru cache to print statistics of
 */
size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
{
        /* NOTE:
         * total calls to lc_get are
         * (starving + hits + misses)
         * misses include the "dirty" count (an update from another thread
         * was in progress) and "changed", when the miss in fact led to a
         * successful update of the cache.
         */
        return seq_printf(seq, "\t%s: used:%u/%u "
                "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
                lc->name, lc->used, lc->nr_elements,
                lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
}
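
/*
 * Sketch of a seq_file "show" callback built on lc_seq_printf_stats().
 * The struct my_dev container and its act_log member are made up:
 *
 *	static int my_stats_show(struct seq_file *seq, void *v)
 *	{
 *		struct my_dev *mdev = seq->private;
 *
 *		lc_seq_printf_stats(seq, mdev->act_log);
 *		return 0;
 *	}
 */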

static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
{
        return lc->lc_slot + (enr % lc->nr_elements);
}

/**
 * lc_find - find element by label, if present in the hash table
 * @lc: The lru_cache object
 * @enr: element number
 *
 * Returns the pointer to an element, if the element with the requested
 * "label" or element number is present in the hash table,
 * or NULL if not found. Does not change the refcnt.
 */
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
{
        struct hlist_node *n;
        struct lc_element *e;

        BUG_ON(!lc);
        BUG_ON(!lc->nr_elements);
        hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
                if (e->lc_number == enr)
                        return e;
        }
        return NULL;
}
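
/*
 * Sketch: test for membership in the active set without taking a
 * reference. The my_lock spinlock serializing all lc_* calls is the
 * user's own (made up here):
 *
 *	spin_lock(&my_lock);
 *	e = lc_find(lc, enr);
 *	in_use = e && e->refcnt;
 *	spin_unlock(&my_lock);
 */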

/* returned element will be "recycled" immediately */
static struct lc_element *lc_evict(struct lru_cache *lc)
{
        struct list_head  *n;
        struct lc_element *e;

        if (list_empty(&lc->lru))
                return NULL;

        n = lc->lru.prev;
        e = list_entry(n, struct lc_element, list);

        PARANOIA_LC_ELEMENT(lc, e);

        list_del(&e->list);
        hlist_del(&e->colision);
        return e;
}

/**
 * lc_del - removes an element from the cache
 * @lc: The lru_cache object
 * @e: The element to remove
 *
 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
 * sets @e->lc_number to %LC_FREE.
 */
void lc_del(struct lru_cache *lc, struct lc_element *e)
{
        PARANOIA_ENTRY();
        PARANOIA_LC_ELEMENT(lc, e);
        BUG_ON(e->refcnt);

        e->lc_number = LC_FREE;
        hlist_del_init(&e->colision);
        list_move(&e->list, &lc->free);
        RETURN();
}

static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
{
        struct list_head *n;

        if (list_empty(&lc->free))
                return lc_evict(lc);

        n = lc->free.next;
        list_del(n);
        return list_entry(n, struct lc_element, list);
}

static int lc_unused_element_available(struct lru_cache *lc)
{
        if (!list_empty(&lc->free))
                return 1; /* something on the free list */
        if (!list_empty(&lc->lru))
                return 1; /* something to evict */

        return 0;
}

/**
 * lc_get - get element by label, maybe change the active set
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Finds an element in the cache, increases its usage count,
 * "touches" and returns it.
 *
 * In case the requested number is not present, it needs to be added to the
 * cache. Therefore it is possible that another element is evicted from
 * the cache. In either case, the user is notified, so they are able to
 * e.g. keep a persistent log of the cache changes, and therefore of the
 * objects in use.
 *
 * Return values:
 *  NULL
 *     The cache was marked %LC_STARVING,
 *     or the requested label was not in the active set
 *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
 *     Or no unused or free element could be recycled (@lc will be marked as
 *     %LC_STARVING, blocking further lc_get() operations).
 *
 *  pointer to the element with the REQUESTED element number.
 *     In this case, it can be used right away.
 *
 *  pointer to an UNUSED element with some different element number,
 *          where that different number may also be %LC_FREE.
 *
 *          In this case, the cache is marked %LC_DIRTY (blocking further
 *          changes), and the returned element pointer is removed from the
 *          lru list and hash collision chains.  The user now should do
 *          whatever housekeeping is necessary, and then must call
 *          lc_changed(lc, element_pointer) to finish the change.
 *
 * NOTE: The user needs to check the lc_number on EACH use, so they recognize
 *       any cache set change.
 */
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
{
        struct lc_element *e;

        PARANOIA_ENTRY();
        if (lc->flags & LC_STARVING) {
                ++lc->starving;
                RETURN(NULL);
        }

        e = lc_find(lc, enr);
        if (e) {
                ++lc->hits;
                if (e->refcnt++ == 0)
                        lc->used++;
                list_move(&e->list, &lc->in_use); /* Not evictable... */
                RETURN(e);
        }

        ++lc->misses;

        /* In case there is nothing available and we cannot kick out
         * the LRU element, we have to wait ...
         */
        if (!lc_unused_element_available(lc)) {
                __set_bit(__LC_STARVING, &lc->flags);
                RETURN(NULL);
        }

        /* It was not present in the active set.
         * We are going to recycle an unused (or even "free") element.
         * The user may need to commit a transaction to record that change.
         * We serialize on the __LC_DIRTY bit in flags. */
        if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
                ++lc->dirty;
                RETURN(NULL);
        }

        e = lc_get_unused_element(lc);
        BUG_ON(!e);

        clear_bit(__LC_STARVING, &lc->flags);
        BUG_ON(++e->refcnt != 1);
        lc->used++;

        lc->changing_element = e;
        lc->new_number = enr;

        RETURN(e);
}
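
/*
 * Sketch of the full lc_get()/lc_changed()/lc_put() cycle. All lc_*
 * calls are serialized by a lock of the user's own; my_lock, the enr
 * label and write_transaction_to_disk() are made up:
 *
 *	spin_lock(&my_lock);
 *	e = lc_get(lc, enr);
 *	spin_unlock(&my_lock);
 *	if (!e)
 *		return -EBUSY;	// starving or dirty: retry later
 *	if (e->lc_number != enr) {
 *		// the active set changes: persist that fact first
 *		write_transaction_to_disk(enr, lc_index_of(lc, e));
 *		spin_lock(&my_lock);
 *		lc_changed(lc, e);	// commits the new label
 *		spin_unlock(&my_lock);
 *	}
 *	// ... use the object @e is embedded in ...
 *	spin_lock(&my_lock);
 *	lc_put(lc, e);
 *	spin_unlock(&my_lock);
 */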

/* Similar to lc_get(),
 * but only gets a new reference on an existing element.
 * You either get the requested element, or NULL.
 * Will be consolidated into one function.
 */
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
        struct lc_element *e;

        PARANOIA_ENTRY();
        if (lc->flags & LC_STARVING) {
                ++lc->starving;
                RETURN(NULL);
        }

        e = lc_find(lc, enr);
        if (e) {
                ++lc->hits;
                if (e->refcnt++ == 0)
                        lc->used++;
                list_move(&e->list, &lc->in_use); /* Not evictable... */
        }
        RETURN(e);
}

/**
 * lc_changed - tell @lc that the change has been recorded
 * @lc: the lru cache to operate on
 * @e: the element pending label change
 */
void lc_changed(struct lru_cache *lc, struct lc_element *e)
{
        PARANOIA_ENTRY();
        BUG_ON(e != lc->changing_element);
        PARANOIA_LC_ELEMENT(lc, e);
        ++lc->changed;
        e->lc_number = lc->new_number;
        list_add(&e->list, &lc->in_use);
        hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
        lc->changing_element = NULL;
        lc->new_number = LC_FREE;
        clear_bit(__LC_DIRTY, &lc->flags);
        smp_mb__after_clear_bit();
        RETURN();
}

/**
 * lc_put - give up refcnt of @e
 * @lc: the lru cache to operate on
 * @e: the element to put
 *
 * If refcnt reaches zero, the element is moved to the lru list,
 * and a %LC_STARVING (if set) is cleared.
 * Returns the new (post-decrement) refcnt.
 */
unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
{
        PARANOIA_ENTRY();
        PARANOIA_LC_ELEMENT(lc, e);
        BUG_ON(e->refcnt == 0);
        BUG_ON(e == lc->changing_element);
        if (--e->refcnt == 0) {
                /* move it to the front of LRU. */
                list_move(&e->list, &lc->lru);
                lc->used--;
                clear_bit(__LC_STARVING, &lc->flags);
                smp_mb__after_clear_bit();
        }
        RETURN(e->refcnt);
}

/**
 * lc_element_by_index
 * @lc: the lru cache to operate on
 * @i: the index of the element to return
 */
struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
{
        BUG_ON(i >= lc->nr_elements);
        BUG_ON(lc->lc_element[i] == NULL);
        BUG_ON(lc->lc_element[i]->lc_index != i);
        return lc->lc_element[i];
}

/**
 * lc_index_of
 * @lc: the lru cache to operate on
 * @e: the element to query for its index position in lc->lc_element
 */
unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
{
        PARANOIA_LC_ELEMENT(lc, e);
        return e->lc_index;
}

/**
 * lc_set - associate index with label
 * @lc: the lru cache to operate on
 * @enr: the label to set
 * @index: the element index to associate label with.
 *
 * Used to initialize the active set to some previously recorded state.
 */
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
{
        struct lc_element *e;

        if (index < 0 || index >= lc->nr_elements)
                return;

        e = lc_element_by_index(lc, index);
        e->lc_number = enr;

        hlist_del_init(&e->colision);
        hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
        list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
}
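
/*
 * Sketch: rebuild the active set from a previously recorded state,
 * e.g. during crash recovery. The on_disk_label[] array and nr_slots
 * are made up:
 *
 *	for (i = 0; i < nr_slots; i++)
 *		lc_set(lc, on_disk_label[i], i);
 */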

/**
 * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form.
 * @seq: the &struct seq_file pointer to seq_printf into
 * @lc: the lru cache to operate on
 * @utext: user supplied "heading" or other info
 * @detail: function pointer the user may provide to dump further details
 * of the object the lc_element is embedded in.
 */
void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
             void (*detail) (struct seq_file *, struct lc_element *))
{
        unsigned int nr_elements = lc->nr_elements;
        struct lc_element *e;
        unsigned int i;

        seq_printf(seq, "\tnn: lc_number refcnt %s\n", utext);
        for (i = 0; i < nr_elements; i++) {
                e = lc_element_by_index(lc, i);
                if (e->lc_number == LC_FREE) {
                        seq_printf(seq, "\t%2u: FREE\n", i);
                } else {
                        seq_printf(seq, "\t%2u: %4u %4u    ", i,
                                   e->lc_number, e->refcnt);
                        detail(seq, e);
                }
        }
}
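
/*
 * Sketch of a @detail callback for lc_seq_dump_details(). The struct
 * my_extent container with its "lce" and "flags" members is made up:
 *
 *	static void my_detail(struct seq_file *seq, struct lc_element *e)
 *	{
 *		struct my_extent *ext =
 *			container_of(e, struct my_extent, lce);
 *
 *		seq_printf(seq, "%08lx\n", ext->flags);
 *	}
 *
 *	lc_seq_dump_details(seq, lc, "flags", my_detail);
 */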

EXPORT_SYMBOL(lc_create);
EXPORT_SYMBOL(lc_reset);
EXPORT_SYMBOL(lc_destroy);
EXPORT_SYMBOL(lc_set);
EXPORT_SYMBOL(lc_del);
EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_find);
EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_put);
EXPORT_SYMBOL(lc_changed);
EXPORT_SYMBOL(lc_element_by_index);
EXPORT_SYMBOL(lc_index_of);
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_seq_dump_details);