linux/drivers/md/dm-bufio.c
   1/*
   2 * Copyright (C) 2009-2011 Red Hat, Inc.
   3 *
   4 * Author: Mikulas Patocka <mpatocka@redhat.com>
   5 *
   6 * This file is released under the GPL.
   7 */
   8
   9#include <linux/dm-bufio.h>
  10
  11#include <linux/device-mapper.h>
  12#include <linux/dm-io.h>
  13#include <linux/slab.h>
  14#include <linux/sched/mm.h>
  15#include <linux/jiffies.h>
  16#include <linux/vmalloc.h>
  17#include <linux/shrinker.h>
  18#include <linux/module.h>
  19#include <linux/rbtree.h>
  20#include <linux/stacktrace.h>
  21
  22#define DM_MSG_PREFIX "bufio"
  23
  24/*
  25 * Memory management policy:
  26 *      Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory
  27 *      or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower).
  28 *      Always allocate at least DM_BUFIO_MIN_BUFFERS buffers.
   29 *      Start background writeback when the number of dirty buffers
   30 *      exceeds DM_BUFIO_WRITEBACK_RATIO times the number of clean buffers.
  31 */
  32#define DM_BUFIO_MIN_BUFFERS            8
  33
  34#define DM_BUFIO_MEMORY_PERCENT         2
  35#define DM_BUFIO_VMALLOC_PERCENT        25
  36#define DM_BUFIO_WRITEBACK_RATIO        3
  37#define DM_BUFIO_LOW_WATERMARK_RATIO    16
  38
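/*
 * A rough, illustrative example of the policy above: on a 64-bit machine
 * with 16 GiB of RAM, the 2% memory limit allows roughly 330 MiB of cached
 * buffers; on a 32-bit kernel whose vmalloc arena is only 128M, the 25%
 * vmalloc limit (32M) is lower and wins.  Background writeback starts once
 * dirty buffers outnumber clean buffers by more than
 * DM_BUFIO_WRITEBACK_RATIO (3:1); see __check_watermark() below.
 */
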
  39/*
  40 * Check buffer ages in this interval (seconds)
  41 */
  42#define DM_BUFIO_WORK_TIMER_SECS        30
  43
  44/*
  45 * Free buffers when they are older than this (seconds)
  46 */
  47#define DM_BUFIO_DEFAULT_AGE_SECS       300
  48
  49/*
   50 * The number of bytes of cached data to keep around.
  51 */
  52#define DM_BUFIO_DEFAULT_RETAIN_BYTES   (256 * 1024)
  53
  54/*
  55 * Align buffer writes to this boundary.
  56 * Tests show that SSDs have the highest IOPS when using 4k writes.
  57 */
  58#define DM_BUFIO_WRITE_ALIGN            4096
  59
  60/*
  61 * dm_buffer->list_mode
  62 */
  63#define LIST_CLEAN      0
  64#define LIST_DIRTY      1
  65#define LIST_SIZE       2
  66
  67/*
  68 * Linking of buffers:
  69 *      All buffers are linked to buffer_tree with their node field.
  70 *
  71 *      Clean buffers that are not being written (B_WRITING not set)
  72 *      are linked to lru[LIST_CLEAN] with their lru_list field.
  73 *
  74 *      Dirty and clean buffers that are being written are linked to
  75 *      lru[LIST_DIRTY] with their lru_list field. When the write
  76 *      finishes, the buffer cannot be relinked immediately (because we
  77 *      are in an interrupt context and relinking requires process
  78 *      context), so some clean-not-writing buffers can be held on
   79 *      lru[LIST_DIRTY] too.  They are later moved to lru[LIST_CLEAN]
   80 *      in process context.
  81 */
  82struct dm_bufio_client {
  83        struct mutex lock;
  84
  85        struct list_head lru[LIST_SIZE];
  86        unsigned long n_buffers[LIST_SIZE];
  87
  88        struct block_device *bdev;
  89        unsigned block_size;
  90        s8 sectors_per_block_bits;
  91        void (*alloc_callback)(struct dm_buffer *);
  92        void (*write_callback)(struct dm_buffer *);
  93
  94        struct kmem_cache *slab_buffer;
  95        struct kmem_cache *slab_cache;
  96        struct dm_io_client *dm_io;
  97
  98        struct list_head reserved_buffers;
  99        unsigned need_reserved_buffers;
 100
 101        unsigned minimum_buffers;
 102
 103        struct rb_root buffer_tree;
 104        wait_queue_head_t free_buffer_wait;
 105
 106        sector_t start;
 107
 108        int async_write_error;
 109
 110        struct list_head client_list;
 111
 112        struct shrinker shrinker;
 113        struct work_struct shrink_work;
 114        atomic_long_t need_shrink;
 115};
 116
 117/*
 118 * Buffer state bits.
 119 */
 120#define B_READING       0
 121#define B_WRITING       1
 122#define B_DIRTY         2
 123
 124/*
 125 * Describes how the block was allocated:
 126 * kmem_cache_alloc(), __get_free_pages() or vmalloc().
 127 * See the comment at alloc_buffer_data.
 128 */
 129enum data_mode {
 130        DATA_MODE_SLAB = 0,
 131        DATA_MODE_GET_FREE_PAGES = 1,
 132        DATA_MODE_VMALLOC = 2,
 133        DATA_MODE_LIMIT = 3
 134};
 135
 136struct dm_buffer {
 137        struct rb_node node;
 138        struct list_head lru_list;
 139        struct list_head global_list;
 140        sector_t block;
 141        void *data;
 142        unsigned char data_mode;                /* DATA_MODE_* */
 143        unsigned char list_mode;                /* LIST_* */
 144        blk_status_t read_error;
 145        blk_status_t write_error;
 146        unsigned accessed;
 147        unsigned hold_count;
 148        unsigned long state;
 149        unsigned long last_accessed;
 150        unsigned dirty_start;
 151        unsigned dirty_end;
 152        unsigned write_start;
 153        unsigned write_end;
 154        struct dm_bufio_client *c;
 155        struct list_head write_list;
 156        void (*end_io)(struct dm_buffer *, blk_status_t);
 157#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 158#define MAX_STACK 10
 159        unsigned int stack_len;
 160        unsigned long stack_entries[MAX_STACK];
 161#endif
 162};
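
/*
 * Layout note: the per-buffer auxiliary area requested through the aux_size
 * argument of dm_bufio_client_create() lives in the same slab object,
 * immediately after struct dm_buffer (slab_buffer objects are
 * sizeof(struct dm_buffer) + aux_size bytes, and dm_bufio_get_aux_data()
 * simply returns b + 1):
 *
 *      +------------------+-----------------+
 *      | struct dm_buffer | aux_size bytes  |   <- one slab_buffer object
 *      +------------------+-----------------+
 *      ^ b                ^ dm_bufio_get_aux_data(b)
 */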
 163
 164/*----------------------------------------------------------------*/
 165
 166#define dm_bufio_in_request()   (!!current->bio_list)
 167
 168static void dm_bufio_lock(struct dm_bufio_client *c)
 169{
 170        mutex_lock_nested(&c->lock, dm_bufio_in_request());
 171}
 172
 173static int dm_bufio_trylock(struct dm_bufio_client *c)
 174{
 175        return mutex_trylock(&c->lock);
 176}
 177
 178static void dm_bufio_unlock(struct dm_bufio_client *c)
 179{
 180        mutex_unlock(&c->lock);
 181}
 182
 183/*----------------------------------------------------------------*/
 184
 185/*
  186 * Default cache size: computed from available memory using the limits above.
 187 */
 188static unsigned long dm_bufio_default_cache_size;
 189
 190/*
 191 * Total cache size set by the user.
 192 */
 193static unsigned long dm_bufio_cache_size;
 194
 195/*
 196 * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change
 197 * at any time.  If it disagrees, the user has changed cache size.
 198 */
 199static unsigned long dm_bufio_cache_size_latch;
 200
 201static DEFINE_SPINLOCK(global_spinlock);
 202
 203static LIST_HEAD(global_queue);
 204
 205static unsigned long global_num = 0;
 206
 207/*
 208 * Buffers are freed after this timeout
 209 */
 210static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
 211static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
 212
 213static unsigned long dm_bufio_peak_allocated;
 214static unsigned long dm_bufio_allocated_kmem_cache;
 215static unsigned long dm_bufio_allocated_get_free_pages;
 216static unsigned long dm_bufio_allocated_vmalloc;
 217static unsigned long dm_bufio_current_allocated;
 218
 219/*----------------------------------------------------------------*/
 220
 221/*
 222 * The current number of clients.
 223 */
 224static int dm_bufio_client_count;
 225
 226/*
 227 * The list of all clients.
 228 */
 229static LIST_HEAD(dm_bufio_all_clients);
 230
 231/*
 232 * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count
 233 */
 234static DEFINE_MUTEX(dm_bufio_clients_lock);
 235
 236static struct workqueue_struct *dm_bufio_wq;
 237static struct delayed_work dm_bufio_cleanup_old_work;
 238static struct work_struct dm_bufio_replacement_work;
 239
 240
 241#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 242static void buffer_record_stack(struct dm_buffer *b)
 243{
 244        b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2);
 245}
 246#endif
 247
 248/*----------------------------------------------------------------
 249 * A red/black tree acts as an index for all the buffers.
 250 *--------------------------------------------------------------*/
 251static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
 252{
 253        struct rb_node *n = c->buffer_tree.rb_node;
 254        struct dm_buffer *b;
 255
 256        while (n) {
 257                b = container_of(n, struct dm_buffer, node);
 258
 259                if (b->block == block)
 260                        return b;
 261
 262                n = block < b->block ? n->rb_left : n->rb_right;
 263        }
 264
 265        return NULL;
 266}
 267
 268static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
 269{
 270        struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL;
 271        struct dm_buffer *found;
 272
 273        while (*new) {
 274                found = container_of(*new, struct dm_buffer, node);
 275
 276                if (found->block == b->block) {
 277                        BUG_ON(found != b);
 278                        return;
 279                }
 280
 281                parent = *new;
 282                new = b->block < found->block ?
 283                        &found->node.rb_left : &found->node.rb_right;
 284        }
 285
 286        rb_link_node(&b->node, parent, new);
 287        rb_insert_color(&b->node, &c->buffer_tree);
 288}
 289
 290static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
 291{
 292        rb_erase(&b->node, &c->buffer_tree);
 293}
 294
 295/*----------------------------------------------------------------*/
 296
 297static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
 298{
 299        unsigned char data_mode;
 300        long diff;
 301
 302        static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
 303                &dm_bufio_allocated_kmem_cache,
 304                &dm_bufio_allocated_get_free_pages,
 305                &dm_bufio_allocated_vmalloc,
 306        };
 307
 308        data_mode = b->data_mode;
 309        diff = (long)b->c->block_size;
 310        if (unlink)
 311                diff = -diff;
 312
 313        spin_lock(&global_spinlock);
 314
 315        *class_ptr[data_mode] += diff;
 316
 317        dm_bufio_current_allocated += diff;
 318
 319        if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
 320                dm_bufio_peak_allocated = dm_bufio_current_allocated;
 321
 322        b->accessed = 1;
 323
 324        if (!unlink) {
 325                list_add(&b->global_list, &global_queue);
 326                global_num++;
 327                if (dm_bufio_current_allocated > dm_bufio_cache_size)
 328                        queue_work(dm_bufio_wq, &dm_bufio_replacement_work);
 329        } else {
 330                list_del(&b->global_list);
 331                global_num--;
 332        }
 333
 334        spin_unlock(&global_spinlock);
 335}
 336
 337/*
  338 * Refresh the latched cache size; called when the client count or the cache size changes.
 339 */
 340static void __cache_size_refresh(void)
 341{
 342        BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock));
 343        BUG_ON(dm_bufio_client_count < 0);
 344
 345        dm_bufio_cache_size_latch = READ_ONCE(dm_bufio_cache_size);
 346
 347        /*
 348         * Use default if set to 0 and report the actual cache size used.
 349         */
 350        if (!dm_bufio_cache_size_latch) {
 351                (void)cmpxchg(&dm_bufio_cache_size, 0,
 352                              dm_bufio_default_cache_size);
 353                dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
 354        }
 355}
 356
 357/*
 358 * Allocating buffer data.
 359 *
 360 * Small buffers are allocated with kmem_cache, to use space optimally.
 361 *
 362 * For large buffers, we choose between get_free_pages and vmalloc.
 363 * Each has advantages and disadvantages.
 364 *
 365 * __get_free_pages can randomly fail if the memory is fragmented.
 366 * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
 367 * as low as 128M) so using it for caching is not appropriate.
 368 *
 369 * If the allocation may fail we use __get_free_pages. Memory fragmentation
 370 * won't have a fatal effect here, but it just causes flushes of some other
 371 * buffers and more I/O will be performed. Don't use __get_free_pages if it
 372 * always fails (i.e. order >= MAX_ORDER).
 373 *
 374 * If the allocation shouldn't fail we use __vmalloc. This is only for the
 375 * initial reserve allocation, so there's no risk of wasting all vmalloc
 376 * space.
 377 */
 378static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 379                               unsigned char *data_mode)
 380{
 381        if (unlikely(c->slab_cache != NULL)) {
 382                *data_mode = DATA_MODE_SLAB;
 383                return kmem_cache_alloc(c->slab_cache, gfp_mask);
 384        }
 385
 386        if (c->block_size <= KMALLOC_MAX_SIZE &&
 387            gfp_mask & __GFP_NORETRY) {
 388                *data_mode = DATA_MODE_GET_FREE_PAGES;
 389                return (void *)__get_free_pages(gfp_mask,
 390                                                c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
 391        }
 392
 393        *data_mode = DATA_MODE_VMALLOC;
 394
 395        /*
 396         * __vmalloc allocates the data pages and auxiliary structures with
 397         * gfp_flags that were specified, but pagetables are always allocated
 398         * with GFP_KERNEL, no matter what was specified as gfp_mask.
 399         *
 400         * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that
 401         * all allocations done by this process (including pagetables) are done
 402         * as if GFP_NOIO was specified.
 403         */
 404        if (gfp_mask & __GFP_NORETRY) {
 405                unsigned noio_flag = memalloc_noio_save();
 406                void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
 407
 408                memalloc_noio_restore(noio_flag);
 409                return ptr;
 410        }
 411
 412        return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
 413}
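
/*
 * A rough sketch of which data mode the function above picks for some common
 * configurations (assuming 4 KiB pages):
 *
 *      blocks smaller than a page, or non-power-of-two blocks up to
 *      KMALLOC_MAX_SIZE            -> DATA_MODE_SLAB (c->slab_cache is
 *                                     created in dm_bufio_client_create())
 *      power-of-two blocks up to KMALLOC_MAX_SIZE, __GFP_NORETRY allocation
 *                                  -> DATA_MODE_GET_FREE_PAGES
 *      everything else (very large blocks, or the "must not fail" reserve
 *      allocations)                -> DATA_MODE_VMALLOC
 */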
 414
 415/*
 416 * Free buffer's data.
 417 */
 418static void free_buffer_data(struct dm_bufio_client *c,
 419                             void *data, unsigned char data_mode)
 420{
 421        switch (data_mode) {
 422        case DATA_MODE_SLAB:
 423                kmem_cache_free(c->slab_cache, data);
 424                break;
 425
 426        case DATA_MODE_GET_FREE_PAGES:
 427                free_pages((unsigned long)data,
 428                           c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
 429                break;
 430
 431        case DATA_MODE_VMALLOC:
 432                vfree(data);
 433                break;
 434
 435        default:
 436                DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d",
 437                       data_mode);
 438                BUG();
 439        }
 440}
 441
 442/*
 443 * Allocate buffer and its data.
 444 */
 445static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
 446{
 447        struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask);
 448
 449        if (!b)
 450                return NULL;
 451
 452        b->c = c;
 453
 454        b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
 455        if (!b->data) {
 456                kmem_cache_free(c->slab_buffer, b);
 457                return NULL;
 458        }
 459
 460#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 461        b->stack_len = 0;
 462#endif
 463        return b;
 464}
 465
 466/*
 467 * Free buffer and its data.
 468 */
 469static void free_buffer(struct dm_buffer *b)
 470{
 471        struct dm_bufio_client *c = b->c;
 472
 473        free_buffer_data(c, b->data, b->data_mode);
 474        kmem_cache_free(c->slab_buffer, b);
 475}
 476
 477/*
 478 * Link buffer to the buffer tree and clean or dirty queue.
 479 */
 480static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
 481{
 482        struct dm_bufio_client *c = b->c;
 483
 484        c->n_buffers[dirty]++;
 485        b->block = block;
 486        b->list_mode = dirty;
 487        list_add(&b->lru_list, &c->lru[dirty]);
 488        __insert(b->c, b);
 489        b->last_accessed = jiffies;
 490
 491        adjust_total_allocated(b, false);
 492}
 493
 494/*
 495 * Unlink buffer from the buffer tree and dirty or clean queue.
 496 */
 497static void __unlink_buffer(struct dm_buffer *b)
 498{
 499        struct dm_bufio_client *c = b->c;
 500
 501        BUG_ON(!c->n_buffers[b->list_mode]);
 502
 503        c->n_buffers[b->list_mode]--;
 504        __remove(b->c, b);
 505        list_del(&b->lru_list);
 506
 507        adjust_total_allocated(b, true);
 508}
 509
 510/*
 511 * Place the buffer to the head of dirty or clean LRU queue.
 512 */
 513static void __relink_lru(struct dm_buffer *b, int dirty)
 514{
 515        struct dm_bufio_client *c = b->c;
 516
 517        b->accessed = 1;
 518
 519        BUG_ON(!c->n_buffers[b->list_mode]);
 520
 521        c->n_buffers[b->list_mode]--;
 522        c->n_buffers[dirty]++;
 523        b->list_mode = dirty;
 524        list_move(&b->lru_list, &c->lru[dirty]);
 525        b->last_accessed = jiffies;
 526}
 527
 528/*----------------------------------------------------------------
 529 * Submit I/O on the buffer.
 530 *
 531 * Bio interface is faster but it has some problems:
 532 *      the vector list is limited (increasing this limit increases
 533 *      memory-consumption per buffer, so it is not viable);
 534 *
 535 *      the memory must be direct-mapped, not vmalloced;
 536 *
  537 * If the buffer is not vmalloced, try using the bio interface.
  538 *
  539 * If the bio cannot be allocated, if the buffer's pages cannot all be
  540 * added to it, or if the buffer is vmalloced, use the dm-io layer to do
  541 * the I/O.
 542 * The dm-io layer splits the I/O into multiple requests, avoiding the above
 543 * shortcomings.
 544 *--------------------------------------------------------------*/
 545
 546/*
  547 * dm-io completion routine. It just calls b->end_io, pretending that the
  548 * request was handled directly with the bio interface.
 549 */
 550static void dmio_complete(unsigned long error, void *context)
 551{
 552        struct dm_buffer *b = context;
 553
 554        b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
 555}
 556
 557static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 558                     unsigned n_sectors, unsigned offset)
 559{
 560        int r;
 561        struct dm_io_request io_req = {
 562                .bi_op = rw,
 563                .bi_op_flags = 0,
 564                .notify.fn = dmio_complete,
 565                .notify.context = b,
 566                .client = b->c->dm_io,
 567        };
 568        struct dm_io_region region = {
 569                .bdev = b->c->bdev,
 570                .sector = sector,
 571                .count = n_sectors,
 572        };
 573
 574        if (b->data_mode != DATA_MODE_VMALLOC) {
 575                io_req.mem.type = DM_IO_KMEM;
 576                io_req.mem.ptr.addr = (char *)b->data + offset;
 577        } else {
 578                io_req.mem.type = DM_IO_VMA;
 579                io_req.mem.ptr.vma = (char *)b->data + offset;
 580        }
 581
 582        r = dm_io(&io_req, 1, &region, NULL);
 583        if (unlikely(r))
 584                b->end_io(b, errno_to_blk_status(r));
 585}
 586
 587static void bio_complete(struct bio *bio)
 588{
 589        struct dm_buffer *b = bio->bi_private;
 590        blk_status_t status = bio->bi_status;
 591        bio_put(bio);
 592        b->end_io(b, status);
 593}
 594
 595static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
 596                    unsigned n_sectors, unsigned offset)
 597{
 598        struct bio *bio;
 599        char *ptr;
 600        unsigned vec_size, len;
 601
 602        vec_size = b->c->block_size >> PAGE_SHIFT;
 603        if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
 604                vec_size += 2;
 605
 606        bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
 607        if (!bio) {
 608dmio:
 609                use_dmio(b, rw, sector, n_sectors, offset);
 610                return;
 611        }
 612
 613        bio->bi_iter.bi_sector = sector;
 614        bio_set_dev(bio, b->c->bdev);
 615        bio_set_op_attrs(bio, rw, 0);
 616        bio->bi_end_io = bio_complete;
 617        bio->bi_private = b;
 618
 619        ptr = (char *)b->data + offset;
 620        len = n_sectors << SECTOR_SHIFT;
 621
 622        do {
 623                unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
 624                if (!bio_add_page(bio, virt_to_page(ptr), this_step,
 625                                  offset_in_page(ptr))) {
 626                        bio_put(bio);
 627                        goto dmio;
 628                }
 629
 630                len -= this_step;
 631                ptr += this_step;
 632        } while (len > 0);
 633
 634        submit_bio(bio);
 635}
 636
 637static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
 638{
 639        unsigned n_sectors;
 640        sector_t sector;
 641        unsigned offset, end;
 642
 643        b->end_io = end_io;
 644
 645        if (likely(b->c->sectors_per_block_bits >= 0))
 646                sector = b->block << b->c->sectors_per_block_bits;
 647        else
 648                sector = b->block * (b->c->block_size >> SECTOR_SHIFT);
 649        sector += b->c->start;
 650
 651        if (rw != REQ_OP_WRITE) {
 652                n_sectors = b->c->block_size >> SECTOR_SHIFT;
 653                offset = 0;
 654        } else {
 655                if (b->c->write_callback)
 656                        b->c->write_callback(b);
 657                offset = b->write_start;
 658                end = b->write_end;
 659                offset &= -DM_BUFIO_WRITE_ALIGN;
 660                end += DM_BUFIO_WRITE_ALIGN - 1;
 661                end &= -DM_BUFIO_WRITE_ALIGN;
 662                if (unlikely(end > b->c->block_size))
 663                        end = b->c->block_size;
 664
 665                sector += offset >> SECTOR_SHIFT;
 666                n_sectors = (end - offset) >> SECTOR_SHIFT;
 667        }
 668
 669        if (b->data_mode != DATA_MODE_VMALLOC)
 670                use_bio(b, rw, sector, n_sectors, offset);
 671        else
 672                use_dmio(b, rw, sector, n_sectors, offset);
 673}
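
/*
 * A worked example of the write alignment above, purely illustrative: with
 * an 8 KiB block and a dirty byte range of [100, 5000), submit_io() rounds
 * the start down and the end up to DM_BUFIO_WRITE_ALIGN (4096):
 *
 *      offset = 100 & -4096           = 0
 *      end    = (5000 + 4095) & -4096 = 8192  (already <= block_size)
 *
 * so all 16 sectors of the block are written even though only part of it is
 * dirty, keeping the writes 4 KiB aligned.
 */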
 674
 675/*----------------------------------------------------------------
 676 * Writing dirty buffers
 677 *--------------------------------------------------------------*/
 678
 679/*
 680 * The endio routine for write.
 681 *
  682 * Set the error, clear the B_WRITING bit and wake up anyone who was
  683 * waiting on the buffer.
 684 */
 685static void write_endio(struct dm_buffer *b, blk_status_t status)
 686{
 687        b->write_error = status;
 688        if (unlikely(status)) {
 689                struct dm_bufio_client *c = b->c;
 690
 691                (void)cmpxchg(&c->async_write_error, 0,
 692                                blk_status_to_errno(status));
 693        }
 694
 695        BUG_ON(!test_bit(B_WRITING, &b->state));
 696
 697        smp_mb__before_atomic();
 698        clear_bit(B_WRITING, &b->state);
 699        smp_mb__after_atomic();
 700
 701        wake_up_bit(&b->state, B_WRITING);
 702}
 703
 704/*
 705 * Initiate a write on a dirty buffer, but don't wait for it.
 706 *
 707 * - If the buffer is not dirty, exit.
  708 * - If there is a previous write going on, wait for it to finish (we can't
 709 *   have two writes on the same buffer simultaneously).
 710 * - Submit our write and don't wait on it. We set B_WRITING indicating
 711 *   that there is a write in progress.
 712 */
 713static void __write_dirty_buffer(struct dm_buffer *b,
 714                                 struct list_head *write_list)
 715{
 716        if (!test_bit(B_DIRTY, &b->state))
 717                return;
 718
 719        clear_bit(B_DIRTY, &b->state);
 720        wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
 721
 722        b->write_start = b->dirty_start;
 723        b->write_end = b->dirty_end;
 724
 725        if (!write_list)
 726                submit_io(b, REQ_OP_WRITE, write_endio);
 727        else
 728                list_add_tail(&b->write_list, write_list);
 729}
 730
 731static void __flush_write_list(struct list_head *write_list)
 732{
 733        struct blk_plug plug;
 734        blk_start_plug(&plug);
 735        while (!list_empty(write_list)) {
 736                struct dm_buffer *b =
 737                        list_entry(write_list->next, struct dm_buffer, write_list);
 738                list_del(&b->write_list);
 739                submit_io(b, REQ_OP_WRITE, write_endio);
 740                cond_resched();
 741        }
 742        blk_finish_plug(&plug);
 743}
 744
 745/*
 746 * Wait until any activity on the buffer finishes.  Possibly write the
 747 * buffer if it is dirty.  When this function finishes, there is no I/O
 748 * running on the buffer and the buffer is not dirty.
 749 */
 750static void __make_buffer_clean(struct dm_buffer *b)
 751{
 752        BUG_ON(b->hold_count);
 753
 754        if (!b->state)  /* fast case */
 755                return;
 756
 757        wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 758        __write_dirty_buffer(b, NULL);
 759        wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
 760}
 761
 762/*
 763 * Find some buffer that is not held by anybody, clean it, unlink it and
 764 * return it.
 765 */
 766static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
 767{
 768        struct dm_buffer *b;
 769
 770        list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) {
 771                BUG_ON(test_bit(B_WRITING, &b->state));
 772                BUG_ON(test_bit(B_DIRTY, &b->state));
 773
 774                if (!b->hold_count) {
 775                        __make_buffer_clean(b);
 776                        __unlink_buffer(b);
 777                        return b;
 778                }
 779                cond_resched();
 780        }
 781
 782        list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
 783                BUG_ON(test_bit(B_READING, &b->state));
 784
 785                if (!b->hold_count) {
 786                        __make_buffer_clean(b);
 787                        __unlink_buffer(b);
 788                        return b;
 789                }
 790                cond_resched();
 791        }
 792
 793        return NULL;
 794}
 795
 796/*
  797 * Wait until some other thread frees a buffer or releases its hold count
  798 * on some buffer.
 799 *
 800 * This function is entered with c->lock held, drops it and regains it
 801 * before exiting.
 802 */
 803static void __wait_for_free_buffer(struct dm_bufio_client *c)
 804{
 805        DECLARE_WAITQUEUE(wait, current);
 806
 807        add_wait_queue(&c->free_buffer_wait, &wait);
 808        set_current_state(TASK_UNINTERRUPTIBLE);
 809        dm_bufio_unlock(c);
 810
 811        io_schedule();
 812
 813        remove_wait_queue(&c->free_buffer_wait, &wait);
 814
 815        dm_bufio_lock(c);
 816}
 817
 818enum new_flag {
 819        NF_FRESH = 0,
 820        NF_READ = 1,
 821        NF_GET = 2,
 822        NF_PREFETCH = 3
 823};
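
/*
 * These flags correspond to the public entry points below: NF_READ is used
 * by dm_bufio_read(), NF_GET by dm_bufio_get(), NF_FRESH by dm_bufio_new()
 * and NF_PREFETCH by dm_bufio_prefetch().
 */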
 824
 825/*
 826 * Allocate a new buffer. If the allocation is not possible, wait until
 827 * some other thread frees a buffer.
 828 *
 829 * May drop the lock and regain it.
 830 */
 831static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
 832{
 833        struct dm_buffer *b;
 834        bool tried_noio_alloc = false;
 835
 836        /*
 837         * dm-bufio is resistant to allocation failures (it just keeps
  838         * one buffer reserved in case all the allocations fail).
 839         * So set flags to not try too hard:
 840         *      GFP_NOWAIT: don't wait; if we need to sleep we'll release our
 841         *                  mutex and wait ourselves.
  842         *      __GFP_NORETRY: don't retry; return failure instead
 843         *      __GFP_NOMEMALLOC: don't use emergency reserves
 844         *      __GFP_NOWARN: don't print a warning in case of failure
 845         *
 846         * For debugging, if we set the cache size to 1, no new buffers will
 847         * be allocated.
 848         */
 849        while (1) {
 850                if (dm_bufio_cache_size_latch != 1) {
 851                        b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
 852                        if (b)
 853                                return b;
 854                }
 855
 856                if (nf == NF_PREFETCH)
 857                        return NULL;
 858
 859                if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
 860                        dm_bufio_unlock(c);
 861                        b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
 862                        dm_bufio_lock(c);
 863                        if (b)
 864                                return b;
 865                        tried_noio_alloc = true;
 866                }
 867
 868                if (!list_empty(&c->reserved_buffers)) {
 869                        b = list_entry(c->reserved_buffers.next,
 870                                       struct dm_buffer, lru_list);
 871                        list_del(&b->lru_list);
 872                        c->need_reserved_buffers++;
 873
 874                        return b;
 875                }
 876
 877                b = __get_unclaimed_buffer(c);
 878                if (b)
 879                        return b;
 880
 881                __wait_for_free_buffer(c);
 882        }
 883}
 884
 885static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c, enum new_flag nf)
 886{
 887        struct dm_buffer *b = __alloc_buffer_wait_no_callback(c, nf);
 888
 889        if (!b)
 890                return NULL;
 891
 892        if (c->alloc_callback)
 893                c->alloc_callback(b);
 894
 895        return b;
 896}
 897
 898/*
 899 * Free a buffer and wake other threads waiting for free buffers.
 900 */
 901static void __free_buffer_wake(struct dm_buffer *b)
 902{
 903        struct dm_bufio_client *c = b->c;
 904
 905        if (!c->need_reserved_buffers)
 906                free_buffer(b);
 907        else {
 908                list_add(&b->lru_list, &c->reserved_buffers);
 909                c->need_reserved_buffers--;
 910        }
 911
 912        wake_up(&c->free_buffer_wait);
 913}
 914
 915static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
 916                                        struct list_head *write_list)
 917{
 918        struct dm_buffer *b, *tmp;
 919
 920        list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
 921                BUG_ON(test_bit(B_READING, &b->state));
 922
 923                if (!test_bit(B_DIRTY, &b->state) &&
 924                    !test_bit(B_WRITING, &b->state)) {
 925                        __relink_lru(b, LIST_CLEAN);
 926                        continue;
 927                }
 928
 929                if (no_wait && test_bit(B_WRITING, &b->state))
 930                        return;
 931
 932                __write_dirty_buffer(b, write_list);
 933                cond_resched();
 934        }
 935}
 936
 937/*
  938 * Check if we're over the writeback watermark.
  939 * If the number of dirty buffers exceeds DM_BUFIO_WRITEBACK_RATIO times the
  940 * number of clean buffers, start writing them back asynchronously.
 941 */
 942static void __check_watermark(struct dm_bufio_client *c,
 943                              struct list_head *write_list)
 944{
 945        if (c->n_buffers[LIST_DIRTY] > c->n_buffers[LIST_CLEAN] * DM_BUFIO_WRITEBACK_RATIO)
 946                __write_dirty_buffers_async(c, 1, write_list);
 947}
 948
 949/*----------------------------------------------------------------
 950 * Getting a buffer
 951 *--------------------------------------------------------------*/
 952
 953static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
 954                                     enum new_flag nf, int *need_submit,
 955                                     struct list_head *write_list)
 956{
 957        struct dm_buffer *b, *new_b = NULL;
 958
 959        *need_submit = 0;
 960
 961        b = __find(c, block);
 962        if (b)
 963                goto found_buffer;
 964
 965        if (nf == NF_GET)
 966                return NULL;
 967
 968        new_b = __alloc_buffer_wait(c, nf);
 969        if (!new_b)
 970                return NULL;
 971
 972        /*
  973         * We've had a period where the mutex was unlocked, so we need to
 974         * recheck the buffer tree.
 975         */
 976        b = __find(c, block);
 977        if (b) {
 978                __free_buffer_wake(new_b);
 979                goto found_buffer;
 980        }
 981
 982        __check_watermark(c, write_list);
 983
 984        b = new_b;
 985        b->hold_count = 1;
 986        b->read_error = 0;
 987        b->write_error = 0;
 988        __link_buffer(b, block, LIST_CLEAN);
 989
 990        if (nf == NF_FRESH) {
 991                b->state = 0;
 992                return b;
 993        }
 994
 995        b->state = 1 << B_READING;
 996        *need_submit = 1;
 997
 998        return b;
 999
1000found_buffer:
1001        if (nf == NF_PREFETCH)
1002                return NULL;
1003        /*
1004         * Note: it is essential that we don't wait for the buffer to be
1005         * read if dm_bufio_get function is used. Both dm_bufio_get and
1006         * dm_bufio_prefetch can be used in the driver request routine.
1007         * If the user called both dm_bufio_prefetch and dm_bufio_get on
1008         * the same buffer, it would deadlock if we waited.
1009         */
1010        if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state)))
1011                return NULL;
1012
1013        b->hold_count++;
1014        __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
1015                     test_bit(B_WRITING, &b->state));
1016        return b;
1017}
1018
1019/*
1020 * The endio routine for reading: set the error, clear the bit and wake up
1021 * anyone waiting on the buffer.
1022 */
1023static void read_endio(struct dm_buffer *b, blk_status_t status)
1024{
1025        b->read_error = status;
1026
1027        BUG_ON(!test_bit(B_READING, &b->state));
1028
1029        smp_mb__before_atomic();
1030        clear_bit(B_READING, &b->state);
1031        smp_mb__after_atomic();
1032
1033        wake_up_bit(&b->state, B_READING);
1034}
1035
1036/*
 1037 * A common routine for dm_bufio_get, dm_bufio_read and dm_bufio_new.
 1038 * These functions are similar except that dm_bufio_new doesn't read the
1039 * buffer from the disk (assuming that the caller overwrites all the data
1040 * and uses dm_bufio_mark_buffer_dirty to write new data back).
1041 */
1042static void *new_read(struct dm_bufio_client *c, sector_t block,
1043                      enum new_flag nf, struct dm_buffer **bp)
1044{
1045        int need_submit;
1046        struct dm_buffer *b;
1047
1048        LIST_HEAD(write_list);
1049
1050        dm_bufio_lock(c);
1051        b = __bufio_new(c, block, nf, &need_submit, &write_list);
1052#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1053        if (b && b->hold_count == 1)
1054                buffer_record_stack(b);
1055#endif
1056        dm_bufio_unlock(c);
1057
1058        __flush_write_list(&write_list);
1059
1060        if (!b)
1061                return NULL;
1062
1063        if (need_submit)
1064                submit_io(b, REQ_OP_READ, read_endio);
1065
1066        wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
1067
1068        if (b->read_error) {
1069                int error = blk_status_to_errno(b->read_error);
1070
1071                dm_bufio_release(b);
1072
1073                return ERR_PTR(error);
1074        }
1075
1076        *bp = b;
1077
1078        return b->data;
1079}
1080
1081void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
1082                   struct dm_buffer **bp)
1083{
1084        return new_read(c, block, NF_GET, bp);
1085}
1086EXPORT_SYMBOL_GPL(dm_bufio_get);
1087
1088void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
1089                    struct dm_buffer **bp)
1090{
1091        BUG_ON(dm_bufio_in_request());
1092
1093        return new_read(c, block, NF_READ, bp);
1094}
1095EXPORT_SYMBOL_GPL(dm_bufio_read);
1096
1097void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
1098                   struct dm_buffer **bp)
1099{
1100        BUG_ON(dm_bufio_in_request());
1101
1102        return new_read(c, block, NF_FRESH, bp);
1103}
1104EXPORT_SYMBOL_GPL(dm_bufio_new);
1105
1106void dm_bufio_prefetch(struct dm_bufio_client *c,
1107                       sector_t block, unsigned n_blocks)
1108{
1109        struct blk_plug plug;
1110
1111        LIST_HEAD(write_list);
1112
1113        BUG_ON(dm_bufio_in_request());
1114
1115        blk_start_plug(&plug);
1116        dm_bufio_lock(c);
1117
1118        for (; n_blocks--; block++) {
1119                int need_submit;
1120                struct dm_buffer *b;
1121                b = __bufio_new(c, block, NF_PREFETCH, &need_submit,
1122                                &write_list);
1123                if (unlikely(!list_empty(&write_list))) {
1124                        dm_bufio_unlock(c);
1125                        blk_finish_plug(&plug);
1126                        __flush_write_list(&write_list);
1127                        blk_start_plug(&plug);
1128                        dm_bufio_lock(c);
1129                }
1130                if (unlikely(b != NULL)) {
1131                        dm_bufio_unlock(c);
1132
1133                        if (need_submit)
1134                                submit_io(b, REQ_OP_READ, read_endio);
1135                        dm_bufio_release(b);
1136
1137                        cond_resched();
1138
1139                        if (!n_blocks)
1140                                goto flush_plug;
1141                        dm_bufio_lock(c);
1142                }
1143        }
1144
1145        dm_bufio_unlock(c);
1146
1147flush_plug:
1148        blk_finish_plug(&plug);
1149}
1150EXPORT_SYMBOL_GPL(dm_bufio_prefetch);
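
/*
 * An illustrative (hypothetical) use of the non-blocking pattern described
 * in __bufio_new(): a target's request routine may start reads with
 * dm_bufio_prefetch() and later use dm_bufio_get(), which never waits for
 * I/O, to check whether the block is already cached.  "c", "block" and the
 * deferral path are the caller's own; this is only a sketch:
 *
 *      struct dm_buffer *buf;
 *      void *data;
 *
 *      dm_bufio_prefetch(c, block, 1);
 *      data = dm_bufio_get(c, block, &buf);
 *      if (IS_ERR_OR_NULL(data))
 *              return defer_to_worker();  (hypothetical slow path that
 *                                          retries with dm_bufio_read())
 *      ...use data...
 *      dm_bufio_release(buf);
 */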
1151
1152void dm_bufio_release(struct dm_buffer *b)
1153{
1154        struct dm_bufio_client *c = b->c;
1155
1156        dm_bufio_lock(c);
1157
1158        BUG_ON(!b->hold_count);
1159
1160        b->hold_count--;
1161        if (!b->hold_count) {
1162                wake_up(&c->free_buffer_wait);
1163
1164                /*
1165                 * If there were errors on the buffer, and the buffer is not
1166                 * to be written, free the buffer. There is no point in caching
 1167                 * an invalid buffer.
1168                 */
1169                if ((b->read_error || b->write_error) &&
1170                    !test_bit(B_READING, &b->state) &&
1171                    !test_bit(B_WRITING, &b->state) &&
1172                    !test_bit(B_DIRTY, &b->state)) {
1173                        __unlink_buffer(b);
1174                        __free_buffer_wake(b);
1175                }
1176        }
1177
1178        dm_bufio_unlock(c);
1179}
1180EXPORT_SYMBOL_GPL(dm_bufio_release);
1181
1182void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
1183                                        unsigned start, unsigned end)
1184{
1185        struct dm_bufio_client *c = b->c;
1186
1187        BUG_ON(start >= end);
1188        BUG_ON(end > b->c->block_size);
1189
1190        dm_bufio_lock(c);
1191
1192        BUG_ON(test_bit(B_READING, &b->state));
1193
1194        if (!test_and_set_bit(B_DIRTY, &b->state)) {
1195                b->dirty_start = start;
1196                b->dirty_end = end;
1197                __relink_lru(b, LIST_DIRTY);
1198        } else {
1199                if (start < b->dirty_start)
1200                        b->dirty_start = start;
1201                if (end > b->dirty_end)
1202                        b->dirty_end = end;
1203        }
1204
1205        dm_bufio_unlock(c);
1206}
1207EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);
1208
1209void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
1210{
1211        dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
1212}
1213EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
1214
1215void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
1216{
1217        LIST_HEAD(write_list);
1218
1219        BUG_ON(dm_bufio_in_request());
1220
1221        dm_bufio_lock(c);
1222        __write_dirty_buffers_async(c, 0, &write_list);
1223        dm_bufio_unlock(c);
1224        __flush_write_list(&write_list);
1225}
1226EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
1227
1228/*
1229 * For performance, it is essential that the buffers are written asynchronously
1230 * and simultaneously (so that the block layer can merge the writes) and then
1231 * waited upon.
1232 *
 1233 * Finally, we flush the hardware disk cache.
1234 */
1235int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
1236{
1237        int a, f;
1238        unsigned long buffers_processed = 0;
1239        struct dm_buffer *b, *tmp;
1240
1241        LIST_HEAD(write_list);
1242
1243        dm_bufio_lock(c);
1244        __write_dirty_buffers_async(c, 0, &write_list);
1245        dm_bufio_unlock(c);
1246        __flush_write_list(&write_list);
1247        dm_bufio_lock(c);
1248
1249again:
1250        list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
1251                int dropped_lock = 0;
1252
1253                if (buffers_processed < c->n_buffers[LIST_DIRTY])
1254                        buffers_processed++;
1255
1256                BUG_ON(test_bit(B_READING, &b->state));
1257
1258                if (test_bit(B_WRITING, &b->state)) {
1259                        if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
1260                                dropped_lock = 1;
1261                                b->hold_count++;
1262                                dm_bufio_unlock(c);
1263                                wait_on_bit_io(&b->state, B_WRITING,
1264                                               TASK_UNINTERRUPTIBLE);
1265                                dm_bufio_lock(c);
1266                                b->hold_count--;
1267                        } else
1268                                wait_on_bit_io(&b->state, B_WRITING,
1269                                               TASK_UNINTERRUPTIBLE);
1270                }
1271
1272                if (!test_bit(B_DIRTY, &b->state) &&
1273                    !test_bit(B_WRITING, &b->state))
1274                        __relink_lru(b, LIST_CLEAN);
1275
1276                cond_resched();
1277
1278                /*
1279                 * If we dropped the lock, the list is no longer consistent,
1280                 * so we must restart the search.
1281                 *
1282                 * In the most common case, the buffer just processed is
1283                 * relinked to the clean list, so we won't loop scanning the
1284                 * same buffer again and again.
1285                 *
1286                 * This may livelock if there is another thread simultaneously
1287                 * dirtying buffers, so we count the number of buffers walked
1288                 * and if it exceeds the total number of buffers, it means that
1289                 * someone is doing some writes simultaneously with us.  In
 1290                 * this case, stop dropping the lock and just wait while holding it.
1291                 */
1292                if (dropped_lock)
1293                        goto again;
1294        }
1295        wake_up(&c->free_buffer_wait);
1296        dm_bufio_unlock(c);
1297
1298        a = xchg(&c->async_write_error, 0);
1299        f = dm_bufio_issue_flush(c);
1300        if (a)
1301                return a;
1302
1303        return f;
1304}
1305EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
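
/*
 * An illustrative (hypothetical) read-modify-write sequence built from the
 * calls above; "c" is a client from dm_bufio_client_create() and "block" is
 * chosen by the caller -- a sketch only, not taken from an in-tree user:
 *
 *      struct dm_buffer *buf;
 *      void *data = dm_bufio_read(c, block, &buf);
 *
 *      if (IS_ERR(data))
 *              return PTR_ERR(data);
 *      memset(data, 0, 64);                             (modify cached data)
 *      dm_bufio_mark_partial_buffer_dirty(buf, 0, 64);  (or mark the whole
 *                                                        buffer dirty)
 *      dm_bufio_release(buf);
 *      return dm_bufio_write_dirty_buffers(c);          (write back + flush)
 */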
1306
1307/*
1308 * Use dm-io to send an empty barrier to flush the device.
1309 */
1310int dm_bufio_issue_flush(struct dm_bufio_client *c)
1311{
1312        struct dm_io_request io_req = {
1313                .bi_op = REQ_OP_WRITE,
1314                .bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
1315                .mem.type = DM_IO_KMEM,
1316                .mem.ptr.addr = NULL,
1317                .client = c->dm_io,
1318        };
1319        struct dm_io_region io_reg = {
1320                .bdev = c->bdev,
1321                .sector = 0,
1322                .count = 0,
1323        };
1324
1325        BUG_ON(dm_bufio_in_request());
1326
1327        return dm_io(&io_req, 1, &io_reg, NULL);
1328}
1329EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
1330
1331/*
1332 * We first delete any other buffer that may be at that new location.
1333 *
1334 * Then, we write the buffer to the original location if it was dirty.
1335 *
1336 * Then, if we are the only one who is holding the buffer, relink the buffer
1337 * in the buffer tree for the new location.
1338 *
1339 * If there was someone else holding the buffer, we write it to the new
 1340 * location but do not relink it, because that other user needs to have the buffer
1341 * at the same place.
1342 */
1343void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
1344{
1345        struct dm_bufio_client *c = b->c;
1346        struct dm_buffer *new;
1347
1348        BUG_ON(dm_bufio_in_request());
1349
1350        dm_bufio_lock(c);
1351
1352retry:
1353        new = __find(c, new_block);
1354        if (new) {
1355                if (new->hold_count) {
1356                        __wait_for_free_buffer(c);
1357                        goto retry;
1358                }
1359
1360                /*
1361                 * FIXME: Is there any point waiting for a write that's going
1362                 * to be overwritten in a bit?
1363                 */
1364                __make_buffer_clean(new);
1365                __unlink_buffer(new);
1366                __free_buffer_wake(new);
1367        }
1368
1369        BUG_ON(!b->hold_count);
1370        BUG_ON(test_bit(B_READING, &b->state));
1371
1372        __write_dirty_buffer(b, NULL);
1373        if (b->hold_count == 1) {
1374                wait_on_bit_io(&b->state, B_WRITING,
1375                               TASK_UNINTERRUPTIBLE);
1376                set_bit(B_DIRTY, &b->state);
1377                b->dirty_start = 0;
1378                b->dirty_end = c->block_size;
1379                __unlink_buffer(b);
1380                __link_buffer(b, new_block, LIST_DIRTY);
1381        } else {
1382                sector_t old_block;
1383                wait_on_bit_lock_io(&b->state, B_WRITING,
1384                                    TASK_UNINTERRUPTIBLE);
1385                /*
1386                 * Relink buffer to "new_block" so that write_callback
1387                 * sees "new_block" as a block number.
1388                 * After the write, link the buffer back to old_block.
 1389                 * All this must be done under the bufio lock, so that the block number
1390                 * change isn't visible to other threads.
1391                 */
1392                old_block = b->block;
1393                __unlink_buffer(b);
1394                __link_buffer(b, new_block, b->list_mode);
1395                submit_io(b, REQ_OP_WRITE, write_endio);
1396                wait_on_bit_io(&b->state, B_WRITING,
1397                               TASK_UNINTERRUPTIBLE);
1398                __unlink_buffer(b);
1399                __link_buffer(b, old_block, b->list_mode);
1400        }
1401
1402        dm_bufio_unlock(c);
1403        dm_bufio_release(b);
1404}
1405EXPORT_SYMBOL_GPL(dm_bufio_release_move);
1406
1407/*
1408 * Free the given buffer.
1409 *
 1410 * This is just a hint; if the buffer is in use or dirty, this function
1411 * does nothing.
1412 */
1413void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
1414{
1415        struct dm_buffer *b;
1416
1417        dm_bufio_lock(c);
1418
1419        b = __find(c, block);
1420        if (b && likely(!b->hold_count) && likely(!b->state)) {
1421                __unlink_buffer(b);
1422                __free_buffer_wake(b);
1423        }
1424
1425        dm_bufio_unlock(c);
1426}
1427EXPORT_SYMBOL_GPL(dm_bufio_forget);
1428
1429void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
1430{
1431        c->minimum_buffers = n;
1432}
1433EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);
1434
1435unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
1436{
1437        return c->block_size;
1438}
1439EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
1440
1441sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
1442{
1443        sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
1444        if (s >= c->start)
1445                s -= c->start;
1446        else
1447                s = 0;
1448        if (likely(c->sectors_per_block_bits >= 0))
1449                s >>= c->sectors_per_block_bits;
1450        else
1451                sector_div(s, c->block_size >> SECTOR_SHIFT);
1452        return s;
1453}
1454EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
1455
1456struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
1457{
1458        return c->dm_io;
1459}
1460EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);
1461
1462sector_t dm_bufio_get_block_number(struct dm_buffer *b)
1463{
1464        return b->block;
1465}
1466EXPORT_SYMBOL_GPL(dm_bufio_get_block_number);
1467
1468void *dm_bufio_get_block_data(struct dm_buffer *b)
1469{
1470        return b->data;
1471}
1472EXPORT_SYMBOL_GPL(dm_bufio_get_block_data);
1473
1474void *dm_bufio_get_aux_data(struct dm_buffer *b)
1475{
1476        return b + 1;
1477}
1478EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data);
1479
1480struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b)
1481{
1482        return b->c;
1483}
1484EXPORT_SYMBOL_GPL(dm_bufio_get_client);
1485
1486static void drop_buffers(struct dm_bufio_client *c)
1487{
1488        struct dm_buffer *b;
1489        int i;
1490        bool warned = false;
1491
1492        BUG_ON(dm_bufio_in_request());
1493
1494        /*
1495         * An optimization so that the buffers are not written one-by-one.
1496         */
1497        dm_bufio_write_dirty_buffers_async(c);
1498
1499        dm_bufio_lock(c);
1500
1501        while ((b = __get_unclaimed_buffer(c)))
1502                __free_buffer_wake(b);
1503
1504        for (i = 0; i < LIST_SIZE; i++)
1505                list_for_each_entry(b, &c->lru[i], lru_list) {
1506                        WARN_ON(!warned);
1507                        warned = true;
1508                        DMERR("leaked buffer %llx, hold count %u, list %d",
1509                              (unsigned long long)b->block, b->hold_count, i);
1510#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1511                        stack_trace_print(b->stack_entries, b->stack_len, 1);
1512                        /* mark unclaimed to avoid BUG_ON below */
1513                        b->hold_count = 0;
1514#endif
1515                }
1516
1517#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1518        while ((b = __get_unclaimed_buffer(c)))
1519                __free_buffer_wake(b);
1520#endif
1521
1522        for (i = 0; i < LIST_SIZE; i++)
1523                BUG_ON(!list_empty(&c->lru[i]));
1524
1525        dm_bufio_unlock(c);
1526}
1527
1528/*
 1529 * We may not be able to evict this buffer if I/O is pending or if the client
 1530 * is still using it.  The caller is expected to know the buffer is too old.
1531 *
1532 * And if GFP_NOFS is used, we must not do any I/O because we hold
1533 * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
 1534 * rerouted to a different bufio client.
1535 */
1536static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
1537{
1538        if (!(gfp & __GFP_FS)) {
1539                if (test_bit(B_READING, &b->state) ||
1540                    test_bit(B_WRITING, &b->state) ||
1541                    test_bit(B_DIRTY, &b->state))
1542                        return false;
1543        }
1544
1545        if (b->hold_count)
1546                return false;
1547
1548        __make_buffer_clean(b);
1549        __unlink_buffer(b);
1550        __free_buffer_wake(b);
1551
1552        return true;
1553}
1554
1555static unsigned long get_retain_buffers(struct dm_bufio_client *c)
1556{
1557        unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
1558        if (likely(c->sectors_per_block_bits >= 0))
1559                retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
1560        else
1561                retain_bytes /= c->block_size;
1562        return retain_bytes;
1563}
1564
1565static void __scan(struct dm_bufio_client *c)
1566{
1567        int l;
1568        struct dm_buffer *b, *tmp;
1569        unsigned long freed = 0;
1570        unsigned long count = c->n_buffers[LIST_CLEAN] +
1571                              c->n_buffers[LIST_DIRTY];
1572        unsigned long retain_target = get_retain_buffers(c);
1573
1574        for (l = 0; l < LIST_SIZE; l++) {
1575                list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
1576                        if (count - freed <= retain_target)
1577                                atomic_long_set(&c->need_shrink, 0);
1578                        if (!atomic_long_read(&c->need_shrink))
1579                                return;
1580                        if (__try_evict_buffer(b, GFP_KERNEL)) {
1581                                atomic_long_dec(&c->need_shrink);
1582                                freed++;
1583                        }
1584                        cond_resched();
1585                }
1586        }
1587}
1588
1589static void shrink_work(struct work_struct *w)
1590{
1591        struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work);
1592
1593        dm_bufio_lock(c);
1594        __scan(c);
1595        dm_bufio_unlock(c);
1596}
1597
1598static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1599{
1600        struct dm_bufio_client *c;
1601
1602        c = container_of(shrink, struct dm_bufio_client, shrinker);
1603        atomic_long_add(sc->nr_to_scan, &c->need_shrink);
1604        queue_work(dm_bufio_wq, &c->shrink_work);
1605
1606        return sc->nr_to_scan;
1607}
1608
1609static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1610{
1611        struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
1612        unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
1613                              READ_ONCE(c->n_buffers[LIST_DIRTY]);
1614        unsigned long retain_target = get_retain_buffers(c);
1615        unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);
1616
1617        if (unlikely(count < retain_target))
1618                count = 0;
1619        else
1620                count -= retain_target;
1621
1622        if (unlikely(count < queued_for_cleanup))
1623                count = 0;
1624        else
1625                count -= queued_for_cleanup;
1626
1627        return count;
1628}
1629
1630/*
1631 * Create the buffering interface
1632 */
1633struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
1634                                               unsigned reserved_buffers, unsigned aux_size,
1635                                               void (*alloc_callback)(struct dm_buffer *),
1636                                               void (*write_callback)(struct dm_buffer *))
1637{
1638        int r;
1639        struct dm_bufio_client *c;
1640        unsigned i;
1641        char slab_name[27];
1642
1643        if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
 1644                DMERR("%s: block size not specified or is not a multiple of 512b", __func__);
1645                r = -EINVAL;
1646                goto bad_client;
1647        }
1648
1649        c = kzalloc(sizeof(*c), GFP_KERNEL);
1650        if (!c) {
1651                r = -ENOMEM;
1652                goto bad_client;
1653        }
1654        c->buffer_tree = RB_ROOT;
1655
1656        c->bdev = bdev;
1657        c->block_size = block_size;
1658        if (is_power_of_2(block_size))
1659                c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
1660        else
1661                c->sectors_per_block_bits = -1;
1662
1663        c->alloc_callback = alloc_callback;
1664        c->write_callback = write_callback;
1665
1666        for (i = 0; i < LIST_SIZE; i++) {
1667                INIT_LIST_HEAD(&c->lru[i]);
1668                c->n_buffers[i] = 0;
1669        }
1670
1671        mutex_init(&c->lock);
1672        INIT_LIST_HEAD(&c->reserved_buffers);
1673        c->need_reserved_buffers = reserved_buffers;
1674
1675        dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);
1676
1677        init_waitqueue_head(&c->free_buffer_wait);
1678        c->async_write_error = 0;
1679
1680        c->dm_io = dm_io_client_create();
1681        if (IS_ERR(c->dm_io)) {
1682                r = PTR_ERR(c->dm_io);
1683                goto bad_dm_io;
1684        }
1685
1686        if (block_size <= KMALLOC_MAX_SIZE &&
1687            (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
1688                unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE);
1689                snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size);
1690                c->slab_cache = kmem_cache_create(slab_name, block_size, align,
1691                                                  SLAB_RECLAIM_ACCOUNT, NULL);
1692                if (!c->slab_cache) {
1693                        r = -ENOMEM;
1694                        goto bad;
1695                }
1696        }
1697        if (aux_size)
1698                snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size);
1699        else
1700                snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer");
1701        c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
1702                                           0, SLAB_RECLAIM_ACCOUNT, NULL);
1703        if (!c->slab_buffer) {
1704                r = -ENOMEM;
1705                goto bad;
1706        }
1707
1708        while (c->need_reserved_buffers) {
1709                struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
1710
1711                if (!b) {
1712                        r = -ENOMEM;
1713                        goto bad;
1714                }
1715                __free_buffer_wake(b);
1716        }
1717
1718        INIT_WORK(&c->shrink_work, shrink_work);
1719        atomic_long_set(&c->need_shrink, 0);
1720
1721        c->shrinker.count_objects = dm_bufio_shrink_count;
1722        c->shrinker.scan_objects = dm_bufio_shrink_scan;
1723        c->shrinker.seeks = 1;
1724        c->shrinker.batch = 0;
1725        r = register_shrinker(&c->shrinker);
1726        if (r)
1727                goto bad;
1728
1729        mutex_lock(&dm_bufio_clients_lock);
1730        dm_bufio_client_count++;
1731        list_add(&c->client_list, &dm_bufio_all_clients);
1732        __cache_size_refresh();
1733        mutex_unlock(&dm_bufio_clients_lock);
1734
1735        return c;
1736
1737bad:
1738        while (!list_empty(&c->reserved_buffers)) {
1739                struct dm_buffer *b = list_entry(c->reserved_buffers.next,
1740                                                 struct dm_buffer, lru_list);
1741                list_del(&b->lru_list);
1742                free_buffer(b);
1743        }
1744        kmem_cache_destroy(c->slab_cache);
1745        kmem_cache_destroy(c->slab_buffer);
1746        dm_io_client_destroy(c->dm_io);
1747bad_dm_io:
1748        mutex_destroy(&c->lock);
1749        kfree(c);
1750bad_client:
1751        return ERR_PTR(r);
1752}
1753EXPORT_SYMBOL_GPL(dm_bufio_client_create);
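
/*
 * A hedged usage sketch (hypothetical caller, not taken from this file): a
 * device-mapper target typically creates one bufio client per underlying
 * device in its constructor and must check the result with IS_ERR(), since
 * failures are returned as ERR_PTR() values rather than NULL.
 */
#if 0
static int example_open_metadata(struct block_device *bdev)
{
	struct dm_bufio_client *c;

	c = dm_bufio_client_create(bdev, 4096 /* block size */,
				   1 /* reserved buffers */, 0 /* aux_size */,
				   NULL /* alloc_callback */,
				   NULL /* write_callback */);
	if (IS_ERR(c))
		return PTR_ERR(c);

	/* ... access blocks with dm_bufio_read()/dm_bufio_release() ... */

	dm_bufio_client_destroy(c);
	return 0;
}
#endif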
1754
1755/*
1756 * Free the buffering interface.
1757 * The caller must ensure that no references are held on any buffers.
1758 */
1759void dm_bufio_client_destroy(struct dm_bufio_client *c)
1760{
1761        unsigned i;
1762
1763        drop_buffers(c);
1764
1765        unregister_shrinker(&c->shrinker);
1766        flush_work(&c->shrink_work);
1767
1768        mutex_lock(&dm_bufio_clients_lock);
1769
1770        list_del(&c->client_list);
1771        dm_bufio_client_count--;
1772        __cache_size_refresh();
1773
1774        mutex_unlock(&dm_bufio_clients_lock);
1775
1776        BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree));
1777        BUG_ON(c->need_reserved_buffers);
1778
1779        while (!list_empty(&c->reserved_buffers)) {
1780                struct dm_buffer *b = list_entry(c->reserved_buffers.next,
1781                                                 struct dm_buffer, lru_list);
1782                list_del(&b->lru_list);
1783                free_buffer(b);
1784        }
1785
1786        for (i = 0; i < LIST_SIZE; i++)
1787                if (c->n_buffers[i])
1788                        DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]);
1789
1790        for (i = 0; i < LIST_SIZE; i++)
1791                BUG_ON(c->n_buffers[i]);
1792
1793        kmem_cache_destroy(c->slab_cache);
1794        kmem_cache_destroy(c->slab_buffer);
1795        dm_io_client_destroy(c->dm_io);
1796        mutex_destroy(&c->lock);
1797        kfree(c);
1798}
1799EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
1800
1801void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start)
1802{
1803        c->start = start;
1804}
1805EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
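
/*
 * The offset is given in 512-byte sectors and (see the I/O submission helpers
 * earlier in this file) is added to every buffer's on-disk position, so it is
 * normally set once, right after dm_bufio_client_create(), before any buffers
 * are read.
 */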
1806
1807static unsigned get_max_age_hz(void)
1808{
1809        unsigned max_age = READ_ONCE(dm_bufio_max_age);
1810
1811        if (max_age > UINT_MAX / HZ)
1812                max_age = UINT_MAX / HZ;
1813
1814        return max_age * HZ;
1815}
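
/*
 * dm_bufio_max_age is clamped to UINT_MAX / HZ before the multiplication so
 * that max_age * HZ cannot wrap around; with HZ == 1000, for example, the
 * effective ceiling is 4294967 seconds (about 49 days).
 */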
1816
1817static bool older_than(struct dm_buffer *b, unsigned long age_hz)
1818{
1819        return time_after_eq(jiffies, b->last_accessed + age_hz);
1820}
1821
1822static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
1823{
1824        struct dm_buffer *b, *tmp;
1825        unsigned long retain_target = get_retain_buffers(c);
1826        unsigned long count;
1827        LIST_HEAD(write_list);
1828
1829        dm_bufio_lock(c);
1830
1831        __check_watermark(c, &write_list);
1832        if (unlikely(!list_empty(&write_list))) {
1833                dm_bufio_unlock(c);
1834                __flush_write_list(&write_list);
1835                dm_bufio_lock(c);
1836        }
1837
1838        count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
1839        list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) {
1840                if (count <= retain_target)
1841                        break;
1842
1843                if (!older_than(b, age_hz))
1844                        break;
1845
1846                if (__try_evict_buffer(b, 0))
1847                        count--;
1848
1849                cond_resched();
1850        }
1851
1852        dm_bufio_unlock(c);
1853}
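
/*
 * The zero gfp mask passed to __try_evict_buffer() above means this age-based
 * sweep only frees buffers that are already clean and idle; dirty or
 * in-flight buffers are skipped rather than written back or waited on, with
 * the __check_watermark() call at the top handling background writeback
 * separately.
 */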
1854
1855static void do_global_cleanup(struct work_struct *w)
1856{
1857        struct dm_bufio_client *locked_client = NULL;
1858        struct dm_bufio_client *current_client;
1859        struct dm_buffer *b;
1860        unsigned spinlock_hold_count;
1861        unsigned long threshold = dm_bufio_cache_size -
1862                dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO;
1863        unsigned long loops = global_num * 2;
1864
1865        mutex_lock(&dm_bufio_clients_lock);
1866
1867        while (1) {
1868                cond_resched();
1869
1870                spin_lock(&global_spinlock);
1871                if (unlikely(dm_bufio_current_allocated <= threshold))
1872                        break;
1873
1874                spinlock_hold_count = 0;
1875get_next:
1876                if (!loops--)
1877                        break;
1878                if (unlikely(list_empty(&global_queue)))
1879                        break;
1880                b = list_entry(global_queue.prev, struct dm_buffer, global_list);
1881
1882                if (b->accessed) {
1883                        b->accessed = 0;
1884                        list_move(&b->global_list, &global_queue);
1885                        if (likely(++spinlock_hold_count < 16))
1886                                goto get_next;
1887                        spin_unlock(&global_spinlock);
1888                        continue;
1889                }
1890
1891                current_client = b->c;
1892                if (unlikely(current_client != locked_client)) {
1893                        if (locked_client)
1894                                dm_bufio_unlock(locked_client);
1895
1896                        if (!dm_bufio_trylock(current_client)) {
1897                                spin_unlock(&global_spinlock);
1898                                dm_bufio_lock(current_client);
1899                                locked_client = current_client;
1900                                continue;
1901                        }
1902
1903                        locked_client = current_client;
1904                }
1905
1906                spin_unlock(&global_spinlock);
1907
1908                if (unlikely(!__try_evict_buffer(b, GFP_KERNEL))) {
1909                        spin_lock(&global_spinlock);
1910                        list_move(&b->global_list, &global_queue);
1911                        spin_unlock(&global_spinlock);
1912                }
1913        }
1914
1915        spin_unlock(&global_spinlock);
1916
1917        if (locked_client)
1918                dm_bufio_unlock(locked_client);
1919
1920        mutex_unlock(&dm_bufio_clients_lock);
1921}
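
/*
 * global_spinlock must not be held across a sleeping dm_bufio_lock(), so the
 * loop above first attempts dm_bufio_trylock() under the spinlock; on failure
 * it drops the spinlock, takes the client mutex the slow (sleeping) way and
 * restarts the loop to revalidate the queue tail.  The spinlock_hold_count
 * cap similarly bounds how long the spinlock is held while skipping recently
 * accessed buffers.
 */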
1922
1923static void cleanup_old_buffers(void)
1924{
1925        unsigned long max_age_hz = get_max_age_hz();
1926        struct dm_bufio_client *c;
1927
1928        mutex_lock(&dm_bufio_clients_lock);
1929
1930        __cache_size_refresh();
1931
1932        list_for_each_entry(c, &dm_bufio_all_clients, client_list)
1933                __evict_old_buffers(c, max_age_hz);
1934
1935        mutex_unlock(&dm_bufio_clients_lock);
1936}
1937
1938static void work_fn(struct work_struct *w)
1939{
1940        cleanup_old_buffers();
1941
1942        queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
1943                           DM_BUFIO_WORK_TIMER_SECS * HZ);
1944}
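
/*
 * work_fn re-arms itself, so the age-based cleanup keeps running every
 * DM_BUFIO_WORK_TIMER_SECS seconds until cancel_delayed_work_sync() is called
 * from dm_bufio_exit() at module unload.
 */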
1945
1946/*----------------------------------------------------------------
1947 * Module setup
1948 *--------------------------------------------------------------*/
1949
1950/*
1951 * This is called only once for the whole dm_bufio module.
1952 * It initializes the memory limit.
1953 */
1954static int __init dm_bufio_init(void)
1955{
1956        __u64 mem;
1957
1958        dm_bufio_allocated_kmem_cache = 0;
1959        dm_bufio_allocated_get_free_pages = 0;
1960        dm_bufio_allocated_vmalloc = 0;
1961        dm_bufio_current_allocated = 0;
1962
1963        mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(),
1964                               DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;
1965
1966        if (mem > ULONG_MAX)
1967                mem = ULONG_MAX;
1968
1969#ifdef CONFIG_MMU
1970        if (mem > mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100))
1971                mem = mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100);
1972#endif
1973
1974        dm_bufio_default_cache_size = mem;
1975
1976        mutex_lock(&dm_bufio_clients_lock);
1977        __cache_size_refresh();
1978        mutex_unlock(&dm_bufio_clients_lock);
1979
1980        dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
1981        if (!dm_bufio_wq)
1982                return -ENOMEM;
1983
1984        INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
1985        INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
1986        queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
1987                           DM_BUFIO_WORK_TIMER_SECS * HZ);
1988
1989        return 0;
1990}
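
/*
 * Illustrative arithmetic (not from the source): with 8 GiB of low memory and
 * DM_BUFIO_MEMORY_PERCENT == 2, the default cache size comes out to roughly
 * 164 MiB; on CONFIG_MMU systems it is further capped at
 * DM_BUFIO_VMALLOC_PERCENT of the vmalloc arena if that value is smaller.
 */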
1991
1992/*
1993 * This is called once when unloading the dm_bufio module.
1994 */
1995static void __exit dm_bufio_exit(void)
1996{
1997        int bug = 0;
1998
1999        cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
2000        flush_workqueue(dm_bufio_wq);
2001        destroy_workqueue(dm_bufio_wq);
2002
2003        if (dm_bufio_client_count) {
2004                DMCRIT("%s: dm_bufio_client_count leaked: %d",
2005                        __func__, dm_bufio_client_count);
2006                bug = 1;
2007        }
2008
2009        if (dm_bufio_current_allocated) {
2010                DMCRIT("%s: dm_bufio_current_allocated leaked: %lu",
2011                        __func__, dm_bufio_current_allocated);
2012                bug = 1;
2013        }
2014
2015        if (dm_bufio_allocated_get_free_pages) {
2016                DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu",
2017                       __func__, dm_bufio_allocated_get_free_pages);
2018                bug = 1;
2019        }
2020
2021        if (dm_bufio_allocated_vmalloc) {
2022                DMCRIT("%s: dm_bufio_vmalloc leaked: %lu",
2023                       __func__, dm_bufio_allocated_vmalloc);
2024                bug = 1;
2025        }
2026
2027        BUG_ON(bug);
2028}
2029
2030module_init(dm_bufio_init)
2031module_exit(dm_bufio_exit)
2032
2033module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR);
2034MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
2035
2036module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
2037MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
2038
2039module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
2040MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
2041
2042module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
2043MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
2044
2045module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO);
2046MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");
2047
2048module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO);
2049MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");
2050
2051module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO);
2052MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
2053
2054module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO);
2055MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
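
/*
 * These parameters typically appear under /sys/module/dm_bufio/parameters/;
 * the entries exported with S_IWUSR (e.g. max_cache_size_bytes,
 * max_age_seconds, retain_bytes) can be tuned at runtime, while the
 * S_IRUGO-only entries are read-only allocation statistics.
 */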
2056
2057MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
2058MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
2059MODULE_LICENSE("GPL");
2060