linux/drivers/md/dm-io.c
/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm-io.h"

#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>

struct dm_io_client {
        mempool_t *pool;
        struct bio_set *bios;
};

/* FIXME: can we shrink this ? */
struct io {
        unsigned long error;
        atomic_t count;
        struct task_struct *sleeper;
        struct dm_io_client *client;
        io_notify_fn callback;
        void *context;
};

/*
 * io contexts are only dynamically allocated for asynchronous
 * io.  Since async io is likely to be the majority of io we'll
 * have the same number of io contexts as bios! (FIXME: must reduce this).
 */

static unsigned int pages_to_ios(unsigned int pages)
{
        return 4 * pages;       /* too many ? */
}

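/*
 * Worked example (added commentary, not in the original source): a client
 * created with dm_io_client_create(64) reserves a mempool of
 * pages_to_ios(64) == 256 struct io objects for in-flight asynchronous io.
 */
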
/*
 * Create a client with mempool and bioset.
 */
struct dm_io_client *dm_io_client_create(unsigned num_pages)
{
        unsigned ios = pages_to_ios(num_pages);
        struct dm_io_client *client;

        client = kmalloc(sizeof(*client), GFP_KERNEL);
        if (!client)
                return ERR_PTR(-ENOMEM);

        client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io));
        if (!client->pool)
                goto bad;

        client->bios = bioset_create(16, 16);
        if (!client->bios)
                goto bad;

        return client;

   bad:
        if (client->pool)
                mempool_destroy(client->pool);
        kfree(client);
        return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(dm_io_client_create);

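/*
 * Note for callers (added commentary): on failure dm_io_client_create()
 * returns an ERR_PTR() value rather than NULL, so results should be checked
 * with IS_ERR() before use.
 */
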
int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client)
{
        return mempool_resize(client->pool, pages_to_ios(num_pages),
                              GFP_KERNEL);
}
EXPORT_SYMBOL(dm_io_client_resize);

void dm_io_client_destroy(struct dm_io_client *client)
{
        mempool_destroy(client->pool);
        bioset_free(client->bios);
        kfree(client);
}
EXPORT_SYMBOL(dm_io_client_destroy);

/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * In order to save a memory allocation we store this in the last
 * bvec which we know is unused (blech).
 * XXX This is ugly and can OOPS with some configs... find another way.
 *---------------------------------------------------------------*/
static inline void bio_set_region(struct bio *bio, unsigned region)
{
        bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
}

static inline unsigned bio_get_region(struct bio *bio)
{
        return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
}

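/*
 * Added commentary: do_region() below allocates one bvec more than it needs,
 * then decrements bi_max_vecs so bio_add_page() never touches the spare slot;
 * bio_set_region() stashes the region index in that slot's bv_len.  endio()
 * reads it back with bio_get_region() and restores bi_max_vecs before
 * bio_put() so the bio is freed with its true size.
 */
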
/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
        if (error)
                set_bit(region, &io->error);

        if (atomic_dec_and_test(&io->count)) {
                if (io->sleeper)
                        wake_up_process(io->sleeper);

                else {
                        int r = io->error;
                        io_notify_fn fn = io->callback;
                        void *context = io->context;

                        mempool_free(io, io->client->pool);
                        fn(r, context);
                }
        }
}

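/*
 * Added commentary: io->error is used as a bitmap with one bit per region,
 * so a failure in region N sets bit N.  sync_io() hands the whole bitmap back
 * through *error_bits, and the async path passes it to the notify callback.
 */
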
static void endio(struct bio *bio, int error)
{
        struct io *io;
        unsigned region;

        if (error && bio_data_dir(bio) == READ)
                zero_fill_bio(bio);

        /*
         * The bio destructor in bio_put() may use the io object.
         */
        io = bio->bi_private;
        region = bio_get_region(bio);

        bio->bi_max_vecs++;
        bio_put(bio);

        dec_count(io, region, error);
}

/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
        void (*get_page)(struct dpages *dp,
                         struct page **p, unsigned long *len, unsigned *offset);
        void (*next_page)(struct dpages *dp);

        unsigned context_u;
        void *context_ptr;
};

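/*
 * Added commentary: struct dpages is a small iterator.  get_page() reports
 * the current page, the usable length within it and the starting offset;
 * next_page() advances to the following page.  context_ptr and context_u
 * hold whatever state the particular source (page list, bvec, vmalloc or
 * kernel memory) needs between calls.
 */
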
/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
                  struct page **p, unsigned long *len, unsigned *offset)
{
        unsigned o = dp->context_u;
        struct page_list *pl = (struct page_list *) dp->context_ptr;

        *p = pl->page;
        *len = PAGE_SIZE - o;
        *offset = o;
}

static void list_next_page(struct dpages *dp)
{
        struct page_list *pl = (struct page_list *) dp->context_ptr;
        dp->context_ptr = pl->next;
        dp->context_u = 0;
}

static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
        dp->get_page = list_get_page;
        dp->next_page = list_next_page;
        dp->context_u = offset;
        dp->context_ptr = pl;
}

/*
 * Functions for getting the pages from a bvec.
 */
static void bvec_get_page(struct dpages *dp,
                  struct page **p, unsigned long *len, unsigned *offset)
{
        struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
        *p = bvec->bv_page;
        *len = bvec->bv_len;
        *offset = bvec->bv_offset;
}

static void bvec_next_page(struct dpages *dp)
{
        struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
        dp->context_ptr = bvec + 1;
}

static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
{
        dp->get_page = bvec_get_page;
        dp->next_page = bvec_next_page;
        dp->context_ptr = bvec;
}

/*
 * Functions for getting the pages from a VMA.
 */
static void vm_get_page(struct dpages *dp,
                 struct page **p, unsigned long *len, unsigned *offset)
{
        *p = vmalloc_to_page(dp->context_ptr);
        *offset = dp->context_u;
        *len = PAGE_SIZE - dp->context_u;
}

static void vm_next_page(struct dpages *dp)
{
        dp->context_ptr += PAGE_SIZE - dp->context_u;
        dp->context_u = 0;
}

static void vm_dp_init(struct dpages *dp, void *data)
{
        dp->get_page = vm_get_page;
        dp->next_page = vm_next_page;
        dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
        dp->context_ptr = data;
}

static void dm_bio_destructor(struct bio *bio)
{
        struct io *io = bio->bi_private;

        bio_free(bio, io->client->bios);
}

/*
 * Functions for getting the pages from kernel memory.
 */
static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len,
                        unsigned *offset)
{
        *p = virt_to_page(dp->context_ptr);
        *offset = dp->context_u;
        *len = PAGE_SIZE - dp->context_u;
}

static void km_next_page(struct dpages *dp)
{
        dp->context_ptr += PAGE_SIZE - dp->context_u;
        dp->context_u = 0;
}

static void km_dp_init(struct dpages *dp, void *data)
{
        dp->get_page = km_get_page;
        dp->next_page = km_next_page;
        dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
        dp->context_ptr = data;
}

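/*
 * Added commentary: the vmalloc and kernel-memory variants above differ only
 * in how the backing page is looked up (vmalloc_to_page() vs virt_to_page());
 * both use context_u so a buffer need not start on a page boundary.
 */
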
/*-----------------------------------------------------------------
 * IO routines that accept a list of pages.
 *---------------------------------------------------------------*/
static void do_region(int rw, unsigned int region, struct io_region *where,
                      struct dpages *dp, struct io *io)
{
        struct bio *bio;
        struct page *page;
        unsigned long len;
        unsigned offset;
        unsigned num_bvecs;
        sector_t remaining = where->count;

        while (remaining) {
                /*
                 * Allocate a suitably sized bio: we add an extra
                 * bvec for bio_get/set_region() and decrement bi_max_vecs
                 * to hide it from bio_add_page().
                 */
                num_bvecs = dm_sector_div_up(remaining,
                                             (PAGE_SIZE >> SECTOR_SHIFT));
                num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
                                      num_bvecs);
                bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
                bio->bi_sector = where->sector + (where->count - remaining);
                bio->bi_bdev = where->bdev;
                bio->bi_end_io = endio;
                bio->bi_private = io;
                bio->bi_destructor = dm_bio_destructor;
                bio->bi_max_vecs--;
                bio_set_region(bio, region);

                /*
                 * Try and add as many pages as possible.
                 */
                while (remaining) {
                        dp->get_page(dp, &page, &len, &offset);
                        len = min(len, to_bytes(remaining));
                        if (!bio_add_page(bio, page, len, offset))
                                break;

                        offset = 0;
                        remaining -= to_sector(len);
                        dp->next_page(dp);
                }

                atomic_inc(&io->count);
                submit_bio(rw, bio);
        }
}

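/*
 * Added commentary: a region larger than one bio can carry is split across
 * several bios.  Each pass of the outer loop in do_region() allocates a bio,
 * fills it from the dpages iterator until bio_add_page() refuses more, takes
 * a reference on io->count and submits it, with bi_sector advanced past the
 * sectors already issued.
 */
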
static void dispatch_io(int rw, unsigned int num_regions,
                        struct io_region *where, struct dpages *dp,
                        struct io *io, int sync)
{
        int i;
        struct dpages old_pages = *dp;

        if (sync)
                rw |= (1 << BIO_RW_SYNC);

        /*
         * For multiple regions we need to be careful to rewind
         * the dp object for each call to do_region.
         */
        for (i = 0; i < num_regions; i++) {
                *dp = old_pages;
                if (where[i].count)
                        do_region(rw, i, where + i, dp, io);
        }

        /*
         * Drop the extra reference that we were holding to avoid
         * the io being completed too early.
         */
        dec_count(io, 0, 0);
}

static int sync_io(struct dm_io_client *client, unsigned int num_regions,
                   struct io_region *where, int rw, struct dpages *dp,
                   unsigned long *error_bits)
{
        struct io io;

        if (num_regions > 1 && rw != WRITE) {
                WARN_ON(1);
                return -EIO;
        }

        io.error = 0;
        atomic_set(&io.count, 1); /* see dispatch_io() */
        io.sleeper = current;
        io.client = client;

        dispatch_io(rw, num_regions, where, dp, &io, 1);

        while (1) {
                set_current_state(TASK_UNINTERRUPTIBLE);

                if (!atomic_read(&io.count) || signal_pending(current))
                        break;

                io_schedule();
        }
        set_current_state(TASK_RUNNING);

        if (atomic_read(&io.count))
                return -EINTR;

        if (error_bits)
                *error_bits = io.error;

        return io.error ? -EIO : 0;
}

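/*
 * Added commentary: sync_io() sleeps in TASK_UNINTERRUPTIBLE but checks
 * signal_pending() on each pass of its wait loop; if a signal is pending
 * before the count reaches zero it returns -EINTR even though some bios
 * may still be in flight.
 */
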
static int async_io(struct dm_io_client *client, unsigned int num_regions,
                    struct io_region *where, int rw, struct dpages *dp,
                    io_notify_fn fn, void *context)
{
        struct io *io;

        if (num_regions > 1 && rw != WRITE) {
                WARN_ON(1);
                fn(1, context);
                return -EIO;
        }

        io = mempool_alloc(client->pool, GFP_NOIO);
        io->error = 0;
        atomic_set(&io->count, 1); /* see dispatch_io() */
        io->sleeper = NULL;
        io->client = client;
        io->callback = fn;
        io->context = context;

        dispatch_io(rw, num_regions, where, dp, io, 0);
        return 0;
}

static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
{
        /* Set up dpages based on memory type */
        switch (io_req->mem.type) {
        case DM_IO_PAGE_LIST:
                list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
                break;

        case DM_IO_BVEC:
                bvec_dp_init(dp, io_req->mem.ptr.bvec);
                break;

        case DM_IO_VMA:
                vm_dp_init(dp, io_req->mem.ptr.vma);
                break;

        case DM_IO_KMEM:
                km_dp_init(dp, io_req->mem.ptr.addr);
                break;

        default:
                return -EINVAL;
        }

        return 0;
}

/*
 * New collapsed (a)synchronous interface
 */
int dm_io(struct dm_io_request *io_req, unsigned num_regions,
          struct io_region *where, unsigned long *sync_error_bits)
{
        int r;
        struct dpages dp;

        r = dp_init(io_req, &dp);
        if (r)
                return r;

        if (!io_req->notify.fn)
                return sync_io(io_req->client, num_regions, where,
                               io_req->bi_rw, &dp, sync_error_bits);

        return async_io(io_req->client, num_regions, where, io_req->bi_rw,
                        &dp, io_req->notify.fn, io_req->notify.context);
}
EXPORT_SYMBOL(dm_io);
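
/*
 * Usage sketch (added for illustration, not part of the original file):
 * a minimal hypothetical caller that reads one page synchronously into
 * kernel memory through the collapsed interface above.  The function name
 * example_read_page and its parameters are assumptions made for this
 * example; leaving notify.fn NULL selects the sync_io() path.
 */
#if 0
static int example_read_page(struct dm_io_client *client,
                             struct block_device *bdev, void *buf)
{
        unsigned long error_bits = 0;
        struct io_region where = {
                .bdev = bdev,
                .sector = 0,                            /* start of the device */
                .count = PAGE_SIZE >> SECTOR_SHIFT,     /* one page of sectors */
        };
        struct dm_io_request req = {
                .bi_rw = READ,
                .mem.type = DM_IO_KMEM,                 /* buf is kmalloc'd memory */
                .mem.ptr.addr = buf,
                .notify.fn = NULL,                      /* synchronous request */
                .client = client,
        };

        /* Blocks in sync_io() until every bio for the region completes. */
        return dm_io(&req, 1, &where, &error_bits);
}
#endif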