linux/block/blk-barrier.c
/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

#include "blk.h"

/**
 * blk_queue_ordered - set up the queue's ordered write mode
 * @q:        the request queue
 * @ordered:  one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function to indicate which ordered mode
 *   they implement.
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
		      prepare_flush_fn *prepare_flush_fn)
{
	if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
					     QUEUE_ORDERED_DO_POSTFLUSH))) {
		printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
		return -EINVAL;
	}

	if (ordered != QUEUE_ORDERED_NONE &&
	    ordered != QUEUE_ORDERED_DRAIN &&
	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
	    ordered != QUEUE_ORDERED_TAG &&
	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
	    ordered != QUEUE_ORDERED_TAG_FUA) {
		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
		return -EINVAL;
	}

	q->ordered = ordered;
	q->next_ordered = ordered;
	q->prepare_flush_fn = prepare_flush_fn;

	return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);
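
/*
 * Illustrative sketch (the driver name, opcode and timeout below are
 * made up): a driver for a disk with a write-back cache could register
 * an ordered mode that drains the queue and flushes the cache, and
 * supply a prepare_flush_fn that turns the pre/post flush proxy
 * requests into a real cache-flush command:
 *
 *	static void mydrv_prepare_flush(struct request_queue *q,
 *					struct request *rq)
 *	{
 *		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 *		rq->timeout = MYDRV_FLUSH_TIMEOUT;
 *		rq->cmd[0] = MYDRV_OP_FLUSH_CACHE;
 *		rq->cmd_len = 1;
 *	}
 *
 * and then, while setting up its queue:
 *
 *	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, mydrv_prepare_flush);
 */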

/*
 * Cache flushing for ordered writes handling
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
	if (!q->ordseq)
		return 0;
	return 1 << ffz(q->ordseq);
}
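
/*
 * Worked example of the sequencing above (assuming the QUEUE_ORDSEQ_*
 * flags are defined in ascending stage order, which the sequencing code
 * relies on): q->ordseq accumulates one bit per completed stage, and
 * ffz() picks the lowest bit still clear.  Once STARTED, DRAIN and
 * PREFLUSH are set, the lowest clear bit is QUEUE_ORDSEQ_BAR, so the
 * barrier request itself is the current stage; once all stage bits are
 * set, blk_ordered_cur_seq() returns QUEUE_ORDSEQ_DONE and
 * blk_ordered_complete_seq() below can finish the original barrier
 * request.
 */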

unsigned blk_ordered_req_seq(struct request *rq)
{
	struct request_queue *q = rq->q;

	BUG_ON(q->ordseq == 0);

	if (rq == &q->pre_flush_rq)
		return QUEUE_ORDSEQ_PREFLUSH;
	if (rq == &q->bar_rq)
		return QUEUE_ORDSEQ_BAR;
	if (rq == &q->post_flush_rq)
		return QUEUE_ORDSEQ_POSTFLUSH;

	/*
	 * !fs requests don't need to follow barrier ordering.  Always
	 * put them at the front.  This fixes the following deadlock.
	 *
	 * http://thread.gmane.org/gmane.linux.kernel/537473
	 */
	if (!blk_fs_request(rq))
		return QUEUE_ORDSEQ_DRAIN;

	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
		return QUEUE_ORDSEQ_DRAIN;
	else
		return QUEUE_ORDSEQ_DONE;
}

bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
	struct request *rq;

	if (error && !q->orderr)
		q->orderr = error;

	BUG_ON(q->ordseq & seq);
	q->ordseq |= seq;

	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
		return false;

	/*
	 * Okay, sequence complete.
	 */
	q->ordseq = 0;
	rq = q->orig_bar_rq;
	__blk_end_request_all(rq, q->orderr);
	return true;
}

static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
{
	struct request *rq;
	rq_end_io_fn *end_io;

	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
		rq = &q->pre_flush_rq;
		end_io = pre_flush_end_io;
	} else {
		rq = &q->post_flush_rq;
		end_io = post_flush_end_io;
	}

	blk_rq_init(q, rq);
	rq->cmd_flags = REQ_HARDBARRIER;
	rq->rq_disk = q->bar_rq.rq_disk;
	rq->end_io = end_io;
	q->prepare_flush_fn(q, rq);

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	unsigned skip = 0;

	q->orderr = 0;
	q->ordered = q->next_ordered;
	q->ordseq |= QUEUE_ORDSEQ_STARTED;

	/*
	 * For an empty barrier, there's no actual BAR request, which
	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
	 */
	if (!blk_rq_sectors(rq)) {
		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
				QUEUE_ORDERED_DO_POSTFLUSH);
		/*
		 * An empty barrier on a write-through device with
		 * ordered tags has no command to issue, and without a
		 * command to issue, ordering by tag can't be used.
		 * Drain instead.
		 */
		if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
		    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
			q->ordered &= ~QUEUE_ORDERED_BY_TAG;
			q->ordered |= QUEUE_ORDERED_BY_DRAIN;
		}
	}

	/* stash away the original request */
	blk_dequeue_request(rq);
	q->orig_bar_rq = rq;
	rq = NULL;

	/*
	 * Queue the ordered sequence.  As we stack the requests at the
	 * head, we need to queue them in reverse order.  Note that we
	 * rely on the fact that no fs request uses ELEVATOR_INSERT_FRONT
	 * and thus no fs request gets in between the ordered sequence.
	 */
	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
		rq = &q->post_flush_rq;
	} else
		skip |= QUEUE_ORDSEQ_POSTFLUSH;

	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
		rq = &q->bar_rq;

		/* initialize proxy request and queue it */
		blk_rq_init(q, rq);
		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
			rq->cmd_flags |= REQ_RW;
		if (q->ordered & QUEUE_ORDERED_DO_FUA)
			rq->cmd_flags |= REQ_FUA;
		init_request_from_bio(rq, q->orig_bar_rq->bio);
		rq->end_io = bar_end_io;

		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
	} else
		skip |= QUEUE_ORDSEQ_BAR;

	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
		rq = &q->pre_flush_rq;
	} else
		skip |= QUEUE_ORDSEQ_PREFLUSH;

	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
		rq = NULL;
	else
		skip |= QUEUE_ORDSEQ_DRAIN;

	*rqp = rq;

	/*
	 * Complete skipped sequences.  If the whole sequence is complete,
	 * return false to tell the elevator that this request is gone.
	 */
	return !blk_ordered_complete_seq(q, skip, 0);
}
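
/*
 * Worked example of the reverse queueing in start_ordered(): for an
 * ordered mode with both PREFLUSH and POSTFLUSH enabled, post_flush_rq
 * is front-inserted first, then bar_rq, then pre_flush_rq, so the queue
 * head ends up as
 *
 *	pre_flush_rq -> bar_rq -> post_flush_rq -> (rest of the queue)
 *
 * and the device sees preflush, barrier and postflush in the intended
 * order.  *rqp is left pointing at whichever request must be dispatched
 * first, or set to NULL while already in-flight requests still have to
 * drain.
 */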

bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

	if (!q->ordseq) {
		if (!is_barrier)
			return true;

		if (q->next_ordered != QUEUE_ORDERED_NONE)
			return start_ordered(q, rqp);
		else {
			/*
			 * Queue ordering not supported.  Terminate
			 * with prejudice.
			 */
			blk_dequeue_request(rq);
			__blk_end_request_all(rq, -EOPNOTSUPP);
			*rqp = NULL;
			return false;
		}
	}

	/*
	 * Ordered sequence in progress
	 */

	/* Special requests are not subject to ordering rules. */
	if (!blk_fs_request(rq) &&
	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
		return true;

	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
		/* Ordered by tag.  Blocking the next barrier is enough. */
		if (is_barrier && rq != &q->bar_rq)
			*rqp = NULL;
	} else {
		/* Ordered by draining.  Wait for turn. */
		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
			*rqp = NULL;
	}

	return true;
}
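
/*
 * Rough sketch of the expected caller (the real dispatch loop lives in
 * the block core and may differ in detail): each request at the queue
 * head is filtered through blk_do_ordered() before it is handed to the
 * driver.
 *
 *	while (!list_empty(&q->queue_head)) {
 *		rq = list_entry_rq(q->queue_head.next);
 *		if (blk_do_ordered(q, &rq))
 *			return rq;	(may be NULL: hold off dispatch)
 *	}
 *
 * A false return means the request has been consumed here, either to
 * start an ordered sequence or because it was terminated; the loop then
 * looks at whatever is at the head now.
 */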

static void bio_end_empty_barrier(struct bio *bio, int err)
{
	if (err) {
		if (err == -EOPNOTSUPP)
			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	}

	complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:	blockdev to issue flush for
 * @error_sector:	if non-NULL, the sector of a failed flush is stored here
 *
 * Description:
 *    Issue a flush for the block device in question.  The caller may
 *    supply room for storing the error sector of a failed flush, if
 *    desired.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	bio = bio_alloc(GFP_KERNEL, 0);
	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_private = &wait;
	bio->bi_bdev = bdev;
	submit_bio(WRITE_BARRIER, bio);

	wait_for_completion(&wait);

	/*
	 * The driver must store the error location in ->bi_sector, if
	 * it supports it. For non-stacked drivers, this should be copied
	 * from blk_rq_pos(rq).
	 */
	if (error_sector)
		*error_sector = bio->bi_sector;

	ret = 0;
	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;
	else if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
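
/*
 * Illustrative use from a filesystem's fsync path, assuming the caller
 * does not care about the error sector:
 *
 *	err = blkdev_issue_flush(sb->s_bdev, NULL);
 *	if (err == -EOPNOTSUPP)
 *		err = 0;
 *
 * -EOPNOTSUPP only means the device does not implement (or does not
 * need) cache flushes, so callers commonly treat it as success.
 */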

static void blkdev_discard_end_io(struct bio *bio, int err)
{
	if (err) {
		if (err == -EOPNOTSUPP)
			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	}

	if (bio->bi_private)
		complete(bio->bi_private);
	__free_page(bio_page(bio));

	bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio and payload allocation)
 * @flags:	DISCARD_FL_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q = bdev_get_queue(bdev);
	int type = flags & DISCARD_FL_BARRIER ?
		DISCARD_BARRIER : DISCARD_NOBARRIER;
	struct bio *bio;
	struct page *page;
	int ret = 0;

	if (!q)
		return -ENXIO;

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;

	while (nr_sects && !ret) {
		unsigned int sector_size = q->limits.logical_block_size;
		unsigned int max_discard_sectors =
			min(q->limits.max_discard_sectors, UINT_MAX >> 9);

		bio = bio_alloc(gfp_mask, 1);
		if (!bio)
			goto out;
		bio->bi_sector = sector;
		bio->bi_end_io = blkdev_discard_end_io;
		bio->bi_bdev = bdev;
		if (flags & DISCARD_FL_WAIT)
			bio->bi_private = &wait;

		/*
		 * Add a zeroed one-sector payload, as that is what the
		 * current implementations need.  If we ever need more,
		 * the interface will have to be revisited.
		 */
		page = alloc_page(gfp_mask | __GFP_ZERO);
		if (!page)
			goto out_free_bio;
		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
			goto out_free_page;

		/*
		 * And override the bio size - the way discard works, we
		 * touch many more blocks on disk than the actual payload
		 * length.
		 */
		if (nr_sects > max_discard_sectors) {
			bio->bi_size = max_discard_sectors << 9;
			nr_sects -= max_discard_sectors;
			sector += max_discard_sectors;
		} else {
			bio->bi_size = nr_sects << 9;
			nr_sects = 0;
		}

		bio_get(bio);
		submit_bio(type, bio);

		if (flags & DISCARD_FL_WAIT)
			wait_for_completion(&wait);

		if (bio_flagged(bio, BIO_EOPNOTSUPP))
			ret = -EOPNOTSUPP;
		else if (!bio_flagged(bio, BIO_UPTODATE))
			ret = -EIO;
		bio_put(bio);
	}
	return ret;
out_free_page:
	__free_page(page);
out_free_bio:
	bio_put(bio);
out:
	return -ENOMEM;
}
EXPORT_SYMBOL(blkdev_issue_discard);
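
/*
 * Illustrative call, e.g. from a filesystem discarding a freed range:
 * discard nr_sects sectors starting at sector, as a barrier discard,
 * and wait for completion before returning:
 *
 *	ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
 *				   DISCARD_FL_WAIT | DISCARD_FL_BARRIER);
 *
 * An -EOPNOTSUPP return just means the device advertises no discard
 * support, which callers typically do not treat as fatal.
 */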