linux/block/blk-flush.c
/*
 * Functions to sequence FLUSH and FUA writes.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>

#include "blk.h"

/* FLUSH/FUA sequences */
enum {
        QUEUE_FSEQ_STARTED      = (1 << 0), /* flushing in progress */
        QUEUE_FSEQ_PREFLUSH     = (1 << 1), /* pre-flushing in progress */
        QUEUE_FSEQ_DATA         = (1 << 2), /* data write in progress */
        QUEUE_FSEQ_POSTFLUSH    = (1 << 3), /* post-flushing in progress */
        QUEUE_FSEQ_DONE         = (1 << 4),
};
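
/*
 * A flush request walks the steps above in ascending bit order:
 * PREFLUSH, then the data write, then POSTFLUSH.  q->flush_seq
 * accumulates the steps that have already completed (or were skipped
 * up front), and the sequence is finished once the next pending step
 * would be QUEUE_FSEQ_DONE.
 */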

static struct request *queue_next_fseq(struct request_queue *q);

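/*
 * Return the current (lowest unfinished) sequence step, or 0 if no
 * flush sequence is in progress.  ffz() finds the first zero bit in
 * ->flush_seq, i.e. the first step that has not completed yet.
 */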
unsigned blk_flush_cur_seq(struct request_queue *q)
{
        if (!q->flush_seq)
                return 0;
        return 1 << ffz(q->flush_seq);
}

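/*
 * Mark @seq complete.  If the sequence is not finished yet, build and
 * queue the request for the next step; otherwise end the original
 * flush request and pull in the next pending flush, if any.  Returns
 * the request that should be dispatched next, or NULL.
 */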
static struct request *blk_flush_complete_seq(struct request_queue *q,
                                              unsigned seq, int error)
{
        struct request *next_rq = NULL;

        if (error && !q->flush_err)
                q->flush_err = error;

        BUG_ON(q->flush_seq & seq);
        q->flush_seq |= seq;

        if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
                /* not complete yet, queue the next flush sequence */
                next_rq = queue_next_fseq(q);
        } else {
                /* complete this flush request */
                __blk_end_request_all(q->orig_flush_rq, q->flush_err);
                q->orig_flush_rq = NULL;
                q->flush_seq = 0;

                /* dispatch the next flush if there's one */
                if (!list_empty(&q->pending_flushes)) {
                        next_rq = list_entry_rq(q->pending_flushes.next);
                        list_move(&next_rq->queuelist, &q->queue_head);
                }
        }
        return next_rq;
}

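/*
 * Completion-path wrapper around blk_flush_complete_seq(): advance the
 * sequence and, if a request was added to a previously empty queue,
 * kick the queue so it gets dispatched.
 */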
static void blk_flush_complete_seq_end_io(struct request_queue *q,
                                          unsigned seq, int error)
{
        bool was_empty = elv_queue_empty(q);
        struct request *next_rq;

        next_rq = blk_flush_complete_seq(q, seq, error);

        /*
         * Silently moving a request onto an empty queue_head may stall
         * the queue, so kick the queue in that case.  This function is
         * called from the request completion path, and calling directly
         * into request_fn may confuse the driver, so always go through
         * kblockd.
         */
        if (was_empty && next_rq)
                __blk_run_queue(q, true);
}

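/*
 * end_io handlers for the three sequence steps.  Each one notifies the
 * elevator that the step's request has completed and then advances the
 * flush sequence.
 */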
static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
}

static void flush_data_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
}

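/* Initialize @rq as an empty FLUSH request targeted at @disk. */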
static void init_flush_request(struct request *rq, struct gendisk *disk)
{
        rq->cmd_type = REQ_TYPE_FS;
        rq->cmd_flags = WRITE_FLUSH;
        rq->rq_disk = disk;
}

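/*
 * Build q->flush_rq for the current sequence step and insert it at the
 * front of the dispatch queue (ELEVATOR_INSERT_REQUEUE).  The PREFLUSH
 * and POSTFLUSH steps use an empty FLUSH request; the DATA step reuses
 * the original request's bio and copies the (already adjusted)
 * REQ_FLUSH/REQ_FUA flags from the original request.
 */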
static struct request *queue_next_fseq(struct request_queue *q)
{
        struct request *orig_rq = q->orig_flush_rq;
        struct request *rq = &q->flush_rq;

        blk_rq_init(q, rq);

        switch (blk_flush_cur_seq(q)) {
        case QUEUE_FSEQ_PREFLUSH:
                init_flush_request(rq, orig_rq->rq_disk);
                rq->end_io = pre_flush_end_io;
                break;
        case QUEUE_FSEQ_DATA:
                init_request_from_bio(rq, orig_rq->bio);
                /*
                 * orig_rq->rq_disk may be different from
                 * bio->bi_bdev->bd_disk if orig_rq got here through
                 * remapping drivers.  Make sure rq->rq_disk points
                 * to the same one as orig_rq.
                 */
                rq->rq_disk = orig_rq->rq_disk;
                rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
                rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
                rq->end_io = flush_data_end_io;
                break;
        case QUEUE_FSEQ_POSTFLUSH:
                init_flush_request(rq, orig_rq->rq_disk);
                rq->end_io = post_flush_end_io;
                break;
        default:
                BUG();
        }

        elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
        return rq;
}

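/*
 * blk_do_flush - handle a request carrying REQ_FLUSH and/or REQ_FUA
 *
 * Decide which sequence steps are needed based on what the device
 * advertises in q->flush_flags: a pre-flush is issued only if the
 * device has a volatile write cache (REQ_FLUSH) and the request asked
 * for one, and REQ_FUA is emulated with a post-flush when the device
 * does not support FUA natively.  Returns the request to dispatch
 * next, or NULL if nothing can be dispatched right now.
 */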
struct request *blk_do_flush(struct request_queue *q, struct request *rq)
{
        unsigned int fflags = q->flush_flags; /* may change, cache it */
        bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
        bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
        bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
        unsigned skip = 0;

        /*
         * Special case.  If there's data but no flush is necessary,
         * the request can be issued directly.
         *
         * A flush without data should be issuable directly too, but
         * currently some drivers assume that a non-NULL rq->bio carries
         * data, and empty FLUSH requests getting here usually have bios
         * without data.
         */
        if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
                rq->cmd_flags &= ~REQ_FLUSH;
                if (!has_fua)
                        rq->cmd_flags &= ~REQ_FUA;
                return rq;
        }

        /*
         * Sequenced flushes can't be processed in parallel.  If
         * another one is already in progress, queue for later
         * processing.
         */
        if (q->flush_seq) {
                list_move_tail(&rq->queuelist, &q->pending_flushes);
                return NULL;
        }

        /*
         * Start a new flush sequence.
         */
        q->flush_err = 0;
        q->flush_seq |= QUEUE_FSEQ_STARTED;

        /* adjust FLUSH/FUA of the original request and stash it away */
        rq->cmd_flags &= ~REQ_FLUSH;
        if (!has_fua)
                rq->cmd_flags &= ~REQ_FUA;
        blk_dequeue_request(rq);
        q->orig_flush_rq = rq;

        /* skip unneeded sequences and return the first one */
        if (!do_preflush)
                skip |= QUEUE_FSEQ_PREFLUSH;
        if (!blk_rq_sectors(rq))
                skip |= QUEUE_FSEQ_DATA;
        if (!do_postflush)
                skip |= QUEUE_FSEQ_POSTFLUSH;
        return blk_flush_complete_seq(q, skip, 0);
}

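/*
 * Completion handler for the cache-flush bio issued by
 * blkdev_issue_flush(): record any error, wake up the waiter and drop
 * this path's reference to the bio.
 */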
static void bio_end_flush(struct bio *bio, int err)
{
        if (err)
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        if (bio->bi_private)
                complete(bio->bi_private);
        bio_put(bio);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:       blockdev to issue flush for
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @error_sector:       if non-NULL, receives the sector where an error
 *                      occurred, when the driver reports one
 *
 * Description:
 *    Issue a flush for the block device in question and wait for it to
 *    complete.  The caller can supply room for storing the error offset
 *    in case of a flush error, if they wish to.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
                sector_t *error_sector)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret = 0;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        /*
         * Some block devices may not have their queue correctly set up
         * here (e.g. a loop device without a backing file), and issuing
         * a flush against such a queue will panic.  Make sure the queue
         * has a make_request function before issuing the flush.
         */
        if (!q->make_request_fn)
                return -ENXIO;

        bio = bio_alloc(gfp_mask, 0);
        bio->bi_end_io = bio_end_flush;
        bio->bi_bdev = bdev;
        bio->bi_private = &wait;

        bio_get(bio);
        submit_bio(WRITE_FLUSH, bio);
        wait_for_completion(&wait);

        /*
         * The driver must store the error location in ->bi_sector, if
         * it supports it.  For non-stacked drivers, this should be
         * copied from blk_rq_pos(rq).
         */
        if (error_sector)
                *error_sector = bio->bi_sector;

        if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);

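/*
 * Illustrative sketch (not part of the original file): how a caller
 * that already holds a reference to a struct block_device "bdev" might
 * use blkdev_issue_flush() to make previously completed writes
 * durable.  The variable names below are hypothetical, and err_sector
 * is only meaningful on failure when the underlying driver reports an
 * error location.
 *
 *	sector_t err_sector = 0;
 *	int err = blkdev_issue_flush(bdev, GFP_KERNEL, &err_sector);
 *
 *	if (err == -EIO)
 *		pr_err("cache flush failed near sector %llu\n",
 *		       (unsigned long long)err_sector);
 *	else if (err)
 *		pr_err("cache flush could not be issued: %d\n", err);
 */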