linux/block/deadline-iosched.c
/*
 *  Deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
                                           by the above parameters. For throughput. */

struct deadline_data {
        /*
         * run time data
         */

        /*
         * requests are present on both sort_list and fifo_list
         */
        struct rb_root sort_list[2];
        struct list_head fifo_list[2];

        /*
         * next in sort order; read, write or both may be NULL
         */
        struct request *next_rq[2];
        unsigned int batching;          /* number of sequential requests made */
        unsigned int starved;           /* times reads have starved writes */

        /*
         * settings that change how the i/o scheduler behaves
         */
        int fifo_expire[2];
        int fifo_batch;
        int writes_starved;
        int front_merges;
};

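/*
 * return the sector-sorted rbtree for the data direction of rq
 */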
static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
        return &dd->sort_list[rq_data_dir(rq)];
}

/*
 * get the request after `rq' in sector-sorted order
 */
static inline struct request *
deadline_latter_request(struct request *rq)
{
        struct rb_node *node = rb_next(&rq->rb_node);

        if (node)
                return rb_entry_rq(node);

        return NULL;
}

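/*
 * add rq to the rbtree for its data direction, sorted by start sector
 */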
static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
        struct rb_root *root = deadline_rb_root(dd, rq);

        elv_rb_add(root, rq);
}

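/*
 * remove rq from the rbtree, updating the cached next request if it was rq
 */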
static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
        const int data_dir = rq_data_dir(rq);

        if (dd->next_rq[data_dir] == rq)
                dd->next_rq[data_dir] = deadline_latter_request(rq);

        elv_rb_del(deadline_rb_root(dd, rq), rq);
}

/*
 * add rq to rbtree and fifo
 */
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        const int data_dir = rq_data_dir(rq);

        /*
         * This may be a requeue of a write request that has locked its
         * target zone. If it is the case, this releases the zone lock.
         */
        blk_req_zone_write_unlock(rq);

        deadline_add_rq_rb(dd, rq);

        /*
         * set expire time and add to fifo list
         */
        rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
        list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
}

/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(struct request_queue *q, struct request *rq)
{
        struct deadline_data *dd = q->elevator->elevator_data;

        rq_fifo_clear(rq);
        deadline_del_rq_rb(dd, rq);
}

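/*
 * see if bio can be front merged with a queued request in the sort_list
 */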
static enum elv_merge
deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        struct request *__rq;

        /*
         * check for front merge
         */
        if (dd->front_merges) {
                sector_t sector = bio_end_sector(bio);

                __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
                if (__rq) {
                        BUG_ON(sector != blk_rq_pos(__rq));

                        if (elv_bio_merge_ok(__rq, bio)) {
                                *req = __rq;
                                return ELEVATOR_FRONT_MERGE;
                        }
                }
        }

        return ELEVATOR_NO_MERGE;
}

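/*
 * a bio was merged into req; a front merge changes the start sector, so
 * req must be repositioned in the rbtree
 */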
static void deadline_merged_request(struct request_queue *q,
                                    struct request *req, enum elv_merge type)
{
        struct deadline_data *dd = q->elevator->elevator_data;

        /*
         * if the merge was a front merge, we need to reposition request
         */
        if (type == ELEVATOR_FRONT_MERGE) {
                elv_rb_del(deadline_rb_root(dd, req), req);
                deadline_add_rq_rb(dd, req);
        }
}

static void
deadline_merged_requests(struct request_queue *q, struct request *req,
                         struct request *next)
{
        /*
         * if next expires before rq, assign its expire time to rq
         * and move into next position (next will be deleted) in fifo
         */
        if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
                if (time_before((unsigned long)next->fifo_time,
                                (unsigned long)req->fifo_time)) {
                        list_move(&req->queuelist, &next->queuelist);
                        req->fifo_time = next->fifo_time;
                }
        }

        /*
         * kill knowledge of next, this one is a goner
         */
        deadline_remove_request(q, next);
}

/*
 * move request from sort list to dispatch queue.
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
        struct request_queue *q = rq->q;

        /*
         * For a zoned block device, write requests must write lock their
         * target zone.
         */
        blk_req_zone_write_lock(rq);

        deadline_remove_request(q, rq);
        elv_dispatch_add_tail(q, rq);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
        const int data_dir = rq_data_dir(rq);

        dd->next_rq[READ] = NULL;
        dd->next_rq[WRITE] = NULL;
        dd->next_rq[data_dir] = deadline_latter_request(rq);

        /*
         * take it off the sort and fifo list, move
         * to dispatch queue
         */
        deadline_move_to_dispatch(dd, rq);
}

/*
 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
        struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);

        /*
         * rq is expired!
         */
        if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
                return 1;

        return 0;
}

/*
 * For the specified data direction, return the next request to dispatch using
 * arrival ordered lists.
 */
static struct request *
deadline_fifo_request(struct deadline_data *dd, int data_dir)
{
        struct request *rq;

        if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
                return NULL;

        if (list_empty(&dd->fifo_list[data_dir]))
                return NULL;

        rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
        if (data_dir == READ || !blk_queue_is_zoned(rq->q))
                return rq;

        /*
         * Look for a write request that can be dispatched, that is one with
         * an unlocked target zone.
         */
        list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
                if (blk_req_can_dispatch_to_zone(rq))
                        return rq;
        }

        return NULL;
}

/*
 * For the specified data direction, return the next request to dispatch using
 * sector position sorted lists.
 */
static struct request *
deadline_next_request(struct deadline_data *dd, int data_dir)
{
        struct request *rq;

        if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
                return NULL;

        rq = dd->next_rq[data_dir];
        if (!rq)
                return NULL;

        if (data_dir == READ || !blk_queue_is_zoned(rq->q))
                return rq;

        /*
         * Look for a write request that can be dispatched, that is one with
         * an unlocked target zone.
         */
        while (rq) {
                if (blk_req_can_dispatch_to_zone(rq))
                        return rq;
                rq = deadline_latter_request(rq);
        }

        return NULL;
}

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
static int deadline_dispatch_requests(struct request_queue *q, int force)
{
        struct deadline_data *dd = q->elevator->elevator_data;
        const int reads = !list_empty(&dd->fifo_list[READ]);
        const int writes = !list_empty(&dd->fifo_list[WRITE]);
        struct request *rq, *next_rq;
        int data_dir;

        /*
         * batches are currently reads XOR writes
         */
        rq = deadline_next_request(dd, WRITE);
        if (!rq)
                rq = deadline_next_request(dd, READ);

        if (rq && dd->batching < dd->fifo_batch)
                /* we have a next request and are still entitled to batch */
                goto dispatch_request;

        /*
         * at this point we are not running a batch. select the appropriate
         * data direction (read / write)
         */

        if (reads) {
                BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

                if (deadline_fifo_request(dd, WRITE) &&
                    (dd->starved++ >= dd->writes_starved))
                        goto dispatch_writes;

                data_dir = READ;

                goto dispatch_find_request;
        }

        /*
         * there are either no reads or writes have been starved
         */

        if (writes) {
dispatch_writes:
                BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

                dd->starved = 0;

                data_dir = WRITE;

                goto dispatch_find_request;
        }

        return 0;

dispatch_find_request:
        /*
         * we are not running a batch, find best request for selected data_dir
         */
        next_rq = deadline_next_request(dd, data_dir);
        if (deadline_check_fifo(dd, data_dir) || !next_rq) {
                /*
                 * A deadline has expired, the last request was in the other
                 * direction, or we have run out of higher-sectored requests.
                 * Start again from the request with the earliest expiry time.
                 */
                rq = deadline_fifo_request(dd, data_dir);
        } else {
                /*
                 * The last req was the same dir and we have a next request in
                 * sort order. No expired requests so continue on from here.
                 */
                rq = next_rq;
        }

        /*
         * For a zoned block device, if we only have writes queued and none of
         * them can be dispatched, rq will be NULL.
         */
        if (!rq)
                return 0;

        dd->batching = 0;

dispatch_request:
        /*
         * rq is the selected appropriate request.
         */
        dd->batching++;
        deadline_move_request(dd, rq);

        return 1;
}

/*
 * For zoned block devices, write unlock the target zone of completed
 * write requests.
 */
static void
deadline_completed_request(struct request_queue *q, struct request *rq)
{
        blk_req_zone_write_unlock(rq);
}

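/*
 * free elevator private data when the queue is torn down
 */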
static void deadline_exit_queue(struct elevator_queue *e)
{
        struct deadline_data *dd = e->elevator_data;

        BUG_ON(!list_empty(&dd->fifo_list[READ]));
        BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

        kfree(dd);
}

/*
 * initialize elevator private data (deadline_data).
 */
static int deadline_init_queue(struct request_queue *q, struct elevator_type *e)
{
        struct deadline_data *dd;
        struct elevator_queue *eq;

        eq = elevator_alloc(q, e);
        if (!eq)
                return -ENOMEM;

        dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
        if (!dd) {
                kobject_put(&eq->kobj);
                return -ENOMEM;
        }
        eq->elevator_data = dd;

        INIT_LIST_HEAD(&dd->fifo_list[READ]);
        INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
        dd->sort_list[READ] = RB_ROOT;
        dd->sort_list[WRITE] = RB_ROOT;
        dd->fifo_expire[READ] = read_expire;
        dd->fifo_expire[WRITE] = write_expire;
        dd->writes_starved = writes_starved;
        dd->front_merges = 1;
        dd->fifo_batch = fifo_batch;

        spin_lock_irq(q->queue_lock);
        q->elevator = eq;
        spin_unlock_irq(q->queue_lock);
        return 0;
}

/*
 * sysfs parts below
 */

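/*
 * helpers to format and parse a single integer tunable
 */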
static ssize_t
deadline_var_show(int var, char *page)
{
        return sprintf(page, "%d\n", var);
}

static void
deadline_var_store(int *var, const char *page)
{
        char *p = (char *) page;

        *var = simple_strtol(p, &p, 10);
}

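/*
 * generate a sysfs show method for each tunable; __CONV converts the
 * stored jiffies value to milliseconds for display
 */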
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)                            \
static ssize_t __FUNC(struct elevator_queue *e, char *page)             \
{                                                                       \
        struct deadline_data *dd = e->elevator_data;                    \
        int __data = __VAR;                                             \
        if (__CONV)                                                     \
                __data = jiffies_to_msecs(__data);                      \
        return deadline_var_show(__data, (page));                       \
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

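/*
 * generate a sysfs store method for each tunable, clamping the input to
 * [MIN, MAX]; __CONV converts milliseconds back to jiffies before storing
 */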
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                 \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{                                                                       \
        struct deadline_data *dd = e->elevator_data;                    \
        int __data;                                                     \
        deadline_var_store(&__data, (page));                            \
        if (__data < (MIN))                                             \
                __data = (MIN);                                         \
        else if (__data > (MAX))                                        \
                __data = (MAX);                                         \
        if (__CONV)                                                     \
                *(__PTR) = msecs_to_jiffies(__data);                    \
        else                                                            \
                *(__PTR) = __data;                                      \
        return count;                                                   \
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
        __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
                                      deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
        DD_ATTR(read_expire),
        DD_ATTR(write_expire),
        DD_ATTR(writes_starved),
        DD_ATTR(front_merges),
        DD_ATTR(fifo_batch),
        __ATTR_NULL
};

static struct elevator_type iosched_deadline = {
        .ops.sq = {
                .elevator_merge_fn =            deadline_merge,
                .elevator_merged_fn =           deadline_merged_request,
                .elevator_merge_req_fn =        deadline_merged_requests,
                .elevator_dispatch_fn =         deadline_dispatch_requests,
                .elevator_completed_req_fn =    deadline_completed_request,
                .elevator_add_req_fn =          deadline_add_request,
                .elevator_former_req_fn =       elv_rb_former_request,
                .elevator_latter_req_fn =       elv_rb_latter_request,
                .elevator_init_fn =             deadline_init_queue,
                .elevator_exit_fn =             deadline_exit_queue,
        },

        .elevator_attrs = deadline_attrs,
        .elevator_name = "deadline",
        .elevator_owner = THIS_MODULE,
};

static int __init deadline_init(void)
{
        return elv_register(&iosched_deadline);
}

static void __exit deadline_exit(void)
{
        elv_unregister(&iosched_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");