linux/block/elevator.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is correct now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-wbt.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
/*
 * Merge hash: requests are hashed on the first sector past their end,
 * so that a bio starting at that sector can be back-merged quickly.
 */
#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))

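/*
 * Example: a request starting at sector 100 and spanning 8 sectors is
 * hashed under key 108; a new bio with bi_sector == 108, found via
 * elv_rqhash_find(), is then a candidate for a back merge.
 */
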
  56/*
  57 * Query io scheduler to see if the current process issuing bio may be
  58 * merged with rq.
  59 */
  60static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
  61{
  62        struct request_queue *q = rq->q;
  63        struct elevator_queue *e = q->elevator;
  64
  65        if (e->type->ops.allow_merge)
  66                return e->type->ops.allow_merge(q, rq, bio);
  67
  68        return 1;
  69}
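
/*
 * An elevator opts in by setting .allow_merge in its elevator_type ops;
 * for instance, a context-aware scheduler can use this hook to refuse
 * merges across different io contexts (BFQ does this with its
 * bfq_allow_bio_merge() hook). When the hook is absent, merging is
 * always allowed at this level.
 */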

/*
 * can we safely merge with this request?
 */
bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
{
	if (!blk_rq_merge_ok(rq, bio))
		return false;

	if (!elv_iosched_allow_bio_merge(rq, bio))
		return false;

	return true;
}
EXPORT_SYMBOL(elv_bio_merge_ok);

static bool elevator_match(const struct elevator_type *e, const char *name)
{
	if (!strcmp(e->elevator_name, name))
		return true;
	if (e->elevator_alias && !strcmp(e->elevator_alias, name))
		return true;

	return false;
}
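
/*
 * The alias lets a scheduler answer to a legacy name as well: e.g.
 * mq-deadline declares the alias "deadline", so either name selects it.
 */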

/*
 * Return scheduler with name 'name'
 */
static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;

	list_for_each_entry(e, &elv_list, list) {
		if (elevator_match(e, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(struct request_queue *q,
					  const char *name, bool try_loading)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name);
	if (!e && try_loading) {
		spin_unlock(&elv_list_lock);
		request_module("%s-iosched", name);
		spin_lock(&elv_list_lock);
		e = elevator_find(name);
	}

	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);
	return e;
}

static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);
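
/*
 * Example: booting with "elevator=mq-deadline" stores that name in
 * chosen_elevator. Similarly, the request_module("%s-iosched", ...)
 * call in elevator_get() means a scheduler named "foo" may be
 * autoloaded from a module named foo-iosched.
 */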

static struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
				  struct elevator_type *e)
{
	struct elevator_queue *eq;

	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
	if (unlikely(!eq))
		return NULL;

	eq->type = e;
	kobject_init(&eq->kobj, &elv_ktype);
	mutex_init(&eq->sysfs_lock);
	hash_init(eq->hash);

	return eq;
}
EXPORT_SYMBOL(elevator_alloc);

static void elevator_release(struct kobject *kobj)
{
	struct elevator_queue *e;

	e = container_of(kobj, struct elevator_queue, kobj);
	elevator_put(e->type);
	kfree(e);
}

void __elevator_exit(struct request_queue *q, struct elevator_queue *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->type->ops.exit_sched)
		blk_mq_exit_sched(q, e);
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}

static inline void __elv_rqhash_del(struct request *rq)
{
	hash_del(&rq->hash);
	rq->rq_flags &= ~RQF_HASHED;
}

void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);

void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
	rq->rq_flags |= RQF_HASHED;
}
EXPORT_SYMBOL_GPL(elv_rqhash_add);

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	struct elevator_queue *e = q->elevator;
	struct hlist_node *next;
	struct request *rq;

	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
void elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
			p = &(*p)->rb_left;
		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
			p = &(*p)->rb_right;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < blk_rq_pos(rq))
			n = n->rb_left;
		else if (sector > blk_rq_pos(rq))
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}
EXPORT_SYMBOL(elv_rb_find);
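
/*
 * Illustrative use: mq-deadline keeps one such tree per data direction
 * (its sort_list[]) and calls elv_rb_add()/elv_rb_del() as requests
 * enter and leave the scheduler; it also wires elv_rb_former_request()
 * and elv_rb_latter_request() (at the end of this file) into its ops
 * to walk that tree in LBA order.
 */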

enum elv_merge elv_merge(struct request_queue *q, struct request **req,
		struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;

	/*
	 * Levels of merges:
	 *	nomerges:  No merges at all attempted
	 *	noxmerges: Only simple one-hit cache try
	 *	merges:    All merge tries attempted
	 */
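	/*
	 * These levels map onto the queue's "nomerges" sysfs attribute:
	 * 0 selects merges, 1 noxmerges, 2 nomerges.
	 */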
	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
		return ELEVATOR_NO_MERGE;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
		enum elv_merge ret = blk_try_merge(q->last_merge, bio);

		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_noxmerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
	if (__rq && elv_bio_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->type->ops.request_merge)
		return e->type->ops.request_merge(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 */
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
{
	struct request *__rq;
	bool ret;

	if (blk_queue_nomerges(q))
		return false;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
		return true;

	if (blk_queue_noxmerges(q))
		return false;

	ret = false;
	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	while (1) {
		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
			break;

		/* The merged request could be merged with others, try again */
		ret = true;
		rq = __rq;
	}

	return ret;
}
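
/*
 * Worked example for the loop above: with requests A (sectors 0-7) and
 * B (sectors 8-15) already hashed, inserting C (sectors 16-23) first
 * finds B via rq_hash_key() and appends C to it; the loop then retries
 * with B (now 8-23), finds A, and appends B to A as well, collapsing
 * the chain into one request covering sectors 0-23.
 */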

void elv_merged_request(struct request_queue *q, struct request *rq,
		enum elv_merge type)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.request_merged)
		e->type->ops.request_merged(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
			     struct request *next)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.requests_merged)
		e->type->ops.requests_merged(q, rq, next);

	elv_rqhash_reposition(q, rq);
	q->last_merge = rq;
}

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.next_request)
		return e->type->ops.next_request(q, rq);

	return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.former_request)
		return e->type->ops.former_request(q, rq);

	return NULL;
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->show)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->store)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};
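
/*
 * Schedulers expose tunables by pointing elevator_attrs at a
 * NULL-terminated array of elv_fs_entry. A minimal sketch (names
 * hypothetical):
 *
 *	static struct elv_fs_entry example_attrs[] = {
 *		__ATTR(example_tunable, 0644, example_show, example_store),
 *		__ATTR_NULL
 *	};
 *
 * These files appear under /sys/block/<dev>/queue/iosched/ once
 * elv_register_queue() below has run.
 */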

int elv_register_queue(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;
	int error;

	lockdep_assert_held(&q->sysfs_lock);

	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
	if (!error) {
		struct elv_fs_entry *attr = e->type->elevator_attrs;

		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
		e->registered = 1;
	}
	return error;
}

void elv_unregister_queue(struct request_queue *q)
{
	if (q) {
		struct elevator_queue *e = q->elevator;

		lockdep_assert_held(&q->sysfs_lock);

		kobject_uevent(&e->kobj, KOBJ_REMOVE);
		kobject_del(&e->kobj);
		e->registered = 0;
		/* Re-enable throttling in case elevator disabled it */
		wbt_enable_default(q);
	}
}

int elv_register(struct elevator_type *e)
{
	/* create icq_cache if requested */
	if (e->icq_size) {
		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
			return -EINVAL;

		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
			 "%s_io_cq", e->elevator_name);
		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
						 e->icq_align, 0, NULL);
		if (!e->icq_cache)
			return -ENOMEM;
	}

	/* register, don't allow duplicate names */
	spin_lock(&elv_list_lock);
	if (elevator_find(e->elevator_name)) {
		spin_unlock(&elv_list_lock);
		kmem_cache_destroy(e->icq_cache);
		return -EBUSY;
	}
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	printk(KERN_INFO "io scheduler %s registered\n", e->elevator_name);

	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);
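
/*
 * Example: BFQ needs per-(task, device) state, so it registers with
 * icq_size = sizeof(struct bfq_io_cq); elv_register() then creates a
 * "bfq_io_cq" slab cache for those objects. Schedulers without
 * per-io-context state leave icq_size at 0 and skip the cache.
 */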

void elv_unregister(struct elevator_type *e)
{
	/* unregister */
	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);

	/*
	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
	 * sure all RCU operations are complete before proceeding.
	 */
	if (e->icq_cache) {
		rcu_barrier();
		kmem_cache_destroy(e->icq_cache);
		e->icq_cache = NULL;
	}
}
EXPORT_SYMBOL_GPL(elv_unregister);

int elevator_switch_mq(struct request_queue *q,
			      struct elevator_type *new_e)
{
	int ret;

	lockdep_assert_held(&q->sysfs_lock);

	if (q->elevator) {
		if (q->elevator->registered)
			elv_unregister_queue(q);
		ioc_clear_queue(q);
		elevator_exit(q, q->elevator);
	}

	ret = blk_mq_init_sched(q, new_e);
	if (ret)
		goto out;

	if (new_e) {
		ret = elv_register_queue(q);
		if (ret) {
			elevator_exit(q, q->elevator);
			goto out;
		}
		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
	} else {
		blk_add_trace_msg(q, "elv switch: none");
	}

out:
	return ret;
}

/*
 * For single-queue blk-mq devices, default to mq-deadline if it is
 * available. For devices with multiple hardware queues, or if
 * mq-deadline isn't available, default to "none".
 */
int elevator_init_mq(struct request_queue *q)
{
	struct elevator_type *e;
	int err = 0;

	if (q->nr_hw_queues != 1)
		return 0;

	/*
	 * q->sysfs_lock must be held to provide mutual exclusion between
	 * elevator_switch() and here.
	 */
	mutex_lock(&q->sysfs_lock);
	if (unlikely(q->elevator))
		goto out_unlock;

	e = elevator_get(q, "mq-deadline", false);
	if (!e)
		goto out_unlock;

	err = blk_mq_init_sched(q, e);
	if (err)
		elevator_put(e);
out_unlock:
	mutex_unlock(&q->sysfs_lock);
	return err;
}
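
/*
 * Example: a virtio-blk device exposing a single hardware queue comes
 * up with mq-deadline (when built in), while a typical NVMe SSD with
 * one hardware queue per CPU starts out with "none".
 */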

/*
 * Switch to new_e io scheduler. Be careful not to introduce deadlocks:
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one. This way we have a chance of going back to the
 * old one if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	int err;

	lockdep_assert_held(&q->sysfs_lock);

	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	err = elevator_switch_mq(q, new_e);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);

	return err;
}

/*
 * Switch this queue to the given IO scheduler.
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *e;

	/* Make sure queue is not in the middle of being removed */
	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
		return -ENOENT;

	/*
	 * Special case for mq, turn off scheduling
	 */
	if (!strncmp(name, "none", 4)) {
		if (!q->elevator)
			return 0;
		return elevator_switch(q, NULL);
	}

	strlcpy(elevator_name, name, sizeof(elevator_name));
	e = elevator_get(q, strstrip(elevator_name), true);
	if (!e)
		return -EINVAL;

	if (q->elevator && elevator_match(q->elevator->type, elevator_name)) {
		elevator_put(e);
		return 0;
	}

	return elevator_switch(q, e);
}

static inline bool elv_support_iosched(struct request_queue *q)
{
	if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
		return false;
	return true;
}

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	int ret;

	if (!queue_is_mq(q) || !elv_support_iosched(q))
		return count;

	ret = __elevator_change(q, name);
	if (!ret)
		return count;

	return ret;
}
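
/*
 * Example (from userspace): the store above is what runs when a
 * scheduler name is written to the queue's sysfs attribute, e.g.:
 *
 *	# echo kyber > /sys/block/sda/queue/scheduler
 *	# echo none  > /sys/block/sda/queue/scheduler
 */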

ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	struct elevator_queue *e = q->elevator;
	struct elevator_type *elv = NULL;
	struct elevator_type *__e;
	int len = 0;

	if (!queue_is_mq(q))
		return sprintf(name, "none\n");

	if (!q->elevator)
		len += sprintf(name+len, "[none] ");
	else
		elv = e->type;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (elv && elevator_match(elv, __e->elevator_name)) {
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
			continue;
		}
		if (elv_support_iosched(q))
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	if (q->elevator)
		len += sprintf(name+len, "none");

	len += sprintf(name+len, "\n");
	return len;
}
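
/*
 * Example output when mq-deadline is active and kyber is also
 * registered (the active scheduler is bracketed, and "none" is listed
 * last since a scheduler is in use):
 *
 *	$ cat /sys/block/sda/queue/scheduler
 *	[mq-deadline] kyber none
 */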

struct request *elv_rb_former_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);