linux/kernel/wait.c
/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/wait.h>
#include <linux/hash.h>
#include <linux/kthread.h>

void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
{
        spin_lock_init(&q->lock);
        lockdep_set_class_and_name(&q->lock, key, name);
        INIT_LIST_HEAD(&q->task_list);
}

EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
        unsigned long flags;

        wait->flags &= ~WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&q->lock, flags);
        __add_wait_queue(q, wait);
        spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
        unsigned long flags;

        wait->flags |= WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&q->lock, flags);
        __add_wait_queue_tail(q, wait);
        spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
        unsigned long flags;

        spin_lock_irqsave(&q->lock, flags);
        __remove_wait_queue(q, wait);
        spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);


/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
        unsigned long flags;

        wait->flags &= ~WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&q->lock, flags);
        if (list_empty(&wait->task_list))
                __add_wait_queue(q, wait);
        set_current_state(state);
        spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);

void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
        unsigned long flags;

        wait->flags |= WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&q->lock, flags);
        if (list_empty(&wait->task_list))
                __add_wait_queue_tail(q, wait);
        set_current_state(state);
        spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);

/**
 * finish_wait - clean up after waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
        unsigned long flags;

        __set_current_state(TASK_RUNNING);
        /*
         * We can check for list emptiness outside the lock
         * IFF:
         *  - we use the "careful" check that verifies both
         *    the next and prev pointers, so that there cannot
         *    be any half-pending updates in progress on other
         *    CPUs that we haven't seen yet (and that might
         *    still change the stack area).
         * and
         *  - all other users take the lock (i.e. we can only
         *    have _one_ other CPU that looks at or modifies
         *    the list).
         */
        if (!list_empty_careful(&wait->task_list)) {
                spin_lock_irqsave(&q->lock, flags);
                list_del_init(&wait->task_list);
                spin_unlock_irqrestore(&q->lock, flags);
        }
}
EXPORT_SYMBOL(finish_wait);
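
/*
 * Illustrative sketch (not part of this file): the open-coded wait loop
 * that prepare_to_wait()/finish_wait() are built for.  "condition" and
 * the waitqueue "q" are hypothetical; DEFINE_WAIT() comes from
 * <linux/wait.h> and uses autoremove_wake_function() defined below.
 *
 *      DEFINE_WAIT(wait);
 *
 *      for (;;) {
 *              prepare_to_wait(&q, &wait, TASK_INTERRUPTIBLE);
 *              if (condition)
 *                      break;
 *              if (signal_pending(current))
 *                      break;
 *              schedule();
 *      }
 *      finish_wait(&q, &wait);
 *
 * The waker sets the condition and then calls wake_up(&q); the ordering
 * described above prepare_to_wait() guarantees the sleeper either sees
 * the condition or sees the wakeup.
 */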

/**
 * abort_exclusive_wait - abort exclusive waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 * @mode: runstate of the waiter to be woken
 * @key: key to identify a wait bit queue or %NULL
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 *
 * Wakes up the next waiter if the caller is concurrently
 * woken up through the queue.
 *
 * This prevents waiter starvation where an exclusive waiter
 * aborts and is woken up concurrently and no one wakes up
 * the next waiter.
 */
void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
                        unsigned int mode, void *key)
{
        unsigned long flags;

        __set_current_state(TASK_RUNNING);
        spin_lock_irqsave(&q->lock, flags);
        if (!list_empty(&wait->task_list))
                list_del_init(&wait->task_list);
        else if (waitqueue_active(q))
                __wake_up_locked_key(q, mode, key);
        spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(abort_exclusive_wait);
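
/*
 * Illustrative sketch (not part of this file): an exclusive waiter that
 * bails out early (here on a signal) uses abort_exclusive_wait() rather
 * than finish_wait(), mirroring what __wait_on_bit_lock() does below.
 * "condition" and the waitqueue "q" are hypothetical.
 *
 *      DEFINE_WAIT(wait);
 *
 *      for (;;) {
 *              prepare_to_wait_exclusive(&q, &wait, TASK_INTERRUPTIBLE);
 *              if (condition)
 *                      break;
 *              if (signal_pending(current)) {
 *                      abort_exclusive_wait(&q, &wait, TASK_INTERRUPTIBLE, NULL);
 *                      return -ERESTARTSYS;
 *              }
 *              schedule();
 *      }
 *      finish_wait(&q, &wait);
 *
 * Calling plain finish_wait() on the abort path could consume a
 * concurrent wake-up without passing it on, starving the next
 * exclusive waiter.
 */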

int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
        int ret = default_wake_function(wait, mode, sync, key);

        if (ret)
                list_del_init(&wait->task_list);
        return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

static inline bool is_kthread_should_stop(void)
{
        return (current->flags & PF_KTHREAD) && kthread_should_stop();
}

static int
var_wake_function(wait_queue_t *wq_entry, unsigned int mode,
                  int sync, void *arg)
{
        struct wait_bit_key *key = arg;
        struct wait_bit_queue *wbq_entry =
                container_of(wq_entry, struct wait_bit_queue, wait);

        if (wbq_entry->key.flags != key->flags ||
            wbq_entry->key.bit_nr != key->bit_nr)
                return 0;

        return autoremove_wake_function(wq_entry, mode, sync, key);
}

/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_func);
 *
 * add_wait_queue(&wq, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     p->state = mode;                         condition = true;
 *     smp_mb(); // A                           smp_wmb(); // C
 *     if (!(wait->flags & WQ_FLAG_WOKEN))      wait->flags |= WQ_FLAG_WOKEN;
 *         schedule()                           try_to_wake_up();
 *     p->state = TASK_RUNNING;             ~~~~~~~~~~~~~~~~~~
 *     wait->flags &= ~WQ_FLAG_WOKEN;           condition = true;
 *     smp_mb() // B                            smp_wmb(); // C
 *                                              wait->flags |= WQ_FLAG_WOKEN;
 * }
 * remove_wait_queue(&wq, &wait);
 *
 */
long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
{
        set_current_state(mode); /* A */
        /*
         * The above implies an smp_mb(), which matches with the smp_wmb() from
         * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
         * also observe all state before the wakeup.
         */
        if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
                timeout = schedule_timeout(timeout);
        __set_current_state(TASK_RUNNING);

        /*
         * The below implies an smp_mb(), it too pairs with the smp_wmb() from
         * woken_wake_function() such that we must either observe the wait
         * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
         * an event.
         */
        smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */

        return timeout;
}
EXPORT_SYMBOL(wait_woken);

int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
        /*
         * Although this function is called under waitqueue lock, LOCK
         * doesn't imply write barrier and the users expect write
         * barrier semantics on wakeup functions.  The following
         * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
         * and is paired with smp_store_mb() in wait_woken().
         */
        smp_wmb(); /* C */
        wait->flags |= WQ_FLAG_WOKEN;

        return default_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);
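
/*
 * Illustrative sketch (not part of this file): the wait_woken() pattern
 * from the diagram above, as used by e.g. socket receive paths.
 * "condition", the waitqueue "q" and "timeout" are hypothetical.
 *
 *      DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *
 *      add_wait_queue(&q, &wait);
 *      while (!condition) {
 *              timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 *              if (!timeout || signal_pending(current))
 *                      break;
 *      }
 *      remove_wait_queue(&q, &wait);
 *
 * The waker only sets the condition and calls wake_up(&q);
 * woken_wake_function() records WQ_FLAG_WOKEN so a wake-up racing with
 * the flag clearing in wait_woken() is not lost.
 */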

/*
 * RH: the original wake_bit_function() is retained for possible 3rd-party
 * users of DEFINE_WAIT_BIT before commit
 * "sched: Allow wait_on_bit_action() functions to support a timeout"
 */
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
        struct wait_bit_key_deprecated *key = arg;
        struct wait_bit_queue_deprecated *wait_bit
                = container_of(wait, struct wait_bit_queue_deprecated, wait);

        if (wait_bit->key.flags != key->flags ||
                        wait_bit->key.bit_nr != key->bit_nr ||
                        test_bit(key->bit_nr, key->flags))
                return 0;
        else
                return autoremove_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(wake_bit_function);

int wake_bit_function_rh(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
        struct wait_bit_key *key = arg;
        struct wait_bit_queue *wait_bit
                = container_of(wait, struct wait_bit_queue, wait);

        if (wait_bit->key.flags != key->flags ||
                        wait_bit->key.bit_nr != key->bit_nr ||
                        test_bit(key->bit_nr, key->flags))
                return 0;
        else
                return autoremove_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(wake_bit_function_rh);

/*
 * To allow interruptible waiting and asynchronous (i.e. nonblocking)
 * waiting, the action functions passed to __wait_on_bit() and
 * __wait_on_bit_lock() may return nonzero codes; a nonzero return
 * halts waiting and is propagated back to the caller.
 */
int __sched
__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
              wait_bit_action_f *action, unsigned mode)
{
        int ret = 0;

        do {
                prepare_to_wait(wq, &q->wait, mode);
                if (test_bit(q->key.bit_nr, q->key.flags))
                        ret = (*action)(&q->key, mode);
        } while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
        finish_wait(wq, &q->wait);
        return ret;
}
EXPORT_SYMBOL(__wait_on_bit);

int __sched out_of_line_wait_on_bit(void *word, int bit,
                                    wait_bit_action_f *action, unsigned mode)
{
        wait_queue_head_t *wq = bit_waitqueue(word, bit);
        DEFINE_WAIT_BIT(wait, word, bit);

        return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit);

int __sched out_of_line_wait_on_bit_timeout(
        void *word, int bit, wait_bit_action_f *action,
        unsigned mode, unsigned long timeout)
{
        wait_queue_head_t *wq = bit_waitqueue(word, bit);
        DEFINE_WAIT_BIT(wait, word, bit);

        wait.key.timeout = jiffies + timeout;
        return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
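
/*
 * Illustrative sketch (not part of this file): waiting for a flag bit to
 * clear with a bounded sleep, using bit_wait_timeout() (defined below)
 * as the action.  "flags" and HYPOTHETICAL_BIT are assumptions.
 *
 *      if (test_bit(HYPOTHETICAL_BIT, &flags))
 *              err = out_of_line_wait_on_bit_timeout(&flags, HYPOTHETICAL_BIT,
 *                                                    bit_wait_timeout,
 *                                                    TASK_UNINTERRUPTIBLE,
 *                                                    5 * HZ);
 *
 * With TASK_UNINTERRUPTIBLE, a return of -EAGAIN means the timeout
 * expired while the bit was still set; 0 means the bit was observed
 * clear.
 */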

int __sched
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
                        wait_bit_action_f *action, unsigned mode)
{
        do {
                int ret;

                prepare_to_wait_exclusive(wq, &q->wait, mode);
                if (!test_bit(q->key.bit_nr, q->key.flags))
                        continue;
                ret = action(&q->key, mode);
                if (!ret)
                        continue;
                abort_exclusive_wait(wq, &q->wait, mode, &q->key);
                return ret;
        } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
        finish_wait(wq, &q->wait);
        return 0;
}
EXPORT_SYMBOL(__wait_on_bit_lock);

int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
                                         wait_bit_action_f *action, unsigned mode)
{
        wait_queue_head_t *wq = bit_waitqueue(word, bit);
        DEFINE_WAIT_BIT(wait, word, bit);

        return __wait_on_bit_lock(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);

void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
{
        struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
        if (waitqueue_active(wq))
                __wake_up(wq, TASK_NORMAL, 1, &key);
}
EXPORT_SYMBOL(__wake_up_bit);

/**
 * wake_up_bit - wake up a waiter on a bit
 * @word: the word being waited on, a kernel virtual address
 * @bit: the bit of the word being waited on
 *
 * There is a standard hashed waitqueue table for generic use. This
 * is the part of the hashtable's accessor API that wakes up waiters
 * on a bit. For instance, if one were to have waiters on a bitflag,
 * one would call wake_up_bit() after clearing the bit.
 *
 * In order for this to function properly, as it uses waitqueue_active()
 * internally, some kind of memory barrier must be issued prior to calling
 * this. Typically, this will be smp_mb__after_clear_bit(), but in some
 * cases where bitflags are manipulated non-atomically under a lock, one
 * may need to use a less regular barrier, such as fs/inode.c's smp_mb(),
 * because spin_unlock() does not guarantee a memory barrier.
 */
void wake_up_bit(void *word, int bit)
{
        __wake_up_bit(bit_waitqueue(word, bit), word, bit);
}
EXPORT_SYMBOL(wake_up_bit);
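
/*
 * Illustrative sketch (not part of this file): the waker side described
 * in the comment above.  "flags" and HYPOTHETICAL_BIT are assumptions;
 * the waiter side would use wait_on_bit() from <linux/wait.h> on the
 * same word and bit.
 *
 *      clear_bit(HYPOTHETICAL_BIT, &flags);
 *      smp_mb__after_clear_bit();      // order the clear vs. waitqueue_active()
 *      wake_up_bit(&flags, HYPOTHETICAL_BIT);
 */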

#define WAIT_TABLE_BITS 8
#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;

wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
        const int shift = BITS_PER_LONG == 32 ? 5 : 6;
        struct page *page = is_vmalloc_addr(word) ?
                vmalloc_to_page(word) : virt_to_page(word);
        const struct zone *zone = page_zone(page);
        unsigned long val = (unsigned long)word << shift | bit;

        return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
}
EXPORT_SYMBOL(bit_waitqueue);

/*
 * Manipulate the atomic_t address to produce a better bit waitqueue table hash
 * index (we're keying off bit -1, but that would produce a horrible hash
 * value).
 */
static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
{
        if (BITS_PER_LONG == 64) {
                unsigned long q = (unsigned long)p;
                return bit_waitqueue((void *)(q & ~1), q & 1);
        }
        return bit_waitqueue(p, 0);
}

static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
                                  void *arg)
{
        struct wait_bit_key *key = arg;
        struct wait_bit_queue *wait_bit
                = container_of(wait, struct wait_bit_queue, wait);
        atomic_t *val = key->flags;

        if (wait_bit->key.flags != key->flags ||
            wait_bit->key.bit_nr != key->bit_nr ||
            atomic_read(val) != 0)
                return 0;
        return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * To allow interruptible waiting and asynchronous (i.e. nonblocking)
 * waiting, the action function passed to __wait_on_atomic_t() may return
 * nonzero codes; a nonzero return halts waiting and is propagated back
 * to the caller.
 */
static __sched
int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
                       int (*action)(atomic_t *), unsigned mode)
{
        atomic_t *val;
        int ret = 0;

        do {
                prepare_to_wait(wq, &q->wait, mode);
                val = q->key.flags;
                if (atomic_read(val) == 0)
                        break;
                ret = (*action)(val);
        } while (!ret && atomic_read(val) != 0);
        finish_wait(wq, &q->wait);
        return ret;
}

#define DEFINE_WAIT_ATOMIC_T(name, p)                                   \
        struct wait_bit_queue name = {                                  \
                .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),              \
                .wait   = {                                             \
                        .private        = current,                      \
                        .func           = wake_atomic_t_function,       \
                        .task_list      =                               \
                                LIST_HEAD_INIT((name).wait.task_list),  \
                },                                                      \
        }

__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
                                         unsigned mode)
{
        wait_queue_head_t *wq = atomic_t_waitqueue(p);
        DEFINE_WAIT_ATOMIC_T(wait, p);

        return __wait_on_atomic_t(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);

/**
 * wake_up_atomic_t - Wake up a waiter on an atomic_t
 * @p: The atomic_t being waited on, a kernel virtual address
 *
 * Wake up anyone waiting for the atomic_t to go to zero.
 *
 * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
 * check is done by the waiter's wake function, not by the waker itself).
 */
void wake_up_atomic_t(atomic_t *p)
{
        __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
}
EXPORT_SYMBOL(wake_up_atomic_t);
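
/*
 * Illustrative sketch (not part of this file): a hypothetical object
 * whose users are counted in an atomic_t, with waiters wanting the count
 * to drop to zero.  wait_on_atomic_t() is assumed to be the
 * <linux/wait.h> wrapper around out_of_line_wait_on_atomic_t() above;
 * the object and the local action function are hypothetical.
 *
 * Waiter:
 *      static int my_wait_atomic_t(atomic_t *p)
 *      {
 *              schedule();
 *              return 0;
 *      }
 *
 *      wait_on_atomic_t(&obj->users, my_wait_atomic_t, TASK_UNINTERRUPTIBLE);
 *
 * Releaser:
 *      if (atomic_dec_and_test(&obj->users))
 *              wake_up_atomic_t(&obj->users);
 */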

__sched int bit_wait(struct wait_bit_key *word, int mode)
{
        schedule();
        if (signal_pending_state(mode, current))
                return -EINTR;
        return 0;
}
EXPORT_SYMBOL(bit_wait);

__sched int bit_wait_io(struct wait_bit_key *word, int mode)
{
        io_schedule();
        if (signal_pending_state(mode, current))
                return -EINTR;
        return 0;
}
EXPORT_SYMBOL(bit_wait_io);

__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
{
        unsigned long now = ACCESS_ONCE(jiffies);
        if (time_after_eq(now, word->timeout))
                return -EAGAIN;
        schedule_timeout(word->timeout - now);
        if (signal_pending_state(mode, current))
                return -EINTR;
        return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_timeout);

__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
{
        unsigned long now = ACCESS_ONCE(jiffies);
        if (time_after_eq(now, word->timeout))
                return -EAGAIN;
        io_schedule_timeout(word->timeout - now);
        if (signal_pending_state(mode, current))
                return -EINTR;
        return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_io_timeout);

void init_wait_var_entry(struct wait_bit_queue *wbq_entry, void *var, int flags)
{
        *wbq_entry = (struct wait_bit_queue){
                .key = {
                        .flags  = (var),
                        .bit_nr = -1,
                },
                .wait = {
                        .private = current,
                        .func    = var_wake_function,
                        .task_list = LIST_HEAD_INIT(wbq_entry->wait.task_list),
                },
        };
}
EXPORT_SYMBOL(init_wait_var_entry);

void wake_up_var(void *var)
{
        __wake_up_bit(__var_waitqueue(var), var, -1);
}
EXPORT_SYMBOL(wake_up_var);
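
/*
 * Illustrative sketch (not part of this file): the wait_var_event() /
 * wake_up_var() pairing that init_wait_var_entry() and __var_waitqueue()
 * back.  wait_var_event() is assumed to be the wrapper macro in this
 * tree's wait headers; the refcounted object is hypothetical.
 *
 * Waiter:
 *      wait_var_event(&obj->refs, atomic_read(&obj->refs) == 0);
 *
 * Waker:
 *      if (atomic_dec_and_test(&obj->refs))
 *              wake_up_var(&obj->refs);
 */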

wait_queue_head_t *__var_waitqueue(void *p)
{
        return bit_wait_table + hash_ptr(p, WAIT_TABLE_BITS);
}
EXPORT_SYMBOL(__var_waitqueue);

void __init wait_bit_init(void)
{
        int i;

        for (i = 0; i < WAIT_TABLE_SIZE; i++)
                init_waitqueue_head(bit_wait_table + i);
}