linux/kernel/wait.c
/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/wait.h>
#include <linux/hash.h>
#include <linux/kthread.h>

void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
{
	spin_lock_init(&q->lock);
	lockdep_set_class_and_name(&q->lock, key, name);
	INIT_LIST_HEAD(&q->task_list);
}

EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue_tail(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__remove_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);
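
/*
 * Illustrative sketch (not part of this file): the low-level, open-coded
 * wait pattern that the add_wait_queue()/remove_wait_queue() helpers above
 * support.  "my_wq" and "my_condition" are placeholder names supplied by
 * the caller; most users should prefer the prepare_to_wait()/finish_wait()
 * helpers further below, or the wait_event*() macros.
 *
 *	DECLARE_WAITQUEUE(wait, current);
 *
 *	add_wait_queue(&my_wq, &wait);
 *	for (;;) {
 *		set_current_state(TASK_INTERRUPTIBLE);
 *		if (my_condition)
 *			break;
 *		if (signal_pending(current))
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	remove_wait_queue(&my_wq, &wait);
 */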


/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue(q, wait);
	set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);

void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue_tail(q, wait);
	set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);

/**
 * finish_wait - clean up after waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPUs that we haven't seen yet (and that might
	 *    still change the stack area),
	 * and
	 *  - all other users take the lock (i.e. we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wait->task_list)) {
		spin_lock_irqsave(&q->lock, flags);
		list_del_init(&wait->task_list);
		spin_unlock_irqrestore(&q->lock, flags);
	}
}
EXPORT_SYMBOL(finish_wait);
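
/*
 * Illustrative sketch (not part of this file): the canonical way the
 * prepare_to_wait()/finish_wait() pair above is used.  "my_wq" and
 * "my_condition" are placeholder names; DEFINE_WAIT() sets up a
 * wait_queue_t that uses autoremove_wake_function() (defined below), so
 * a woken entry is taken off the queue and re-added by prepare_to_wait()
 * on the next loop iteration.
 *
 *	DEFINE_WAIT(wait);
 *
 *	for (;;) {
 *		prepare_to_wait(&my_wq, &wait, TASK_UNINTERRUPTIBLE);
 *		if (my_condition)
 *			break;
 *		schedule();
 *	}
 *	finish_wait(&my_wq, &wait);
 */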

/**
 * abort_exclusive_wait - abort exclusive waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 * @mode: runstate of the waiter to be woken
 * @key: key to identify a wait bit queue or %NULL
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 *
 * Wakes up the next waiter if the caller is concurrently
 * woken up through the queue.
 *
 * This prevents waiter starvation where an exclusive waiter
 * aborts and is woken up concurrently and no one wakes up
 * the next waiter.
 */
void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
			unsigned int mode, void *key)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	spin_lock_irqsave(&q->lock, flags);
	if (!list_empty(&wait->task_list))
		list_del_init(&wait->task_list);
	else if (waitqueue_active(q))
		__wake_up_locked_key(q, mode, key);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(abort_exclusive_wait);

int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	int ret = default_wake_function(wait, mode, sync, key);

	if (ret)
		list_del_init(&wait->task_list);
	return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

static inline bool is_kthread_should_stop(void)
{
	return (current->flags & PF_KTHREAD) && kthread_should_stop();
}

/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_func);
 *
 * add_wait_queue(&wq, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     p->state = mode;                           condition = true;
 *     smp_mb(); // A                             smp_wmb(); // C
 *     if (!(wait->flags & WQ_FLAG_WOKEN))        wait->flags |= WQ_FLAG_WOKEN;
 *         schedule()                             try_to_wake_up();
 *     p->state = TASK_RUNNING;               ~~~~~~~~~~~~~~~~~~
 *     wait->flags &= ~WQ_FLAG_WOKEN;             condition = true;
 *     smp_mb() // B                              smp_wmb(); // C
 *                                                wait->flags |= WQ_FLAG_WOKEN;
 * }
 * remove_wait_queue(&wq, &wait);
 *
 */
long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
{
	set_current_state(mode); /* A */
	/*
	 * The above implies an smp_mb(), which matches with the smp_wmb() from
	 * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
	 * also observe all state before the wakeup.
	 */
	if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
		timeout = schedule_timeout(timeout);
	__set_current_state(TASK_RUNNING);

	/*
	 * The below implies an smp_mb(), it too pairs with the smp_wmb() from
	 * woken_wake_function() such that we must either observe the wait
	 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
	 * an event.
	 */
	set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */

	return timeout;
}
EXPORT_SYMBOL(wait_woken);

int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	/*
	 * Although this function is called under the waitqueue lock, LOCK
	 * doesn't imply a write barrier, and callers expect write
	 * barrier semantics of wakeup functions.  The following
	 * smp_wmb() is equivalent to the smp_wmb() in try_to_wake_up()
	 * and is paired with set_mb() in wait_woken().
	 */
	smp_wmb(); /* C */
	wait->flags |= WQ_FLAG_WOKEN;

	return default_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);
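
/*
 * Illustrative sketch (not part of this file): how wait_woken() and
 * woken_wake_function() are typically paired.  "my_wq", "my_condition"
 * and "my_timeout" are placeholder names supplied by the caller.
 *
 * Waiter:
 *	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *	long timeout = my_timeout;
 *
 *	add_wait_queue(&my_wq, &wait);
 *	while (!my_condition && timeout)
 *		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 *	remove_wait_queue(&my_wq, &wait);
 *
 * Waker:
 *	my_condition = true;
 *	wake_up(&my_wq);	// reaches woken_wake_function() via wait.func
 */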

/*
 * RH: the original wake_bit_function() is retained for possible 3rd-party
 * users of DEFINE_WAIT_BIT before commit
 * "sched: Allow wait_on_bit_action() functions to support a timeout"
 */
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
	struct wait_bit_key_deprecated *key = arg;
	struct wait_bit_queue_deprecated *wait_bit
		= container_of(wait, struct wait_bit_queue_deprecated, wait);

	if (wait_bit->key.flags != key->flags ||
			wait_bit->key.bit_nr != key->bit_nr ||
			test_bit(key->bit_nr, key->flags))
		return 0;
	else
		return autoremove_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(wake_bit_function);

int wake_bit_function_rh(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
	struct wait_bit_key *key = arg;
	struct wait_bit_queue *wait_bit
		= container_of(wait, struct wait_bit_queue, wait);

	if (wait_bit->key.flags != key->flags ||
			wait_bit->key.bit_nr != key->bit_nr ||
			test_bit(key->bit_nr, key->flags))
		return 0;
	else
		return autoremove_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(wake_bit_function_rh);

/*
 * To allow interruptible waiting and asynchronous (i.e. nonblocking)
 * waiting, the action functions passed to __wait_on_bit() and
 * __wait_on_bit_lock() may return nonzero codes; a nonzero return halts
 * the wait and is propagated back to the caller.
 */
int __sched
__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
	      wait_bit_action_f *action, unsigned mode)
{
	int ret = 0;

	do {
		prepare_to_wait(wq, &q->wait, mode);
		if (test_bit(q->key.bit_nr, q->key.flags))
			ret = (*action)(&q->key, mode);
	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
	finish_wait(wq, &q->wait);
	return ret;
}
EXPORT_SYMBOL(__wait_on_bit);

int __sched out_of_line_wait_on_bit(void *word, int bit,
				    wait_bit_action_f *action, unsigned mode)
{
	wait_queue_head_t *wq = bit_waitqueue(word, bit);
	DEFINE_WAIT_BIT(wait, word, bit);

	return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit);

int __sched out_of_line_wait_on_bit_timeout(
	void *word, int bit, wait_bit_action_f *action,
	unsigned mode, unsigned long timeout)
{
	wait_queue_head_t *wq = bit_waitqueue(word, bit);
	DEFINE_WAIT_BIT(wait, word, bit);

	wait.key.timeout = jiffies + timeout;
	return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);

int __sched
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
			wait_bit_action_f *action, unsigned mode)
{
	do {
		int ret;

		prepare_to_wait_exclusive(wq, &q->wait, mode);
		if (!test_bit(q->key.bit_nr, q->key.flags))
			continue;
		ret = action(&q->key, mode);
		if (!ret)
			continue;
		abort_exclusive_wait(wq, &q->wait, mode, &q->key);
		return ret;
	} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
	finish_wait(wq, &q->wait);
	return 0;
}
EXPORT_SYMBOL(__wait_on_bit_lock);

int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
					 wait_bit_action_f *action, unsigned mode)
{
	wait_queue_head_t *wq = bit_waitqueue(word, bit);
	DEFINE_WAIT_BIT(wait, word, bit);

	return __wait_on_bit_lock(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);

void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
{
	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, 1, &key);
}
EXPORT_SYMBOL(__wake_up_bit);

/**
 * wake_up_bit - wake up a waiter on a bit
 * @word: the word being waited on, a kernel virtual address
 * @bit: the bit of the word being waited on
 *
 * There is a standard hashed waitqueue table for generic use. This
 * is the part of the hashtable's accessor API that wakes up waiters
 * on a bit. For instance, if one were to have waiters on a bitflag,
 * one would call wake_up_bit() after clearing the bit.
 *
 * In order for this to function properly, as it uses waitqueue_active()
 * internally, some kind of memory barrier must be done prior to calling
 * this. Typically, this will be smp_mb__after_clear_bit(), but in some
 * cases where bitflags are manipulated non-atomically under a lock, one
 * may need to use a less regular barrier, such as fs/inode.c's smp_mb(),
 * because spin_unlock() does not guarantee a memory barrier.
 */
void wake_up_bit(void *word, int bit)
{
	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
}
EXPORT_SYMBOL(wake_up_bit);
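
/*
 * Illustrative sketch (not part of this file): pairing the bit-wait and
 * bit-wake sides.  "my_flags" and MY_FLAG_BUSY are placeholder names; the
 * waiter side uses out_of_line_wait_on_bit() with the bit_wait() action
 * defined at the end of this file.
 *
 * Waiter (sleep until MY_FLAG_BUSY is clear):
 *	if (test_bit(MY_FLAG_BUSY, &my_flags))
 *		out_of_line_wait_on_bit(&my_flags, MY_FLAG_BUSY,
 *					bit_wait, TASK_UNINTERRUPTIBLE);
 *
 * Waker (clear the bit, then wake):
 *	clear_bit(MY_FLAG_BUSY, &my_flags);
 *	smp_mb__after_clear_bit();	// pairs with the waitqueue_active() check
 *	wake_up_bit(&my_flags, MY_FLAG_BUSY);
 */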

wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
	struct page *page = is_vmalloc_addr(word) ?
		vmalloc_to_page(word) : virt_to_page(word);
	const struct zone *zone = page_zone(page);
	unsigned long val = (unsigned long)word << shift | bit;

	return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
}
EXPORT_SYMBOL(bit_waitqueue);

/*
 * Manipulate the atomic_t address to produce a better bit waitqueue table hash
 * index (we're keying off bit -1, but that would produce a horrible hash
 * value).
 */
static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
{
	if (BITS_PER_LONG == 64) {
		unsigned long q = (unsigned long)p;
		return bit_waitqueue((void *)(q & ~1), q & 1);
	}
	return bit_waitqueue(p, 0);
}

static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
				  void *arg)
{
	struct wait_bit_key *key = arg;
	struct wait_bit_queue *wait_bit
		= container_of(wait, struct wait_bit_queue, wait);
	atomic_t *val = key->flags;

	if (wait_bit->key.flags != key->flags ||
	    wait_bit->key.bit_nr != key->bit_nr ||
	    atomic_read(val) != 0)
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
 * the action function passed to __wait_on_atomic_t() may return a nonzero
 * code; a nonzero return halts the wait and is propagated back to the caller.
 */
static __sched
int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
		       int (*action)(atomic_t *), unsigned mode)
{
	atomic_t *val;
	int ret = 0;

	do {
		prepare_to_wait(wq, &q->wait, mode);
		val = q->key.flags;
		if (atomic_read(val) == 0)
			break;
		ret = (*action)(val);
	} while (!ret && atomic_read(val) != 0);
	finish_wait(wq, &q->wait);
	return ret;
}

#define DEFINE_WAIT_ATOMIC_T(name, p)					\
	struct wait_bit_queue name = {					\
		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
		.wait	= {						\
			.private	= current,			\
			.func		= wake_atomic_t_function,	\
			.task_list	=				\
				LIST_HEAD_INIT((name).wait.task_list),	\
		},							\
	}

__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
					 unsigned mode)
{
	wait_queue_head_t *wq = atomic_t_waitqueue(p);
	DEFINE_WAIT_ATOMIC_T(wait, p);

	return __wait_on_atomic_t(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);

/**
 * wake_up_atomic_t - Wake up a waiter on an atomic_t
 * @p: The atomic_t being waited on, a kernel virtual address
 *
 * Wake up anyone waiting for the atomic_t to go to zero.
 *
 * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
 * check is done by the waiter's wake function, not by the waker itself).
 */
void wake_up_atomic_t(atomic_t *p)
{
	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
}
EXPORT_SYMBOL(wake_up_atomic_t);
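
/*
 * Illustrative sketch (not part of this file): waiting for an atomic_t
 * reference count to drop to zero.  "my_count" and my_wait_action() are
 * placeholder names; the caller supplies the action that actually sleeps,
 * mirroring the bit_wait() helpers below.
 *
 *	static int my_wait_action(atomic_t *p)
 *	{
 *		schedule();
 *		return 0;
 *	}
 *
 * Waiter:
 *	out_of_line_wait_on_atomic_t(&my_count, my_wait_action,
 *				     TASK_UNINTERRUPTIBLE);
 *
 * Waker (drop a reference, wake if it was the last one):
 *	if (atomic_dec_and_test(&my_count))
 *		wake_up_atomic_t(&my_count);
 */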

__sched int bit_wait(struct wait_bit_key *word, int mode)
{
	schedule();
	if (signal_pending_state(mode, current))
		return -EINTR;
	return 0;
}
EXPORT_SYMBOL(bit_wait);

__sched int bit_wait_io(struct wait_bit_key *word, int mode)
{
	io_schedule();
	if (signal_pending_state(mode, current))
		return -EINTR;
	return 0;
}
EXPORT_SYMBOL(bit_wait_io);

__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
{
	unsigned long now = ACCESS_ONCE(jiffies);
	if (time_after_eq(now, word->timeout))
		return -EAGAIN;
	schedule_timeout(word->timeout - now);
	if (signal_pending_state(mode, current))
		return -EINTR;
	return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_timeout);

__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
{
	unsigned long now = ACCESS_ONCE(jiffies);
	if (time_after_eq(now, word->timeout))
		return -EAGAIN;
	io_schedule_timeout(word->timeout - now);
	if (signal_pending_state(mode, current))
		return -EINTR;
	return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_io_timeout);