linux/kernel/sched/wait.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */
#include "sched.h"

void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
{
        spin_lock_init(&wq_head->lock);
        lockdep_set_class_and_name(&wq_head->lock, key, name);
        INIT_LIST_HEAD(&wq_head->head);
}

EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
        unsigned long flags;

        wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&wq_head->lock, flags);
        __add_wait_queue(wq_head, wq_entry);
        spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
        unsigned long flags;

        wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&wq_head->lock, flags);
        __add_wait_queue_entry_tail(wq_head, wq_entry);
        spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
        unsigned long flags;

        wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
        spin_lock_irqsave(&wq_head->lock, flags);
        __add_wait_queue(wq_head, wq_entry);
        spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL_GPL(add_wait_queue_priority);

void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
        unsigned long flags;

        spin_lock_irqsave(&wq_head->lock, flags);
        __remove_wait_queue(wq_head, wq_entry);
        spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);
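
/*
 * Illustrative only (not part of the original file): a minimal sketch of the
 * classic open-coded wait built on add_wait_queue()/remove_wait_queue(),
 * assuming a caller-provided wait queue head 'wq' and a 'condition' that the
 * waker sets before calling wake_up():
 *
 *     DECLARE_WAITQUEUE(wait, current);
 *
 *     add_wait_queue(&wq, &wait);
 *     for (;;) {
 *             set_current_state(TASK_INTERRUPTIBLE);
 *             if (condition)
 *                     break;
 *             if (signal_pending(current))
 *                     break;
 *             schedule();
 *     }
 *     __set_current_state(TASK_RUNNING);
 *     remove_wait_queue(&wq, &wait);
 *
 * New code normally prefers the wait_event*() macros or the
 * prepare_to_wait()/finish_wait() helpers below.
 */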

/*
 * Scan threshold to break wait queue walk.
 * This allows a waker to take a break from holding the
 * wait queue lock during the wait queue walk.
 */
#define WAITQUEUE_WALK_BREAK_CNT 64

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == a small
 * positive number) then we wake that number of exclusive tasks, and
 * potentially all the non-exclusive tasks. Normally, exclusive tasks will be
 * at the end of the list and any non-exclusive tasks will be woken first. A
 * priority task may be at the head of the list, and can consume the event
 * without any other tasks being woken.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
                        int nr_exclusive, int wake_flags, void *key,
                        wait_queue_entry_t *bookmark)
{
        wait_queue_entry_t *curr, *next;
        int cnt = 0;

        lockdep_assert_held(&wq_head->lock);

        if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
                curr = list_next_entry(bookmark, entry);

                list_del(&bookmark->entry);
                bookmark->flags = 0;
        } else
                curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);

        if (&curr->entry == &wq_head->head)
                return nr_exclusive;

        list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
                unsigned flags = curr->flags;
                int ret;

                if (flags & WQ_FLAG_BOOKMARK)
                        continue;

                ret = curr->func(curr, mode, wake_flags, key);
                if (ret < 0)
                        break;
                if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                        break;

                if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
                                (&next->entry != &wq_head->head)) {
                        bookmark->flags = WQ_FLAG_BOOKMARK;
                        list_add_tail(&bookmark->entry, &next->entry);
                        break;
                }
        }

        return nr_exclusive;
}

static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
                        int nr_exclusive, int wake_flags, void *key)
{
        unsigned long flags;
        wait_queue_entry_t bookmark;

        bookmark.flags = 0;
        bookmark.private = NULL;
        bookmark.func = NULL;
        INIT_LIST_HEAD(&bookmark.entry);

        do {
                spin_lock_irqsave(&wq_head->lock, flags);
                nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
                                                wake_flags, key, &bookmark);
                spin_unlock_irqrestore(&wq_head->lock, flags);
        } while (bookmark.flags & WQ_FLAG_BOOKMARK);
}

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
                        int nr_exclusive, void *key)
{
        __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
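
/*
 * Illustrative only (not part of the original file): a minimal waker-side
 * sketch, assuming a caller-provided 'wq' and 'condition'. Callers usually go
 * through the wake_up*() wrappers in <linux/wait.h> rather than calling
 * __wake_up() directly:
 *
 *     condition = true;
 *     // wake one exclusive waiter (plus any non-exclusive ones):
 *     __wake_up(&wq, TASK_NORMAL, 1, NULL);   // i.e. wake_up(&wq)
 *     // or wake every waiter:
 *     __wake_up(&wq, TASK_NORMAL, 0, NULL);   // i.e. wake_up_all(&wq)
 */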

/*
 * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
 */
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
{
        __wake_up_common(wq_head, mode, nr, 0, NULL, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);

void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
        __wake_up_common(wq_head, mode, 1, 0, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);

void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
                unsigned int mode, void *key, wait_queue_entry_t *bookmark)
{
        __wake_up_common(wq_head, mode, 1, 0, key, bookmark);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);

/**
 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - i.e. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
                        void *key)
{
        if (unlikely(!wq_head))
                return;

        __wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);

/**
 * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - i.e. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
                               unsigned int mode, void *key)
{
        __wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);

/*
 * __wake_up_sync - see __wake_up_sync_key()
 */
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
{
        __wake_up_sync_key(wq_head, mode, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync);      /* For internal use only */

/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see the waitqueue addition _or_ the
 * subsequent tests in this thread will see the wakeup having
 * taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
        unsigned long flags;

        wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&wq_head->lock, flags);
        if (list_empty(&wq_entry->entry))
                __add_wait_queue(wq_head, wq_entry);
        set_current_state(state);
        spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);
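
/*
 * Illustrative only (not part of the original file): a minimal sketch of the
 * usual prepare_to_wait()/finish_wait() loop, assuming a caller-provided wait
 * queue head 'wq' and a 'condition' set by the waker before it calls wake_up():
 *
 *     DEFINE_WAIT(wait);
 *
 *     for (;;) {
 *             prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
 *             if (condition)
 *                     break;
 *             if (signal_pending(current))
 *                     break;
 *             schedule();
 *     }
 *     finish_wait(&wq, &wait);
 *
 * DEFINE_WAIT() uses autoremove_wake_function(), so a successful wakeup also
 * removes the entry from the queue; finish_wait() below handles the case
 * where the entry is still queued.
 */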

/* Returns true if we are the first waiter in the queue, false otherwise. */
bool
prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
        unsigned long flags;
        bool was_empty = false;

        wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&wq_head->lock, flags);
        if (list_empty(&wq_entry->entry)) {
                was_empty = list_empty(&wq_head->head);
                __add_wait_queue_entry_tail(wq_head, wq_entry);
        }
        set_current_state(state);
        spin_unlock_irqrestore(&wq_head->lock, flags);
        return was_empty;
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);

void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
        wq_entry->flags = flags;
        wq_entry->private = current;
        wq_entry->func = autoremove_wake_function;
        INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);

long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
        unsigned long flags;
        long ret = 0;

        spin_lock_irqsave(&wq_head->lock, flags);
        if (signal_pending_state(state, current)) {
                /*
                 * An exclusive waiter must not fail if it was selected by
                 * wakeup; it should "consume" the condition we were waiting
                 * for.
                 *
                 * The caller will recheck the condition and return success if
                 * we were already woken up; we cannot miss the event because
                 * wakeup locks/unlocks the same wq_head->lock.
                 *
                 * But we need to ensure that a set-condition + wakeup after
                 * that can't see us; it should wake up another exclusive
                 * waiter if we fail.
                 */
                list_del_init(&wq_entry->entry);
                ret = -ERESTARTSYS;
        } else {
                if (list_empty(&wq_entry->entry)) {
                        if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
                                __add_wait_queue_entry_tail(wq_head, wq_entry);
                        else
                                __add_wait_queue(wq_head, wq_entry);
                }
                set_current_state(state);
        }
        spin_unlock_irqrestore(&wq_head->lock, flags);

        return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);
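
/*
 * Illustrative only (not part of the original file): prepare_to_wait_event()
 * is the backend used by the wait_event*() macros in <linux/wait.h>. A
 * simplified sketch of that loop, assuming 'wq' and 'condition' as above:
 *
 *     struct wait_queue_entry wq_entry;
 *     long err = 0;
 *
 *     init_wait_entry(&wq_entry, 0);
 *     for (;;) {
 *             long intr = prepare_to_wait_event(&wq, &wq_entry, TASK_INTERRUPTIBLE);
 *
 *             if (condition)
 *                     break;
 *             if (intr) {
 *                     err = intr;     // -ERESTARTSYS, a signal is pending
 *                     break;
 *             }
 *             schedule();
 *     }
 *     finish_wait(&wq, &wq_entry);
 */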

/*
 * Note! These two wait functions are entered with the
 * wait-queue lock held (and interrupts off in the _irq
 * case), so there is no race with testing the wakeup
 * condition in the caller before they add the wait
 * entry to the wake queue.
 */
int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
        if (likely(list_empty(&wait->entry)))
                __add_wait_queue_entry_tail(wq, wait);

        set_current_state(TASK_INTERRUPTIBLE);
        if (signal_pending(current))
                return -ERESTARTSYS;

        spin_unlock(&wq->lock);
        schedule();
        spin_lock(&wq->lock);

        return 0;
}
EXPORT_SYMBOL(do_wait_intr);

int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
        if (likely(list_empty(&wait->entry)))
                __add_wait_queue_entry_tail(wq, wait);

        set_current_state(TASK_INTERRUPTIBLE);
        if (signal_pending(current))
                return -ERESTARTSYS;

        spin_unlock_irq(&wq->lock);
        schedule();
        spin_lock_irq(&wq->lock);

        return 0;
}
EXPORT_SYMBOL(do_wait_intr_irq);
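
/*
 * Illustrative only (not part of the original file): a minimal sketch of a
 * caller that already holds wq.lock and waits for a 'condition' that the
 * waker sets, and wakes, under the same lock (in the style of the locked
 * wait_event helpers):
 *
 *     DEFINE_WAIT(wait);
 *     int err = 0;
 *
 *     spin_lock(&wq.lock);
 *     while (!condition) {
 *             err = do_wait_intr(&wq, &wait);
 *             if (err)
 *                     break;
 *     }
 *     __remove_wait_queue(&wq, &wait);
 *     __set_current_state(TASK_RUNNING);
 *     spin_unlock(&wq.lock);
 *
 * Use do_wait_intr_irq() with spin_lock_irq()/spin_unlock_irq() when the
 * wait queue lock is also taken from interrupt context.
 */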

/**
 * finish_wait - clean up after waiting in a queue
 * @wq_head: waitqueue waited on
 * @wq_entry: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
        unsigned long flags;

        __set_current_state(TASK_RUNNING);
        /*
         * We can check for list emptiness outside the lock
         * IFF:
         *  - we use the "careful" check that verifies both
         *    the next and prev pointers, so that there cannot
         *    be any half-pending updates in progress on other
         *    CPUs that we haven't seen yet (and that might
         *    still change the stack area).
         * and
         *  - all other users take the lock (i.e. we can only
         *    have _one_ other CPU that looks at or modifies
         *    the list).
         */
        if (!list_empty_careful(&wq_entry->entry)) {
                spin_lock_irqsave(&wq_head->lock, flags);
                list_del_init(&wq_entry->entry);
                spin_unlock_irqrestore(&wq_head->lock, flags);
        }
}
EXPORT_SYMBOL(finish_wait);

int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
        int ret = default_wake_function(wq_entry, mode, sync, key);

        if (ret)
                list_del_init_careful(&wq_entry->entry);

        return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

static inline bool is_kthread_should_stop(void)
{
        return (current->flags & PF_KTHREAD) && kthread_should_stop();
}

/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_func);
 *
 * add_wait_queue(&wq_head, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     // in wait_woken()                       // in woken_wake_function()
 *
 *     p->state = mode;                         wq_entry->flags |= WQ_FLAG_WOKEN;
 *     smp_mb(); // A                           try_to_wake_up():
 *     if (!(wq_entry->flags & WQ_FLAG_WOKEN))     <full barrier>
 *         schedule()                              if (p->state & mode)
 *     p->state = TASK_RUNNING;                       p->state = TASK_RUNNING;
 *     wq_entry->flags &= ~WQ_FLAG_WOKEN;       ~~~~~~~~~~~~~~~~~~
 *     smp_mb(); // B                           condition = true;
 * }                                            smp_mb(); // C
 * remove_wait_queue(&wq_head, &wait);          wq_entry->flags |= WQ_FLAG_WOKEN;
 */
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
{
        /*
         * The below executes an smp_mb(), which matches with the full barrier
         * executed by the try_to_wake_up() in woken_wake_function() such that
         * either we see the store to wq_entry->flags in woken_wake_function()
         * or woken_wake_function() sees our store to current->state.
         */
        set_current_state(mode); /* A */
        if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
                timeout = schedule_timeout(timeout);
        __set_current_state(TASK_RUNNING);

        /*
         * The below executes an smp_mb(), which matches with the smp_mb() (C)
         * in woken_wake_function() such that either we see the wait condition
         * being true or the store to wq_entry->flags in woken_wake_function()
         * follows ours in the coherence order.
         */
        smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */

        return timeout;
}
EXPORT_SYMBOL(wait_woken);

int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
        /* Pairs with the smp_store_mb() in wait_woken(). */
        smp_mb(); /* C */
        wq_entry->flags |= WQ_FLAG_WOKEN;

        return default_wake_function(wq_entry, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);
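
/*
 * Illustrative only (not part of the original file): a minimal sketch of the
 * pattern from the comment above wait_woken(), assuming 'wq', 'condition' and
 * a jiffies-based 'timeout':
 *
 *     DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *
 *     add_wait_queue(&wq, &wait);
 *     while (!condition) {
 *             if (signal_pending(current))
 *                     break;
 *             timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 *             if (!timeout)
 *                     break;
 *     }
 *     remove_wait_queue(&wq, &wait);
 */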