linux/kernel/rtmutex.c
   1/*
   2 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
   3 *
   4 * started by Ingo Molnar and Thomas Gleixner.
   5 *
   6 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   7 *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
   8 *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
   9 *  Copyright (C) 2006 Esben Nielsen
  10 *
  11 *  See Documentation/rt-mutex-design.txt for details.
  12 */
  13#include <linux/spinlock.h>
  14#include <linux/module.h>
  15#include <linux/sched.h>
  16#include <linux/timer.h>
  17
  18#include "rtmutex_common.h"
  19
  20/*
  21 * lock->owner state tracking:
  22 *
   23 * lock->owner holds the task_struct pointer of the owner. Bits 0 and 1
  24 * are used to keep track of the "owner is pending" and "lock has
  25 * waiters" state.
  26 *
  27 * owner        bit1    bit0
  28 * NULL         0       0       lock is free (fast acquire possible)
  29 * NULL         0       1       invalid state
  30 * NULL         1       0       Transitional State*
  31 * NULL         1       1       invalid state
  32 * taskpointer  0       0       lock is held (fast release possible)
  33 * taskpointer  0       1       task is pending owner
  34 * taskpointer  1       0       lock is held and has waiters
  35 * taskpointer  1       1       task is pending owner and lock has more waiters
  36 *
  37 * Pending ownership is assigned to the top (highest priority)
  38 * waiter of the lock, when the lock is released. The thread is woken
  39 * up and can now take the lock. Until the lock is taken (bit 0
  40 * cleared) a competing higher priority thread can steal the lock
  41 * which puts the woken up thread back on the waiters list.
  42 *
  43 * The fast atomic compare exchange based acquire and release is only
  44 * possible when bit 0 and 1 of lock->owner are 0.
  45 *
   46 * (*) There is a short window in which the owner can be NULL and the
   47 * "lock has waiters" bit is set.  This can happen when grabbing the lock.
   48 * To prevent a cmpxchg of the owner releasing the lock, we need to set this
   49 * bit before looking at the lock, which is why this is a transitional
   50 * state.
  51 */
  52
  53static void
  54rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
  55                   unsigned long mask)
  56{
  57        unsigned long val = (unsigned long)owner | mask;
  58
  59        if (rt_mutex_has_waiters(lock))
  60                val |= RT_MUTEX_HAS_WAITERS;
  61
  62        lock->owner = (struct task_struct *)val;
  63}
  64
  65static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
  66{
  67        lock->owner = (struct task_struct *)
  68                        ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
  69}
  70
  71static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  72{
  73        if (!rt_mutex_has_waiters(lock))
  74                clear_rt_mutex_waiters(lock);
  75}
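
/*
 * Illustrative sketch (editor's addition, not part of rtmutex.c): how the
 * helpers above pack the "owner pending" / "has waiters" flags into the two
 * low bits of an aligned owner pointer.  All ex_* names are made up for the
 * example; this is plain user-space C.
 */
#include <assert.h>
#include <stdint.h>

#define EX_OWNER_PENDING	1UL	/* bit 0, as in the state table above */
#define EX_HAS_WAITERS		2UL	/* bit 1, as in the state table above */

struct ex_task { int prio; };		/* stand-in for task_struct */

static inline struct ex_task *ex_owner(uintptr_t word)
{
	/* Mask off the two flag bits to recover the task pointer. */
	return (struct ex_task *)(word & ~(EX_OWNER_PENDING | EX_HAS_WAITERS));
}

int main(void)
{
	static struct ex_task t;	/* suitably aligned, low bits are zero */
	uintptr_t owner = (uintptr_t)&t | EX_HAS_WAITERS;

	assert(ex_owner(owner) == &t);	/* the pointer survives the packing */
	assert(owner & EX_HAS_WAITERS);	/* and the flag is still readable */
	return 0;
}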
  76
  77/*
   78 * We can speed up the acquire/release if the architecture
   79 * supports cmpxchg and there is no debugging state to be set up.
  80 */
  81#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
  82# define rt_mutex_cmpxchg(l,c,n)        (cmpxchg(&l->owner, c, n) == c)
  83static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  84{
  85        unsigned long owner, *p = (unsigned long *) &lock->owner;
  86
  87        do {
  88                owner = *p;
  89        } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
  90}
  91#else
  92# define rt_mutex_cmpxchg(l,c,n)        (0)
  93static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  94{
  95        lock->owner = (struct task_struct *)
  96                        ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
  97}
  98#endif
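
/*
 * Illustrative sketch (editor's addition, not part of rtmutex.c): the same
 * "OR a bit into the owner word without losing a concurrent update" loop as
 * mark_rt_mutex_waiters() above, written with C11 atomics for user space.
 * ex_mark_waiters() and EX_HAS_WAITERS are made-up names.
 */
#include <stdatomic.h>
#include <stdint.h>

#define EX_HAS_WAITERS	2UL

static void ex_mark_waiters(_Atomic uintptr_t *owner)
{
	uintptr_t old = atomic_load(owner);

	/*
	 * compare_exchange_weak reloads 'old' with the current value on
	 * failure, so the loop simply retries until the bit sticks.
	 */
	while (!atomic_compare_exchange_weak(owner, &old, old | EX_HAS_WAITERS))
		;
}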
  99
 100/*
 101 * Calculate task priority from the waiter list priority
 102 *
 103 * Return task->normal_prio when the waiter list is empty or when
 104 * the waiter is not allowed to do priority boosting
 105 */
 106int rt_mutex_getprio(struct task_struct *task)
 107{
 108        if (likely(!task_has_pi_waiters(task)))
 109                return task->normal_prio;
 110
 111        return min(task_top_pi_waiter(task)->pi_list_entry.prio,
 112                   task->normal_prio);
 113}
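
/*
 * Worked example (editor's addition, not part of rtmutex.c): kernel
 * priorities are "lower number == higher priority", which is why the min()
 * above yields the boosted priority.  ex_getprio() is a made-up stand-alone
 * model of that calculation.
 */
#include <assert.h>

int ex_getprio(int normal_prio, int top_pi_waiter_prio, int has_pi_waiters)
{
	if (!has_pi_waiters)
		return normal_prio;		/* nothing to boost against */
	return top_pi_waiter_prio < normal_prio ? top_pi_waiter_prio : normal_prio;
}

void ex_getprio_selftest(void)
{
	assert(ex_getprio(120, 90, 1) == 90);	/* boosted by a waiter at prio 90 */
	assert(ex_getprio(120, 0, 0) == 120);	/* no waiters: keep normal_prio */
}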
 114
 115/*
 116 * Adjust the priority of a task, after its pi_waiters got modified.
 117 *
 118 * This can be both boosting and unboosting. task->pi_lock must be held.
 119 */
 120static void __rt_mutex_adjust_prio(struct task_struct *task)
 121{
 122        int prio = rt_mutex_getprio(task);
 123
 124        if (task->prio != prio)
 125                rt_mutex_setprio(task, prio);
 126}
 127
 128/*
 129 * Adjust task priority (undo boosting). Called from the exit path of
 130 * rt_mutex_slowunlock() and rt_mutex_slowlock().
 131 *
 132 * (Note: We do this outside of the protection of lock->wait_lock to
 133 * allow the lock to be taken while or before we readjust the priority
 134 * of task. We do not use the spin_xx_mutex() variants here as we are
 135 * outside of the debug path.)
 136 */
 137static void rt_mutex_adjust_prio(struct task_struct *task)
 138{
 139        unsigned long flags;
 140
 141        raw_spin_lock_irqsave(&task->pi_lock, flags);
 142        __rt_mutex_adjust_prio(task);
 143        raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 144}
 145
 146/*
 147 * Max number of times we'll walk the boosting chain:
 148 */
 149int max_lock_depth = 1024;
 150
 151/*
 152 * Adjust the priority chain. Also used for deadlock detection.
  153 * Drops the task's reference count by one - may thus free the task.
 154 * Returns 0 or -EDEADLK.
 155 */
 156static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 157                                      int deadlock_detect,
 158                                      struct rt_mutex *orig_lock,
 159                                      struct rt_mutex_waiter *orig_waiter,
 160                                      struct task_struct *top_task)
 161{
 162        struct rt_mutex *lock;
 163        struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
 164        int detect_deadlock, ret = 0, depth = 0;
 165        unsigned long flags;
 166
 167        detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
 168                                                         deadlock_detect);
 169
 170        /*
 171         * The (de)boosting is a step by step approach with a lot of
  172         * pitfalls. We want this to be preemptible and we want to hold a
 173         * maximum of two locks per step. So we have to check
 174         * carefully whether things change under us.
 175         */
 176 again:
 177        if (++depth > max_lock_depth) {
 178                static int prev_max;
 179
 180                /*
 181                 * Print this only once. If the admin changes the limit,
 182                 * print a new message when reaching the limit again.
 183                 */
 184                if (prev_max != max_lock_depth) {
 185                        prev_max = max_lock_depth;
 186                        printk(KERN_WARNING "Maximum lock depth %d reached "
 187                               "task: %s (%d)\n", max_lock_depth,
 188                               top_task->comm, task_pid_nr(top_task));
 189                }
 190                put_task_struct(task);
 191
 192                return deadlock_detect ? -EDEADLK : 0;
 193        }
 194 retry:
 195        /*
  196         * The task cannot go away, as we took a reference (get_task_struct()) earlier!
 197         */
 198        raw_spin_lock_irqsave(&task->pi_lock, flags);
 199
 200        waiter = task->pi_blocked_on;
 201        /*
 202         * Check whether the end of the boosting chain has been
 203         * reached or the state of the chain has changed while we
 204         * dropped the locks.
 205         */
 206        if (!waiter || !waiter->task)
 207                goto out_unlock_pi;
 208
 209        /*
 210         * Check the orig_waiter state. After we dropped the locks,
 211         * the previous owner of the lock might have released the lock
 212         * and made us the pending owner:
 213         */
 214        if (orig_waiter && !orig_waiter->task)
 215                goto out_unlock_pi;
 216
 217        /*
  218         * Drop out when the task has no waiters. Note that
  219         * top_waiter can be NULL when we are in deboosting
  220         * mode!
 221         */
 222        if (top_waiter && (!task_has_pi_waiters(task) ||
 223                           top_waiter != task_top_pi_waiter(task)))
 224                goto out_unlock_pi;
 225
 226        /*
  227         * When deadlock detection is off, we check whether further
 228         * priority adjustment is necessary.
 229         */
 230        if (!detect_deadlock && waiter->list_entry.prio == task->prio)
 231                goto out_unlock_pi;
 232
 233        lock = waiter->lock;
 234        if (!raw_spin_trylock(&lock->wait_lock)) {
 235                raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 236                cpu_relax();
 237                goto retry;
 238        }
 239
 240        /* Deadlock detection */
 241        if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
 242                debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
 243                raw_spin_unlock(&lock->wait_lock);
 244                ret = deadlock_detect ? -EDEADLK : 0;
 245                goto out_unlock_pi;
 246        }
 247
 248        top_waiter = rt_mutex_top_waiter(lock);
 249
 250        /* Requeue the waiter */
 251        plist_del(&waiter->list_entry, &lock->wait_list);
 252        waiter->list_entry.prio = task->prio;
 253        plist_add(&waiter->list_entry, &lock->wait_list);
 254
 255        /* Release the task */
 256        raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 257        put_task_struct(task);
 258
 259        /* Grab the next task */
 260        task = rt_mutex_owner(lock);
 261        get_task_struct(task);
 262        raw_spin_lock_irqsave(&task->pi_lock, flags);
 263
 264        if (waiter == rt_mutex_top_waiter(lock)) {
 265                /* Boost the owner */
 266                plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
 267                waiter->pi_list_entry.prio = waiter->list_entry.prio;
 268                plist_add(&waiter->pi_list_entry, &task->pi_waiters);
 269                __rt_mutex_adjust_prio(task);
 270
 271        } else if (top_waiter == waiter) {
 272                /* Deboost the owner */
 273                plist_del(&waiter->pi_list_entry, &task->pi_waiters);
 274                waiter = rt_mutex_top_waiter(lock);
 275                waiter->pi_list_entry.prio = waiter->list_entry.prio;
 276                plist_add(&waiter->pi_list_entry, &task->pi_waiters);
 277                __rt_mutex_adjust_prio(task);
 278        }
 279
 280        raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 281
 282        top_waiter = rt_mutex_top_waiter(lock);
 283        raw_spin_unlock(&lock->wait_lock);
 284
 285        if (!detect_deadlock && waiter != top_waiter)
 286                goto out_put_task;
 287
 288        goto again;
 289
 290 out_unlock_pi:
 291        raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 292 out_put_task:
 293        put_task_struct(task);
 294
 295        return ret;
 296}
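
/*
 * Illustrative sketch (editor's addition, not part of rtmutex.c): the basic
 * shape of the walk performed by rt_mutex_adjust_prio_chain() above, with
 * all locking, requeueing and deboosting omitted.  Follow the chain
 * task -> blocked-on lock -> lock owner, propagate the priority upwards and
 * give up after max_depth steps.  The ex_* types and names are made up.
 */
struct ex_lock;

struct ex_task {
	int prio;			/* lower number == higher priority */
	struct ex_lock *blocked_on;	/* lock this task waits for, or NULL */
};

struct ex_lock {
	struct ex_task *owner;
};

int ex_propagate_prio(struct ex_task *task, int max_depth)
{
	int depth = 0;

	while (task && task->blocked_on) {
		struct ex_task *owner = task->blocked_on->owner;

		if (++depth > max_depth)
			return -1;		/* chain too deep, cf. max_lock_depth */
		if (!owner || owner->prio <= task->prio)
			break;			/* owner already high enough, stop */
		owner->prio = task->prio;	/* boost the owner */
		task = owner;			/* and continue up the chain */
	}
	return 0;
}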
 297
 298/*
 299 * Optimization: check if we can steal the lock from the
 300 * assigned pending owner [which might not have taken the
 301 * lock yet]:
 302 */
 303static inline int try_to_steal_lock(struct rt_mutex *lock,
 304                                    struct task_struct *task)
 305{
 306        struct task_struct *pendowner = rt_mutex_owner(lock);
 307        struct rt_mutex_waiter *next;
 308        unsigned long flags;
 309
 310        if (!rt_mutex_owner_pending(lock))
 311                return 0;
 312
 313        if (pendowner == task)
 314                return 1;
 315
 316        raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
 317        if (task->prio >= pendowner->prio) {
 318                raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 319                return 0;
 320        }
 321
 322        /*
  323         * Check if a waiter is enqueued on the pending owner's
  324         * pi_waiters list. Remove it and readjust the pending owner's
  325         * priority.
 326         */
 327        if (likely(!rt_mutex_has_waiters(lock))) {
 328                raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 329                return 1;
 330        }
 331
 332        /* No chain handling, pending owner is not blocked on anything: */
 333        next = rt_mutex_top_waiter(lock);
 334        plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
 335        __rt_mutex_adjust_prio(pendowner);
 336        raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 337
 338        /*
 339         * We are going to steal the lock and a waiter was
  340         * enqueued on the pending owner's pi_waiters queue. So
  341         * we have to enqueue this waiter into the
  342         * task->pi_waiters list. This covers the case
  343         * where task is boosted because it holds another
  344         * lock and gets unboosted because the booster is
  345         * interrupted, so we would otherwise delay a waiter with higher
  346         * priority than task->normal_prio.
 347         *
 348         * Note: in the rare case of a SCHED_OTHER task changing
 349         * its priority and thus stealing the lock, next->task
 350         * might be task:
 351         */
 352        if (likely(next->task != task)) {
 353                raw_spin_lock_irqsave(&task->pi_lock, flags);
 354                plist_add(&next->pi_list_entry, &task->pi_waiters);
 355                __rt_mutex_adjust_prio(task);
 356                raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 357        }
 358        return 1;
 359}
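
/*
 * Illustrative sketch (editor's addition, not part of rtmutex.c): the core
 * decision made by try_to_steal_lock() above, reduced to plain values.
 * Remember that a numerically lower prio means a higher priority.
 * ex_can_steal() is a made-up name.
 */
int ex_can_steal(int owner_is_pending, int we_are_pendowner,
		 int our_prio, int pendowner_prio)
{
	if (!owner_is_pending)
		return 0;			/* a real owner holds the lock */
	if (we_are_pendowner)
		return 1;			/* we are the pending owner already */
	return our_prio < pendowner_prio;	/* strictly higher priority wins */
}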
 360
 361/*
 362 * Try to take an rt-mutex
 363 *
 364 * This fails
 365 * - when the lock has a real owner
 366 * - when a different pending owner exists and has higher priority than current
 367 *
 368 * Must be called with lock->wait_lock held.
 369 */
 370static int try_to_take_rt_mutex(struct rt_mutex *lock)
 371{
 372        /*
 373         * We have to be careful here if the atomic speedups are
 374         * enabled, such that, when
 375         *  - no other waiter is on the lock
 376         *  - the lock has been released since we did the cmpxchg
 377         * the lock can be released or taken while we are doing the
 378         * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
 379         *
 380         * The atomic acquire/release aware variant of
 381         * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
 382         * the WAITERS bit, the atomic release / acquire can not
 383         * happen anymore and lock->wait_lock protects us from the
 384         * non-atomic case.
 385         *
  386         * Note that this might set lock->owner =
 387         * RT_MUTEX_HAS_WAITERS in the case the lock is not contended
 388         * any more. This is fixed up when we take the ownership.
 389         * This is the transitional state explained at the top of this file.
 390         */
 391        mark_rt_mutex_waiters(lock);
 392
 393        if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
 394                return 0;
 395
 396        /* We got the lock. */
 397        debug_rt_mutex_lock(lock);
 398
 399        rt_mutex_set_owner(lock, current, 0);
 400
 401        rt_mutex_deadlock_account_lock(lock, current);
 402
 403        return 1;
 404}
 405
 406/*
 407 * Task blocks on lock.
 408 *
 409 * Prepare waiter and propagate pi chain
 410 *
 411 * This must be called with lock->wait_lock held.
 412 */
 413static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 414                                   struct rt_mutex_waiter *waiter,
 415                                   struct task_struct *task,
 416                                   int detect_deadlock)
 417{
 418        struct task_struct *owner = rt_mutex_owner(lock);
 419        struct rt_mutex_waiter *top_waiter = waiter;
 420        unsigned long flags;
 421        int chain_walk = 0, res;
 422
 423        raw_spin_lock_irqsave(&task->pi_lock, flags);
 424        __rt_mutex_adjust_prio(task);
 425        waiter->task = task;
 426        waiter->lock = lock;
 427        plist_node_init(&waiter->list_entry, task->prio);
 428        plist_node_init(&waiter->pi_list_entry, task->prio);
 429
 430        /* Get the top priority waiter on the lock */
 431        if (rt_mutex_has_waiters(lock))
 432                top_waiter = rt_mutex_top_waiter(lock);
 433        plist_add(&waiter->list_entry, &lock->wait_list);
 434
 435        task->pi_blocked_on = waiter;
 436
 437        raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 438
 439        if (waiter == rt_mutex_top_waiter(lock)) {
 440                raw_spin_lock_irqsave(&owner->pi_lock, flags);
 441                plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
 442                plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
 443
 444                __rt_mutex_adjust_prio(owner);
 445                if (owner->pi_blocked_on)
 446                        chain_walk = 1;
 447                raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
 448        }
 449        else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
 450                chain_walk = 1;
 451
 452        if (!chain_walk)
 453                return 0;
 454
 455        /*
 456         * The owner can't disappear while holding a lock,
 457         * so the owner struct is protected by wait_lock.
  458         * The reference gets dropped in rt_mutex_adjust_prio_chain()!
 459         */
 460        get_task_struct(owner);
 461
 462        raw_spin_unlock(&lock->wait_lock);
 463
 464        res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
 465                                         task);
 466
 467        raw_spin_lock(&lock->wait_lock);
 468
 469        return res;
 470}
 471
 472/*
 473 * Wake up the next waiter on the lock.
 474 *
  475 * Remove the top waiter from the current task's waiter list and from
 476 * the lock waiter list. Set it as pending owner. Then wake it up.
 477 *
 478 * Called with lock->wait_lock held.
 479 */
 480static void wakeup_next_waiter(struct rt_mutex *lock)
 481{
 482        struct rt_mutex_waiter *waiter;
 483        struct task_struct *pendowner;
 484        unsigned long flags;
 485
 486        raw_spin_lock_irqsave(&current->pi_lock, flags);
 487
 488        waiter = rt_mutex_top_waiter(lock);
 489        plist_del(&waiter->list_entry, &lock->wait_list);
 490
 491        /*
 492         * Remove it from current->pi_waiters. We do not adjust a
 493         * possible priority boost right now. We execute wakeup in the
 494         * boosted mode and go back to normal after releasing
 495         * lock->wait_lock.
 496         */
 497        plist_del(&waiter->pi_list_entry, &current->pi_waiters);
 498        pendowner = waiter->task;
 499        waiter->task = NULL;
 500
 501        rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
 502
 503        raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 504
 505        /*
 506         * Clear the pi_blocked_on variable and enqueue a possible
 507         * waiter into the pi_waiters list of the pending owner. This
  508         * prevents a waiter with higher priority than
  509         * pending-owner->normal_prio from being left blocked on the
  510         * unboosted (pending) owner in case the pending owner gets unboosted.
 511         */
 512        raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
 513
 514        WARN_ON(!pendowner->pi_blocked_on);
 515        WARN_ON(pendowner->pi_blocked_on != waiter);
 516        WARN_ON(pendowner->pi_blocked_on->lock != lock);
 517
 518        pendowner->pi_blocked_on = NULL;
 519
 520        if (rt_mutex_has_waiters(lock)) {
 521                struct rt_mutex_waiter *next;
 522
 523                next = rt_mutex_top_waiter(lock);
 524                plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
 525        }
 526        raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 527
 528        wake_up_process(pendowner);
 529}
 530
 531/*
 532 * Remove a waiter from a lock
 533 *
 534 * Must be called with lock->wait_lock held
 535 */
 536static void remove_waiter(struct rt_mutex *lock,
 537                          struct rt_mutex_waiter *waiter)
 538{
 539        int first = (waiter == rt_mutex_top_waiter(lock));
 540        struct task_struct *owner = rt_mutex_owner(lock);
 541        unsigned long flags;
 542        int chain_walk = 0;
 543
 544        raw_spin_lock_irqsave(&current->pi_lock, flags);
 545        plist_del(&waiter->list_entry, &lock->wait_list);
 546        waiter->task = NULL;
 547        current->pi_blocked_on = NULL;
 548        raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 549
 550        if (first && owner != current) {
 551
 552                raw_spin_lock_irqsave(&owner->pi_lock, flags);
 553
 554                plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
 555
 556                if (rt_mutex_has_waiters(lock)) {
 557                        struct rt_mutex_waiter *next;
 558
 559                        next = rt_mutex_top_waiter(lock);
 560                        plist_add(&next->pi_list_entry, &owner->pi_waiters);
 561                }
 562                __rt_mutex_adjust_prio(owner);
 563
 564                if (owner->pi_blocked_on)
 565                        chain_walk = 1;
 566
 567                raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
 568        }
 569
 570        WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
 571
 572        if (!chain_walk)
 573                return;
 574
 575        /* gets dropped in rt_mutex_adjust_prio_chain()! */
 576        get_task_struct(owner);
 577
 578        raw_spin_unlock(&lock->wait_lock);
 579
 580        rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
 581
 582        raw_spin_lock(&lock->wait_lock);
 583}
 584
 585/*
  586 * Recheck the pi chain, in case our priority got changed
 587 *
 588 * Called from sched_setscheduler
 589 */
 590void rt_mutex_adjust_pi(struct task_struct *task)
 591{
 592        struct rt_mutex_waiter *waiter;
 593        unsigned long flags;
 594
 595        raw_spin_lock_irqsave(&task->pi_lock, flags);
 596
 597        waiter = task->pi_blocked_on;
 598        if (!waiter || waiter->list_entry.prio == task->prio) {
 599                raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 600                return;
 601        }
 602
 603        raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 604
 605        /* gets dropped in rt_mutex_adjust_prio_chain()! */
 606        get_task_struct(task);
 607        rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
 608}
 609
 610/**
 611 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 612 * @lock:                the rt_mutex to take
 613 * @state:               the state the task should block in (TASK_INTERRUPTIBLE
 614 *                       or TASK_UNINTERRUPTIBLE)
 615 * @timeout:             the pre-initialized and started timer, or NULL for none
 616 * @waiter:              the pre-initialized rt_mutex_waiter
 617 * @detect_deadlock:     passed to task_blocks_on_rt_mutex
 618 *
 619 * lock->wait_lock must be held by the caller.
 620 */
 621static int __sched
 622__rt_mutex_slowlock(struct rt_mutex *lock, int state,
 623                    struct hrtimer_sleeper *timeout,
 624                    struct rt_mutex_waiter *waiter,
 625                    int detect_deadlock)
 626{
 627        int ret = 0;
 628
 629        for (;;) {
 630                /* Try to acquire the lock: */
 631                if (try_to_take_rt_mutex(lock))
 632                        break;
 633
 634                /*
 635                 * TASK_INTERRUPTIBLE checks for signals and
 636                 * timeout. Ignored otherwise.
 637                 */
 638                if (unlikely(state == TASK_INTERRUPTIBLE)) {
 639                        /* Signal pending? */
 640                        if (signal_pending(current))
 641                                ret = -EINTR;
 642                        if (timeout && !timeout->task)
 643                                ret = -ETIMEDOUT;
 644                        if (ret)
 645                                break;
 646                }
 647
 648                /*
 649                 * waiter->task is NULL the first time we come here and
 650                 * when we have been woken up by the previous owner
 651                 * but the lock got stolen by a higher prio task.
 652                 */
 653                if (!waiter->task) {
 654                        ret = task_blocks_on_rt_mutex(lock, waiter, current,
 655                                                      detect_deadlock);
 656                        /*
 657                         * If we got woken up by the owner then start loop
 658                         * all over without going into schedule to try
 659                         * to get the lock now:
 660                         */
 661                        if (unlikely(!waiter->task)) {
 662                                /*
 663                                 * Reset the return value. We might
 664                                 * have returned with -EDEADLK and the
 665                                 * owner released the lock while we
 666                                 * were walking the pi chain.
 667                                 */
 668                                ret = 0;
 669                                continue;
 670                        }
 671                        if (unlikely(ret))
 672                                break;
 673                }
 674
 675                raw_spin_unlock(&lock->wait_lock);
 676
 677                debug_rt_mutex_print_deadlock(waiter);
 678
 679                if (waiter->task)
 680                        schedule_rt_mutex(lock);
 681
 682                raw_spin_lock(&lock->wait_lock);
 683                set_current_state(state);
 684        }
 685
 686        return ret;
 687}
 688
 689/*
 690 * Slow path lock function:
 691 */
 692static int __sched
 693rt_mutex_slowlock(struct rt_mutex *lock, int state,
 694                  struct hrtimer_sleeper *timeout,
 695                  int detect_deadlock)
 696{
 697        struct rt_mutex_waiter waiter;
 698        int ret = 0;
 699
 700        debug_rt_mutex_init_waiter(&waiter);
 701        waiter.task = NULL;
 702
 703        raw_spin_lock(&lock->wait_lock);
 704
 705        /* Try to acquire the lock again: */
 706        if (try_to_take_rt_mutex(lock)) {
 707                raw_spin_unlock(&lock->wait_lock);
 708                return 0;
 709        }
 710
 711        set_current_state(state);
 712
  713        /* Set up the timer when timeout != NULL */
 714        if (unlikely(timeout)) {
 715                hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
 716                if (!hrtimer_active(&timeout->timer))
 717                        timeout->task = NULL;
 718        }
 719
 720        ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
 721                                  detect_deadlock);
 722
 723        set_current_state(TASK_RUNNING);
 724
 725        if (unlikely(waiter.task))
 726                remove_waiter(lock, &waiter);
 727
 728        /*
 729         * try_to_take_rt_mutex() sets the waiter bit
 730         * unconditionally. We might have to fix that up.
 731         */
 732        fixup_rt_mutex_waiters(lock);
 733
 734        raw_spin_unlock(&lock->wait_lock);
 735
 736        /* Remove pending timer: */
 737        if (unlikely(timeout))
 738                hrtimer_cancel(&timeout->timer);
 739
 740        /*
 741         * Readjust priority, when we did not get the lock. We might
 742         * have been the pending owner and boosted. Since we did not
 743         * take the lock, the PI boost has to go.
 744         */
 745        if (unlikely(ret))
 746                rt_mutex_adjust_prio(current);
 747
 748        debug_rt_mutex_free_waiter(&waiter);
 749
 750        return ret;
 751}
 752
 753/*
 754 * Slow path try-lock function:
 755 */
 756static inline int
 757rt_mutex_slowtrylock(struct rt_mutex *lock)
 758{
 759        int ret = 0;
 760
 761        raw_spin_lock(&lock->wait_lock);
 762
 763        if (likely(rt_mutex_owner(lock) != current)) {
 764
 765                ret = try_to_take_rt_mutex(lock);
 766                /*
 767                 * try_to_take_rt_mutex() sets the lock waiters
 768                 * bit unconditionally. Clean this up.
 769                 */
 770                fixup_rt_mutex_waiters(lock);
 771        }
 772
 773        raw_spin_unlock(&lock->wait_lock);
 774
 775        return ret;
 776}
 777
 778/*
 779 * Slow path to release a rt-mutex:
 780 */
 781static void __sched
 782rt_mutex_slowunlock(struct rt_mutex *lock)
 783{
 784        raw_spin_lock(&lock->wait_lock);
 785
 786        debug_rt_mutex_unlock(lock);
 787
 788        rt_mutex_deadlock_account_unlock(current);
 789
 790        if (!rt_mutex_has_waiters(lock)) {
 791                lock->owner = NULL;
 792                raw_spin_unlock(&lock->wait_lock);
 793                return;
 794        }
 795
 796        wakeup_next_waiter(lock);
 797
 798        raw_spin_unlock(&lock->wait_lock);
 799
 800        /* Undo pi boosting if necessary: */
 801        rt_mutex_adjust_prio(current);
 802}
 803
 804/*
  805 * Debug-aware fast / slowpath lock, trylock and unlock
  806 *
  807 * The atomic acquire/release ops are compiled away when either the
  808 * architecture does not support cmpxchg or when debugging is enabled.
 809 */
 810static inline int
 811rt_mutex_fastlock(struct rt_mutex *lock, int state,
 812                  int detect_deadlock,
 813                  int (*slowfn)(struct rt_mutex *lock, int state,
 814                                struct hrtimer_sleeper *timeout,
 815                                int detect_deadlock))
 816{
 817        if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 818                rt_mutex_deadlock_account_lock(lock, current);
 819                return 0;
 820        } else
 821                return slowfn(lock, state, NULL, detect_deadlock);
 822}
 823
 824static inline int
 825rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
 826                        struct hrtimer_sleeper *timeout, int detect_deadlock,
 827                        int (*slowfn)(struct rt_mutex *lock, int state,
 828                                      struct hrtimer_sleeper *timeout,
 829                                      int detect_deadlock))
 830{
 831        if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 832                rt_mutex_deadlock_account_lock(lock, current);
 833                return 0;
 834        } else
 835                return slowfn(lock, state, timeout, detect_deadlock);
 836}
 837
 838static inline int
 839rt_mutex_fasttrylock(struct rt_mutex *lock,
 840                     int (*slowfn)(struct rt_mutex *lock))
 841{
 842        if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 843                rt_mutex_deadlock_account_lock(lock, current);
 844                return 1;
 845        }
 846        return slowfn(lock);
 847}
 848
 849static inline void
 850rt_mutex_fastunlock(struct rt_mutex *lock,
 851                    void (*slowfn)(struct rt_mutex *lock))
 852{
 853        if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
 854                rt_mutex_deadlock_account_unlock(current);
 855        else
 856                slowfn(lock);
 857}
 858
 859/**
 860 * rt_mutex_lock - lock a rt_mutex
 861 *
 862 * @lock: the rt_mutex to be locked
 863 */
 864void __sched rt_mutex_lock(struct rt_mutex *lock)
 865{
 866        might_sleep();
 867
 868        rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
 869}
 870EXPORT_SYMBOL_GPL(rt_mutex_lock);
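
/*
 * Usage sketch (editor's addition, not part of rtmutex.c): how kernel code
 * in process context would typically use the API exported above, assuming
 * <linux/rtmutex.h> from this tree.  ex_lock and ex_critical_section() are
 * made-up names.
 */
#include <linux/rtmutex.h>

static DEFINE_RT_MUTEX(ex_lock);	/* statically initialized rt_mutex */

static void ex_critical_section(void)
{
	rt_mutex_lock(&ex_lock);	/* may sleep; boosts the owner if needed */
	/* ... access data protected by ex_lock ... */
	rt_mutex_unlock(&ex_lock);	/* drops any PI boost we picked up */
}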
 871
 872/**
 873 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 874 *
 875 * @lock:               the rt_mutex to be locked
 876 * @detect_deadlock:    deadlock detection on/off
 877 *
 878 * Returns:
 879 *  0           on success
 880 * -EINTR       when interrupted by a signal
 881 * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
 882 */
 883int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
 884                                                 int detect_deadlock)
 885{
 886        might_sleep();
 887
 888        return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
 889                                 detect_deadlock, rt_mutex_slowlock);
 890}
 891EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
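
/*
 * Usage sketch (editor's addition, not part of rtmutex.c): the interruptible
 * variant above returns -EINTR when a signal arrives while blocking, so the
 * caller must be prepared to back out.  ex_lock_or_restart() is a made-up
 * name; deadlock detection is off (0) here.
 */
static int ex_lock_or_restart(struct rt_mutex *lock)
{
	int ret = rt_mutex_lock_interruptible(lock, 0);

	if (ret)		/* -EINTR, or -EDEADLK with detection enabled */
		return ret;
	/* ... critical section ... */
	rt_mutex_unlock(lock);
	return 0;
}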
 892
 893/**
  894 * rt_mutex_timed_lock - lock a rt_mutex interruptible;
 895 *                      the timeout structure is provided
 896 *                      by the caller
 897 *
 898 * @lock:               the rt_mutex to be locked
 899 * @timeout:            timeout structure or NULL (no timeout)
 900 * @detect_deadlock:    deadlock detection on/off
 901 *
 902 * Returns:
 903 *  0           on success
 904 * -EINTR       when interrupted by a signal
 905 * -ETIMEDOUT   when the timeout expired
 906 * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
 907 */
 908int
 909rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
 910                    int detect_deadlock)
 911{
 912        might_sleep();
 913
 914        return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
 915                                       detect_deadlock, rt_mutex_slowlock);
 916}
 917EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
 918
 919/**
 920 * rt_mutex_trylock - try to lock a rt_mutex
 921 *
 922 * @lock:       the rt_mutex to be locked
 923 *
 924 * Returns 1 on success and 0 on contention
 925 */
 926int __sched rt_mutex_trylock(struct rt_mutex *lock)
 927{
 928        return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
 929}
 930EXPORT_SYMBOL_GPL(rt_mutex_trylock);
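
/*
 * Usage sketch (editor's addition, not part of rtmutex.c): opportunistic
 * locking with the trylock variant above, which returns 1 on success and 0
 * on contention.  ex_try_update() is a made-up name.
 */
static int ex_try_update(struct rt_mutex *lock, int *counter)
{
	if (!rt_mutex_trylock(lock))
		return 0;	/* contended; the caller can retry later */
	(*counter)++;
	rt_mutex_unlock(lock);
	return 1;
}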
 931
 932/**
 933 * rt_mutex_unlock - unlock a rt_mutex
 934 *
 935 * @lock: the rt_mutex to be unlocked
 936 */
 937void __sched rt_mutex_unlock(struct rt_mutex *lock)
 938{
 939        rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
 940}
 941EXPORT_SYMBOL_GPL(rt_mutex_unlock);
 942
 943/**
 944 * rt_mutex_destroy - mark a mutex unusable
 945 * @lock: the mutex to be destroyed
 946 *
 947 * This function marks the mutex uninitialized, and any subsequent
 948 * use of the mutex is forbidden. The mutex must not be locked when
 949 * this function is called.
 950 */
 951void rt_mutex_destroy(struct rt_mutex *lock)
 952{
 953        WARN_ON(rt_mutex_is_locked(lock));
 954#ifdef CONFIG_DEBUG_RT_MUTEXES
 955        lock->magic = NULL;
 956#endif
 957}
 958
 959EXPORT_SYMBOL_GPL(rt_mutex_destroy);
 960
 961/**
 962 * __rt_mutex_init - initialize the rt lock
 963 *
 964 * @lock: the rt lock to be initialized
 965 *
 966 * Initialize the rt lock to unlocked state.
 967 *
  968 * Initializing a locked rt lock is not allowed.
 969 */
 970void __rt_mutex_init(struct rt_mutex *lock, const char *name)
 971{
 972        lock->owner = NULL;
 973        raw_spin_lock_init(&lock->wait_lock);
 974        plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
 975
 976        debug_rt_mutex_init(lock, name);
 977}
 978EXPORT_SYMBOL_GPL(__rt_mutex_init);
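
/*
 * Usage sketch (editor's addition, not part of rtmutex.c): dynamic
 * initialization of an rt_mutex embedded in another object, using the
 * rt_mutex_init() wrapper from <linux/rtmutex.h>, which ends up in
 * __rt_mutex_init() above.  struct ex_device and ex_device_setup() are
 * made-up names.
 */
struct ex_device {
	struct rt_mutex lock;
	int state;
};

static void ex_device_setup(struct ex_device *dev)
{
	rt_mutex_init(&dev->lock);	/* must not be called on a held lock */
	dev->state = 0;
}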
 979
 980/**
 981 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 982 *                              proxy owner
 983 *
 984 * @lock:       the rt_mutex to be locked
  985 * @proxy_owner: the task to set as owner
 986 *
  987 * No locking. The caller has to do the serializing itself
 988 * Special API call for PI-futex support
 989 */
 990void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 991                                struct task_struct *proxy_owner)
 992{
 993        __rt_mutex_init(lock, NULL);
 994        debug_rt_mutex_proxy_lock(lock, proxy_owner);
 995        rt_mutex_set_owner(lock, proxy_owner, 0);
 996        rt_mutex_deadlock_account_lock(lock, proxy_owner);
 997}
 998
 999/**
1000 * rt_mutex_proxy_unlock - release a lock on behalf of owner
1001 *
 1002 * @lock:       the rt_mutex to be unlocked
1003 *
 1004 * No locking. The caller has to do the serializing itself
1005 * Special API call for PI-futex support
1006 */
1007void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1008                           struct task_struct *proxy_owner)
1009{
1010        debug_rt_mutex_proxy_unlock(lock);
1011        rt_mutex_set_owner(lock, NULL, 0);
1012        rt_mutex_deadlock_account_unlock(proxy_owner);
1013}
1014
1015/**
1016 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1017 * @lock:               the rt_mutex to take
1018 * @waiter:             the pre-initialized rt_mutex_waiter
1019 * @task:               the task to prepare
1020 * @detect_deadlock:    perform deadlock detection (1) or not (0)
1021 *
1022 * Returns:
1023 *  0 - task blocked on lock
1024 *  1 - acquired the lock for task, caller should wake it up
1025 * <0 - error
1026 *
1027 * Special API call for FUTEX_REQUEUE_PI support.
1028 */
1029int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1030                              struct rt_mutex_waiter *waiter,
1031                              struct task_struct *task, int detect_deadlock)
1032{
1033        int ret;
1034
1035        raw_spin_lock(&lock->wait_lock);
1036
1037        mark_rt_mutex_waiters(lock);
1038
1039        if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040                /* We got the lock for task. */
1041                debug_rt_mutex_lock(lock);
1042                rt_mutex_set_owner(lock, task, 0);
1043                raw_spin_unlock(&lock->wait_lock);
1044                rt_mutex_deadlock_account_lock(lock, task);
1045                return 1;
1046        }
1047
1048        ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1049
1050        if (ret && !waiter->task) {
1051                /*
1052                 * Reset the return value. We might have
1053                 * returned with -EDEADLK and the owner
1054                 * released the lock while we were walking the
1055                 * pi chain.  Let the waiter sort it out.
1056                 */
1057                ret = 0;
1058        }
1059        raw_spin_unlock(&lock->wait_lock);
1060
1061        debug_rt_mutex_print_deadlock(waiter);
1062
1063        return ret;
1064}
1065
1066/**
1067 * rt_mutex_next_owner - return the next owner of the lock
1068 *
 1069 * @lock: the rt lock to query
1070 *
1071 * Returns the next owner of the lock or NULL
1072 *
1073 * Caller has to serialize against other accessors to the lock
1074 * itself.
1075 *
1076 * Special API call for PI-futex support
1077 */
1078struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1079{
1080        if (!rt_mutex_has_waiters(lock))
1081                return NULL;
1082
1083        return rt_mutex_top_waiter(lock)->task;
1084}
1085
1086/**
1087 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
1088 * @lock:               the rt_mutex we were woken on
1089 * @to:                 the timeout, null if none. hrtimer should already have
1090 *                      been started.
1091 * @waiter:             the pre-initialized rt_mutex_waiter
1092 * @detect_deadlock:    perform deadlock detection (1) or not (0)
1093 *
 1094 * Complete the lock acquisition started on our behalf by another thread.
1095 *
1096 * Returns:
1097 *  0 - success
1098 * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
1099 *
1100 * Special API call for PI-futex requeue support
1101 */
1102int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1103                               struct hrtimer_sleeper *to,
1104                               struct rt_mutex_waiter *waiter,
1105                               int detect_deadlock)
1106{
1107        int ret;
1108
1109        raw_spin_lock(&lock->wait_lock);
1110
1111        set_current_state(TASK_INTERRUPTIBLE);
1112
1113        ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
1114                                  detect_deadlock);
1115
1116        set_current_state(TASK_RUNNING);
1117
1118        if (unlikely(waiter->task))
1119                remove_waiter(lock, waiter);
1120
1121        /*
1122         * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1123         * have to fix that up.
1124         */
1125        fixup_rt_mutex_waiters(lock);
1126
1127        raw_spin_unlock(&lock->wait_lock);
1128
1129        /*
1130         * Readjust priority, when we did not get the lock. We might have been
1131         * the pending owner and boosted. Since we did not take the lock, the
1132         * PI boost has to go.
1133         */
1134        if (unlikely(ret))
1135                rt_mutex_adjust_prio(current);
1136
1137        return ret;
1138}
1139