linux/lib/rwsem.c
/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
                  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /*
         * Make sure we are not reinitializing a held semaphore:
         */
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
        sem->count = RWSEM_UNLOCKED_VALUE;
        spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
}

EXPORT_SYMBOL(__init_rwsem);
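
/* Caller-side usage, for reference: these are the standard <linux/rwsem.h>
 * entry points rather than anything defined in this file.  An rwsem is
 * declared statically with DECLARE_RWSEM() or set up at run time with
 * init_rwsem(), which ends up in __init_rwsem() above.  A rough sketch
 * (my_sem is just an illustrative name):
 *
 *      static DECLARE_RWSEM(my_sem);
 *
 *      down_read(&my_sem);             shared (reader) lock
 *      ...
 *      up_read(&my_sem);
 *
 *      down_write(&my_sem);            exclusive (writer) lock
 *      ...
 *      up_write(&my_sem);
 *
 * The uncontended fast paths of those calls are architecture-specific; only
 * their contended slow paths land in the functions below.
 */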

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        unsigned int flags;
#define RWSEM_WAITING_FOR_READ  0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};

/* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and
 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
 * since the rwsem value was observed.
 */
#define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
#define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */

/*
 * handle the lock release when there are processes blocked on it that can
 * now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if wake_type is not RWSEM_WAKE_READ_OWNED
 */
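/* A worked example of the count layout may help.  With the usual 32-bit bias
 * values (the exact constants live in the per-architecture rwsem.h, so treat
 * these numbers as illustrative rather than definitive):
 *
 *      RWSEM_UNLOCKED_VALUE    =  0x00000000
 *      RWSEM_ACTIVE_BIAS       =  0x00000001
 *      RWSEM_ACTIVE_MASK       =  0x0000ffff
 *      RWSEM_WAITING_BIAS      = -0x00010000
 *      RWSEM_ACTIVE_READ_BIAS  =  RWSEM_ACTIVE_BIAS
 *      RWSEM_ACTIVE_WRITE_BIAS =  RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS
 *
 * a free rwsem has count 0, two active readers give 0x00000002, and a lone
 * active writer gives 0xffff0001.  Note that one active reader plus one
 * queued sleeper also gives 0xffff0001, so the count alone cannot name the
 * exact state; the wait list, protected by sem->wait_lock, carries the rest.
 */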
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        struct list_head *next;
        signed long oldcount, woken, loop, adjustment;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
                goto readers_only;

        if (wake_type == RWSEM_WAKE_READ_OWNED)
                /* Another active reader was observed, so wakeup is not
                 * likely to succeed. Save the atomic op.
                 */
                goto out;

        /* There's a writer at the front of the queue - try to grant it the
         * write lock.  However, we only wake this writer if we can transition
         * the active part of the count from 0 -> 1
         */
        adjustment = RWSEM_ACTIVE_WRITE_BIAS;
        if (waiter->list.next == &sem->wait_list)
                adjustment -= RWSEM_WAITING_BIAS;
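        /* Concretely: if this writer is the only waiter, the adjustment
         * collapses to RWSEM_ACTIVE_BIAS (claim one active slot and drop the
         * waiting bias, since the list is about to empty); otherwise the
         * full RWSEM_ACTIVE_WRITE_BIAS is added and the waiting bias already
         * in the count stays on behalf of the waiters queued behind it.
         */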

 try_again_write:
        oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
        if (oldcount & RWSEM_ACTIVE_MASK)
                /* Someone grabbed the sem already */
                goto undo_write;

        /* We must be careful not to touch 'waiter' after we set ->task = NULL.
         * It is allocated on the waiter's stack and may become invalid at
         * any time after that point (due to a wakeup from another source).
         */
        list_del(&waiter->list);
        tsk = waiter->task;
        smp_mb();
        waiter->task = NULL;
        wake_up_process(tsk);
        put_task_struct(tsk);
        goto out;

 readers_only:
        /* If we come here from up_xxxx(), another thread might have reached
         * rwsem_down_failed_common() before we acquired the spinlock and
         * woken up a waiter, making it now active.  We prefer to check for
         * this first in order to not spend too much time with the spinlock
         * held if we're not going to be able to wake up readers in the end.
         *
         * Note that we do not need to update the rwsem count: any writer
         * trying to acquire rwsem will run rwsem_down_write_failed() due
         * to the waiting threads and block trying to acquire the spinlock.
         *
         * We use a dummy atomic update in order to acquire the cache line
         * exclusively since we expect to succeed and run the final rwsem
         * count adjustment pretty soon.
         */
        if (wake_type == RWSEM_WAKE_ANY &&
            rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
                /* Someone grabbed the sem for write already */
                goto out;

        /* Grant an infinite number of read locks to the readers at the front
         * of the queue.  Note we increment the 'active part' of the count by
         * the number of readers before waking any processes up.
         */
        woken = 0;
        do {
                woken++;

                if (waiter->list.next == &sem->wait_list)
                        break;

                waiter = list_entry(waiter->list.next,
                                        struct rwsem_waiter, list);

        } while (waiter->flags & RWSEM_WAITING_FOR_READ);

        adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
        if (waiter->flags & RWSEM_WAITING_FOR_READ)
                /* hit end of list above */
                adjustment -= RWSEM_WAITING_BIAS;
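        /* For example: with three readers queued ahead of a writer, woken
         * ends up as 3, the scan stops on the writer, and adjustment is
         * 3 * RWSEM_ACTIVE_READ_BIAS with the waiting bias left in place
         * because the writer still waits.  Only when every waiter is a
         * reader is the waiting bias removed, as the list is about to empty.
         */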

        rwsem_atomic_add(adjustment, sem);

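        /* Each waiter below is allocated on its sleeper's stack (see
         * rwsem_down_failed_common()), so once ->task is cleared the waiter
         * may vanish at any moment.  The smp_mb() orders the reads of the
         * waiter fields against that store, and the task reference the
         * sleeper took keeps 'tsk' valid for wake_up_process().
         */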
        next = sem->wait_list.next;
        for (loop = woken; loop > 0; loop--) {
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
        }

        sem->wait_list.next = next;
        next->prev = &sem->wait_list;

 out:
        return sem;

        /* Undo the change to the active count, but check for a transition
         * 1->0: if the active count has meanwhile dropped back to zero, the
         * other locker has gone away and we should retry the wakeup.
         */
 undo_write:
        if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
                goto out;
        goto try_again_write;
}

/*
 * wait for a lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_failed_common(struct rw_semaphore *sem,
                         unsigned int flags, signed long adjustment)
{
        struct rwsem_waiter waiter;
        struct task_struct *tsk = current;
        signed long count;

        set_task_state(tsk, TASK_UNINTERRUPTIBLE);

        /* set up my own style of waitqueue */
        spin_lock_irq(&sem->wait_lock);
        waiter.task = tsk;
        waiter.flags = flags;
        get_task_struct(tsk);

        if (list_empty(&sem->wait_list))
                adjustment += RWSEM_WAITING_BIAS;
        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = rwsem_atomic_update(adjustment, sem);

        /* If there are no active locks, wake the front queued process(es) up.
         *
         * Alternatively, if we're called from a failed down_write(), there
         * were already threads queued before us and there are no active
         * writers, the lock must be read owned; so we try to wake any read
         * locks that were queued ahead of us. */
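        /* Concretely: count == RWSEM_WAITING_BIAS means nothing but the
         * waiting bias is left, i.e. no active readers or writers remain.
         * A count above RWSEM_WAITING_BIAS after we removed a write bias
         * means at least one active reader and no active writer, hence the
         * read-owned wakeup.
         */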
        if (count == RWSEM_WAITING_BIAS)
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
        else if (count > RWSEM_WAITING_BIAS &&
                 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

        spin_unlock_irq(&sem->wait_lock);

        /* wait to be given the lock */
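        /* __rwsem_do_wake() hands over the lock by clearing waiter.task
         * (after its smp_mb()) and then waking us, so observing NULL here
         * means the lock is ours and waiter may safely go out of scope.
         */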
        for (;;) {
                if (!waiter.task)
                        break;
                schedule();
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
        }

        tsk->state = TASK_RUNNING;

        return sem;
}

/*
 * wait for the read lock to be granted
 */
asmregparm struct rw_semaphore __sched *
rwsem_down_read_failed(struct rw_semaphore *sem)
{
        return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
                                        -RWSEM_ACTIVE_READ_BIAS);
}

/*
 * wait for the write lock to be granted
 */
asmregparm struct rw_semaphore __sched *
rwsem_down_write_failed(struct rw_semaphore *sem)
{
        return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
                                        -RWSEM_ACTIVE_WRITE_BIAS);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}
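
/* For orientation only: the architecture fast paths typically call
 * rwsem_wake() from up_read()/up_write() when the post-decrement count shows
 * waiters queued and no remaining active holders.  The precise trigger is
 * arch-specific; this is a sketch of the intent, not a definition of it.
 */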

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}
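
/* Caller-side sketch of the downgrade path, for reference: downgrade_write()
 * is the standard <linux/rwsem.h> entry point whose contended case lands
 * here (my_sem is illustrative):
 *
 *      down_write(&my_sem);
 *      ...set up the protected data exclusively...
 *      downgrade_write(&my_sem);
 *      ...continue reading it; other readers may now run as well...
 *      up_read(&my_sem);
 */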

EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);