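/*
 * R/W semaphores: contention handling functions for the xadd-based
 * rw_semaphore implementation: waiter queueing, wakeup and optimistic
 * spinning slow paths.
 */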
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/rt.h>
#include <linux/osq_lock.h>

#include "rwsem.h"
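
/*
 * Guide to the rw_semaphore count field:
 *
 * The count is 0 when the semaphore is unlocked and nothing is queued.
 * Each reader holding or attempting the lock adds RWSEM_ACTIVE_READ_BIAS;
 * a writer holding or attempting the lock adds RWSEM_ACTIVE_WRITE_BIAS
 * (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS).  The first task queued on the
 * wait_list adds RWSEM_WAITING_BIAS, so a count of exactly
 * RWSEM_WAITING_BIAS means "waiters queued, but no active lockers".
 */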
void __init_rwsem(struct rw_semaphore *sem, const char *name,
                  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /*
         * Make sure we are not reinitializing a held semaphore:
         */
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
        sem->count = RWSEM_UNLOCKED_VALUE;
        raw_spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        sem->owner = NULL;
        osq_lock_init(&sem->osq);
#endif
}

EXPORT_SYMBOL(__init_rwsem);

enum rwsem_waiter_type {
        RWSEM_WAITING_FOR_WRITE,
        RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        enum rwsem_waiter_type type;
};

enum rwsem_wake_type {
        RWSEM_WAKE_ANY,         /* wake whatever is at the head of the queue */
        RWSEM_WAKE_READERS,     /* wake readers only */
        RWSEM_WAKE_READ_OWNED   /* the waker already owns the lock for read */
};
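
/*
 * Handle the lock being released while there are processes blocked on it
 * that can now run:
 * - the caller holds sem->wait_lock and has verified that wait_list is
 *   not empty;
 * - if the waiter at the front is waiting for write, it is woken only for
 *   RWSEM_WAKE_ANY and must still claim the lock itself;
 * - otherwise the lock is granted to all readers queued ahead of the first
 *   writer, and those readers are woken.
 */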
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        struct list_head *next;
        long oldcount, woken, loop, adjustment;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
                if (wake_type == RWSEM_WAKE_ANY)
                        /*
                         * Wake the writer at the front of the queue, but do
                         * not grant it the lock yet: it still has to claim
                         * the count itself, so other writers may keep
                         * stealing the lock in the meantime.
                         */
                        wake_up_process(waiter->task);
                goto out;
        }

        /*
         * Writers might steal the lock before we grant it to the next
         * reader.  We speculatively grant the first reader slot before
         * counting the readers, so that we can bail out early if a
         * writer stole the lock.
         */
        adjustment = 0;
        if (wake_type != RWSEM_WAKE_READ_OWNED) {
                adjustment = RWSEM_ACTIVE_READ_BIAS;
 try_reader_grant:
                oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
                if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
                        /* A writer stole the lock.  Undo our reader grant. */
                        if (rwsem_atomic_update(-adjustment, sem) &
                                                RWSEM_ACTIVE_MASK)
                                goto out;
                        /* Last active locker left.  Retry waking readers. */
                        goto try_reader_grant;
                }
        }

        /*
         * Grant the lock to all readers at the front of the queue.  Note
         * that we bump the 'active part' of the count by the number of
         * readers before waking any of them up.
         */
        woken = 0;
        do {
                woken++;

                if (waiter->list.next == &sem->wait_list)
                        break;

                waiter = list_entry(waiter->list.next,
                                    struct rwsem_waiter, list);

        } while (waiter->type != RWSEM_WAITING_FOR_WRITE);

        adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
        if (waiter->type != RWSEM_WAITING_FOR_WRITE)
                /* hit end of list above */
                adjustment -= RWSEM_WAITING_BIAS;

        if (adjustment)
                rwsem_atomic_add(adjustment, sem);

        next = sem->wait_list.next;
        loop = woken;
        do {
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
                /*
                 * Make sure we are done reading the waiter before clearing
                 * waiter->task: clearing ->task is what tells the sleeping
                 * reader in rwsem_down_read_failed() that it owns the lock,
                 * after which its on-stack waiter may vanish at any time.
                 */
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
        } while (--loop);

        sem->wait_list.next = next;
        next->prev = &sem->wait_list;

 out:
        return sem;
}
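
/*
 * Wait for the read lock to be granted.
 */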
__visible
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
        long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
        struct rwsem_waiter waiter;
        struct task_struct *tsk = current;

        /* set up my own style of waitqueue */
        waiter.task = tsk;
        waiter.type = RWSEM_WAITING_FOR_READ;
        get_task_struct(tsk);

        raw_spin_lock_irq(&sem->wait_lock);
        if (list_empty(&sem->wait_list))
                adjustment += RWSEM_WAITING_BIAS;
        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = rwsem_atomic_update(adjustment, sem);

        /*
         * If there are no active locks, wake the front queued process(es).
         *
         * If there are no writers and we are first in the queue,
         * wake our own waiter to join the existing active readers.
         */
        if (count == RWSEM_WAITING_BIAS ||
            (count > RWSEM_WAITING_BIAS &&
             adjustment != -RWSEM_ACTIVE_READ_BIAS))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

        raw_spin_unlock_irq(&sem->wait_lock);

        /* wait to be given the lock */
        while (true) {
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
                if (!waiter.task)
                        break;
                schedule();
        }

        __set_task_state(tsk, TASK_RUNNING);
        return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);

static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
        /*
         * Try acquiring the write lock.  Check count first in order
         * to reduce unnecessary expensive cmpxchg() operations.
         */
        if (count == RWSEM_WAITING_BIAS &&
            cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
                    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
                if (!list_is_singular(&sem->wait_list))
                        rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
                rwsem_set_owner(sem);
                return true;
        }

        return false;
}

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
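/*
 * Try to acquire the write lock before the writer has been put on the
 * wait queue.
 */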
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
        long old, count = READ_ONCE(sem->count);

        while (true) {
                if (!(count == 0 || count == RWSEM_WAITING_BIAS))
                        return false;

                old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
                if (old == count) {
                        rwsem_set_owner(sem);
                        return true;
                }

                count = old;
        }
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
        struct task_struct *owner;
        bool ret = true;

        if (need_resched())
                return false;

        rcu_read_lock();
        owner = READ_ONCE(sem->owner);
        if (!owner) {
                long count = READ_ONCE(sem->count);
                /*
                 * If sem->owner is not set, yet we have just recently
                 * entered the slowpath with the lock being active, then
                 * there is a possibility reader(s) may have the lock.
                 * To be safe, avoid spinning in these situations.
                 */
                if (count & RWSEM_ACTIVE_MASK)
                        ret = false;
                goto done;
        }

        ret = owner->on_cpu;
done:
        rcu_read_unlock();
        return ret;
}

static noinline
bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
{
        long count;

        rcu_read_lock();
        while (sem->owner == owner) {
                /*
                 * Ensure we emit the owner->on_cpu dereference _after_
                 * checking that sem->owner still matches owner.  If that
                 * fails, owner might point to freed memory; if it still
                 * matches, the rcu_read_lock() ensures the memory stays
                 * valid.
                 */
                barrier();

                /* abort spinning when need_resched or owner is not running */
                if (!owner->on_cpu || need_resched()) {
                        rcu_read_unlock();
                        return false;
                }

                cpu_relax_lowlatency();
        }
        rcu_read_unlock();

        if (READ_ONCE(sem->owner))
                return true; /* new owner, continue spinning */

        /*
         * When the owner is not set, the lock could be free or held by
         * readers.  Check the counter to verify the state.
         */
        count = READ_ONCE(sem->count);
        return (count == 0 || count == RWSEM_WAITING_BIAS);
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
        struct task_struct *owner;
        bool taken = false;

        preempt_disable();

        /* sem->wait_lock should not be held when doing optimistic spinning */
        if (!rwsem_can_spin_on_owner(sem))
                goto done;

        if (!osq_lock(&sem->osq))
                goto done;

        while (true) {
                owner = READ_ONCE(sem->owner);
                if (owner && !rwsem_spin_on_owner(sem, owner))
                        break;

                /* wait_lock will be acquired if write_lock is obtained */
                if (rwsem_try_write_lock_unqueued(sem)) {
                        taken = true;
                        break;
                }

                /*
                 * When there's no owner, we might have preempted between
                 * the owner acquiring the lock and setting the owner field.
                 * If we're an RT task that will live-lock because we won't
                 * let the owner complete.
                 */
                if (!owner && (need_resched() || rt_task(current)))
                        break;

                /*
                 * The cpu_relax() call is a compiler barrier which forces
                 * everything in this loop to be re-loaded.  We don't need
                 * memory barriers as we'll eventually observe the right
                 * values at the cost of a few extra spins.
                 */
                cpu_relax_lowlatency();
        }
        osq_unlock(&sem->osq);
done:
        preempt_enable();
        return taken;
}
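
/*
 * Return true if the rwsem has an active spinner
 */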
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
        return osq_is_locked(&sem->osq);
}

#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
        return false;
}

static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
        return false;
}
#endif
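
/*
 * Wait until we successfully acquire the write lock
 */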
__visible
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
        long count;
        bool waiting = true; /* any queued threads before us */
        struct rwsem_waiter waiter;

        /* undo write bias from down_write operation, stop active locking */
        count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);

        /* do optimistic spinning and steal lock if possible */
        if (rwsem_optimistic_spin(sem))
                return sem;

        /*
         * Optimistic spinning failed, proceed to the slowpath
         * and block until we can acquire the sem.
         */
        waiter.task = current;
        waiter.type = RWSEM_WAITING_FOR_WRITE;

        raw_spin_lock_irq(&sem->wait_lock);

        /* account for this before adding a new element to the list */
        if (list_empty(&sem->wait_list))
                waiting = false;

        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        if (waiting) {
                count = READ_ONCE(sem->count);

                /*
                 * If there were already threads queued before us and there
                 * are no active writers, the lock must be read owned; so we
                 * try to wake any read locks that were queued ahead of us.
                 */
                if (count > RWSEM_WAITING_BIAS)
                        sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);

        } else
                count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);

        /* wait until we successfully acquire the lock */
        set_current_state(TASK_UNINTERRUPTIBLE);
        while (true) {
                if (rwsem_try_write_lock(count, sem))
                        break;
                raw_spin_unlock_irq(&sem->wait_lock);

                /* Block until there are no active lockers. */
                do {
                        schedule();
                        set_current_state(TASK_UNINTERRUPTIBLE);
                } while ((count = sem->count) & RWSEM_ACTIVE_MASK);

                raw_spin_lock_irq(&sem->wait_lock);
        }
        __set_current_state(TASK_RUNNING);

        list_del(&waiter.list);
        raw_spin_unlock_irq(&sem->wait_lock);

        return sem;
}
EXPORT_SYMBOL(rwsem_down_write_failed);
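
/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */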
__visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        /*
         * If a spinner is present, it is not necessary to do the wakeup:
         * the spinner will take over the lock and wake the waiters itself.
         * Try to do the wakeup only if the wait_lock trylock succeeds, to
         * minimize spinlock contention which may introduce too much delay
         * in the unlock operation.
         */
        if (rwsem_has_spinner(sem)) {
                /*
                 * The smp_rmb() here is to make sure that the spinner
                 * state is consulted before reading the wait_lock.
                 */
                smp_rmb();
                if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
                        return sem;
                goto locked;
        }
        raw_spin_lock_irqsave(&sem->wait_lock, flags);
locked:

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}
EXPORT_SYMBOL(rwsem_wake);
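
/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */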
__visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}
EXPORT_SYMBOL(rwsem_downgrade_wake);