// SPDX-License-Identifier: GPL-2.0
/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 */
#include <linux/rwsem.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/debug.h>
#include <linux/osq_lock.h>

#include "rwsem.h"

/*
 * Guide to the rw_semaphore's count field for common values.
 * (32-bit case illustrated, similar for 64-bit)
 *
 * 0x00000000	rwsem is unlocked, and no one is waiting for the lock or
 *		attempting to read lock or write lock.
 *
 * 0x0000000X	X readers active or attempting lock, no writer waiting;
 *		X = #active_readers + #readers attempting to lock
 *		(X*ACTIVE_BIAS)
 *
 * 0xffff000X	(1) X readers active or attempting lock, with waiters for
 *		    the lock
 *		    (X*ACTIVE_BIAS + WAITING_BIAS)
 *		(2) 1 writer active or attempting lock, X-1 readers active
 *		    or attempting lock, no waiters for the lock
 *		    ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
 *
 * 0xffff0000	There are writers or readers queued but none active or in
 *		the process of attempting lock.
 *		(WAITING_BIAS)
 *		Note: a writer can attempt to steal the lock for this count
 *		by replacing it with ACTIVE_WRITE_BIAS via cmpxchg and
 *		checking the old count.
 *
 * 0xfffe0001	1 writer active or attempting lock, waiters on queue.
 *		(ACTIVE_WRITE_BIAS + WAITING_BIAS)
 *
 * Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
 * that the count turned non-negative for successful lock acquisition.
 * Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
 * checking that the count became exactly ACTIVE_WRITE_BIAS; otherwise
 * they fall into the slow paths below.
 */
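
/*
 * Worked example of the bias arithmetic above (illustrative, 32-bit
 * values): two readers hold the lock while one writer sits on the wait
 * list, so
 *
 *	count = 2 * ACTIVE_BIAS + WAITING_BIAS
 *	      = 0x00000002 + 0xffff0000 = 0xffff0002
 *
 * Each up_read() removes one ACTIVE_BIAS; the final one leaves
 * count = 0xffff0000 (WAITING_BIAS alone: negative, zero active part),
 * which makes it call rwsem_wake() to hand the lock to the queued writer.
 */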

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	sem->owner = NULL;
	osq_lock_init(&sem->osq);
#endif
}

EXPORT_SYMBOL(__init_rwsem);
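
/*
 * Usage sketch (illustrative, not part of this file): a semaphore is
 * normally defined statically with DECLARE_RWSEM() or set up at runtime
 * with the init_rwsem() wrapper, which supplies the lock_class_key for
 * lockdep; 'my_sem' is a hypothetical name.
 *
 *	static DECLARE_RWSEM(my_sem);
 *
 *	down_read(&my_sem);
 *	... read the shared data ...
 *	up_read(&my_sem);
 *
 *	down_write(&my_sem);
 *	... modify the shared data ...
 *	up_write(&my_sem);
 */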

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
};

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

/*
 * handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then the RWSEM_WAITING_BIAS bit must
 *   have been set
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup; the caller must later invoke wake_up_q()
 *   to actually wake up the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having their
 *   task zeroed
 * - writers are only marked woken if downgrading is false
 */
static void __rwsem_mark_wake(struct rw_semaphore *sem,
			      enum rwsem_wake_type wake_type,
			      struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark the writer at the front of the queue for
			 * wakeup. Until the task is actually awoken later
			 * by the caller, other writers are able to steal
			 * the lock. Readers, on the other hand, will block
			 * as they will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
		}

		return;
	}

	/*
	 * Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		adjustment = RWSEM_ACTIVE_READ_BIAS;
 try_reader_grant:
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
			/*
			 * If the count is still less than RWSEM_WAITING_BIAS
			 * after removing the adjustment, it is assumed that
			 * a writer has stolen the lock. We have to undo our
			 * reader grant.
			 */
			if (atomic_long_add_return(-adjustment, &sem->count) <
			    RWSEM_WAITING_BIAS)
				return;

			/* Last active locker left. Retry waking readers. */
			goto try_reader_grant;
		}
		/*
		 * Set the owner to reader-owned here; it is not strictly
		 * necessary, but it gives the spinners an early indication
		 * that the readers now have the lock.
		 */
		__rwsem_set_reader_owned(sem, waiter->task);
	}

	/*
	 * Grant an infinite number of read locks to the readers at the front
	 * of the queue. We know that woken will be at least 1 as we accounted
	 * for above. Note we increment the 'active part' of the count by the
	 * number of readers before waking any processes up.
	 */
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		struct task_struct *tsk;

		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			break;

		woken++;
		tsk = waiter->task;

		get_task_struct(tsk);
		list_del(&waiter->list);
		/*
		 * Ensure that the last operation is setting the reader
		 * waiter to nil such that rwsem_down_read_failed() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wake up.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * happens after setting the reader waiter to nil.
		 */
		wake_q_add(wake_q, tsk);
		/* wake_q_add() already took its own reference to the task */
		put_task_struct(tsk);
	}

	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
	if (list_empty(&sem->wait_list)) {
		/* hit end of list above */
		adjustment -= RWSEM_WAITING_BIAS;
	}

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);
}
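
/*
 * Worked example of the adjustment arithmetic above (illustrative):
 * suppose rwsem_wake() finds three queued readers and the waker does not
 * hold the read lock, so one RWSEM_ACTIVE_READ_BIAS is added up front by
 * the reader-grant step. All three waiters are then woken (woken == 3)
 * and the list drains, giving
 *
 *	adjustment = 3 * RWSEM_ACTIVE_READ_BIAS - RWSEM_ACTIVE_READ_BIAS
 *		   - RWSEM_WAITING_BIAS
 *
 * i.e. the two remaining reader biases plus removal of the (negative)
 * waiting bias are folded into a single atomic_long_add(), leaving the
 * count equal to three reader biases.
 */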

/*
 * Wait for the read lock to be granted
 */
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer, this reader can exit the slowpath and return
		 * immediately as its RWSEM_ACTIVE_READ_BIAS has already
		 * been set in the count.
		 */
		if (atomic_long_read(&sem->count) >= 0) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
		adjustment += RWSEM_WAITING_BIAS;
	}
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	/*
	 * If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers!
	 */
	if (count == RWSEM_WAITING_BIAS ||
	    (count > RWSEM_WAITING_BIAS &&
	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	/* wait to be given the lock */
	while (true) {
		set_current_state(state);
		if (!waiter.task)
			break;
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			break;
		}
		schedule();
	}

	__set_current_state(TASK_RUNNING);
	return sem;
out_nolock:
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list))
		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
	return ERR_PTR(-EINTR);
}

__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
	return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);

__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
	return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);
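
/*
 * Caller-side sketch (illustrative): the killable slow path above backs
 * down_read_killable(), whose result must be checked since a fatal signal
 * aborts the wait with -EINTR; 'my_sem' is a hypothetical name.
 *
 *	if (down_read_killable(&my_sem))
 *		return -EINTR;
 *	... critical section ...
 *	up_read(&my_sem);
 */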

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 */
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
	/*
	 * Avoid trying to acquire the write lock if count isn't
	 * RWSEM_WAITING_BIAS, i.e. there are still active lockers.
	 */
	if (count != RWSEM_WAITING_BIAS)
		return false;

	/*
	 * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there
	 * are other tasks on the wait list, we need to add on WAITING_BIAS.
	 */
	count = list_is_singular(&sem->wait_list) ?
			RWSEM_ACTIVE_WRITE_BIAS :
			RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;

	if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
							== RWSEM_WAITING_BIAS) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}
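
/*
 * Example transition (illustrative): with exactly one queued writer and
 * nothing else on the list, the cmpxchg above attempts
 *
 *	RWSEM_WAITING_BIAS  ->  RWSEM_ACTIVE_WRITE_BIAS
 *
 * which clears the waiting bias and sets the writer's active bias in one
 * step. With more waiters queued behind us it attempts
 * RWSEM_WAITING_BIAS -> RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS
 * instead, leaving the waiting bias in place for them.
 */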

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire the write lock before the writer has been put on the
 * wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long old, count = atomic_long_read(&sem->count);

	while (true) {
		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
			return false;

		old = atomic_long_cmpxchg_acquire(&sem->count, count,
					count + RWSEM_ACTIVE_WRITE_BIAS);
		if (old == count) {
			rwsem_set_owner(sem);
			return true;
		}

		count = old;
	}
}

static inline bool owner_on_cpu(struct task_struct *owner)
{
	/*
	 * To guard against lock holder preemption, skip spinning if the
	 * owner task is not running on a CPU or its CPU is preempted
	 * (relevant when running as a virtualized guest).
	 */
	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	bool ret = true;

	BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));

	if (need_resched())
		return false;

	rcu_read_lock();
	owner = READ_ONCE(sem->owner);
	if (owner) {
		ret = is_rwsem_owner_spinnable(owner) &&
		      owner_on_cpu(owner);
	}
	rcu_read_unlock();
	return ret;
}

/*
 * Return true only if we can still spin on the owner field of the rwsem.
 */
static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner = READ_ONCE(sem->owner);

	if (!is_rwsem_owner_spinnable(owner))
		return false;

	rcu_read_lock();
	while (owner && (READ_ONCE(sem->owner) == owner)) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking that sem->owner still matches owner. If that
		 * check fails, owner might point to freed memory; if it
		 * still matches, the rcu_read_lock() ensures the memory
		 * stays valid.
		 */
		barrier();

		/*
		 * Abort spinning when need_resched() is set, the owner is
		 * not running, or the owner's CPU has been preempted.
		 */
		if (need_resched() || !owner_on_cpu(owner)) {
			rcu_read_unlock();
			return false;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	/*
	 * If there is a new owner or the owner is not set, we continue
	 * spinning.
	 */
	return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!rwsem_can_spin_on_owner(sem))
		goto done;

	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock, as we can't determine if they are
	 *     actively running or not.
	 */
	while (rwsem_spin_on_owner(sem)) {
		/*
		 * Try to acquire the lock
		 */
		if (rwsem_try_write_lock_unqueued(sem)) {
			taken = true;
			break;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task, that will live-lock because we won't
		 * let the owner complete.
		 */
		if (!sem->owner && (need_resched() || rt_task(current)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	return taken;
}

/*
 * Return true if the rwsem has an active spinner
 */
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
	return osq_is_locked(&sem->osq);
}

#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
	return false;
}
#endif

/*
 * Wait until we successfully acquire the write lock
 */
static inline struct rw_semaphore *
__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
{
	long count;
	bool waiting = true; /* any queued threads before us */
	struct rwsem_waiter waiter;
	struct rw_semaphore *ret = sem;
	DEFINE_WAKE_Q(wake_q);

	/* undo write bias from down_write operation, stop active locking */
	count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_optimistic_spin(sem))
		return sem;

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	if (list_empty(&sem->wait_list))
		waiting = false;

	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	if (waiting) {
		count = atomic_long_read(&sem->count);

		/*
		 * If there were already threads queued before us and there
		 * are no active writers, the lock must be read owned; so we
		 * try to wake any read locks that were queued ahead of us.
		 */
		if (count > RWSEM_WAITING_BIAS) {
			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
			/*
			 * The wakeup is normally called _after_ the wait_lock
			 * is released, but given that we are proactively
			 * waking readers we can deal with the wake_q overhead
			 * as it is similar to releasing and taking the
			 * wait_lock again for attempting
			 * rwsem_try_write_lock().
			 */
			wake_up_q(&wake_q);

			/*
			 * Reinitialize wake_q after use.
			 */
			wake_q_init(&wake_q);
		}

	} else
		count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);

	/* wait until we successfully acquire the lock */
	set_current_state(state);
	while (true) {
		if (rwsem_try_write_lock(count, sem))
			break;
		raw_spin_unlock_irq(&sem->wait_lock);

		/* Block until there are no active lockers. */
		do {
			if (signal_pending_state(state, current))
				goto out_nolock;

			schedule();
			set_current_state(state);
		} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);

		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);
	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);

	return ret;

out_nolock:
	__set_current_state(TASK_RUNNING);
	raw_spin_lock_irq(&sem->wait_lock);
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list))
		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	else
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	return ERR_PTR(-EINTR);
}

__visible struct rw_semaphore * __sched
rwsem_down_write_failed(struct rw_semaphore *sem)
{
	return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_write_failed);

__visible struct rw_semaphore * __sched
rwsem_down_write_failed_killable(struct rw_semaphore *sem)
{
	return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_write_failed_killable);
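
/*
 * Caller-side sketch (illustrative): the killable slow path above backs
 * down_write_killable(); as on the read side, a fatal signal aborts the
 * wait with -EINTR. 'my_sem' is a hypothetical name.
 *
 *	if (down_write_killable(&my_sem))
 *		return -EINTR;
 *	... exclusive section ...
 *	up_write(&my_sem);
 */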

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
__visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	/*
	 * The smp_rmb() below pairs with the barriers in the writer slow
	 * path so that a racing __up_write() cannot miss a wakeup:
	 *
	 * __rwsem_down_write_failed_common(sem)
	 *   rwsem_optimistic_spin(sem)
	 *     osq_unlock(sem->osq)
	 *   ...
	 *   atomic_long_add_return(&sem->count)
	 *
	 *      - VS -
	 *
	 *              __up_write()
	 *                if (atomic_long_sub_return_release(&sem->count) < 0)
	 *                  rwsem_wake(sem)
	 *                    osq_is_locked(&sem->osq)
	 *
	 * __up_write() must observe !osq_is_locked() when it observes the
	 * atomic_long_add_return() in order to not miss a wakeup. This
	 * boils down to the store-release/read-barrier pattern:
	 *
	 * [S.rel] X = 1                [RmW] r0 = (Y += 0)
	 *         MB                         RMB
	 * [RmW]   Y += 1               [L]   r1 = X
	 *
	 * where the outcome (r0 == 1 && r1 == 0) must be forbidden.
	 */
	smp_rmb();

	/*
	 * If a spinner is present, it is not necessary to do the wakeup.
	 * Try to do the wakeup only if the trylock succeeds, to minimize
	 * spinlock contention which may introduce too much delay in the
	 * unlock operation.
	 *
	 *    spinning writer		up_write/up_read caller
	 *    ---------------		-----------------------
	 * [S]   osq_unlock()		[L]   osq
	 *	 MB			      RMB
	 * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
	 *
	 * Here, a failed trylock means a spinning writer is in the middle
	 * of acquiring the lock under the wait_lock; that writer will
	 * observe the waiters and handle any necessary wakeup itself, so
	 * this path can safely skip it.
	 */
	if (rwsem_has_spinner(sem)) {
		/*
		 * The smp_rmb() here is to make sure that the spinner
		 * state is consulted before reading the wait_lock.
		 */
		smp_rmb();
		if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
			return sem;
		goto locked;
	}
	raw_spin_lock_irqsave(&sem->wait_lock, flags);
locked:

	if (!list_empty(&sem->wait_list))
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}
EXPORT_SYMBOL(rwsem_wake);

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
__visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}
EXPORT_SYMBOL(rwsem_downgrade_wake);
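
/*
 * Usage sketch (illustrative): downgrade_write() converts a held write
 * lock into a read lock with no unlocked window in between, and ends up
 * here to wake the readers queued behind the former writer; 'my_sem' is
 * a hypothetical name.
 *
 *	down_write(&my_sem);
 *	... publish the update ...
 *	downgrade_write(&my_sem);
 *	... keep reading, no writer can slip in ...
 *	up_read(&my_sem);
 */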