/*
 * R/W semaphores: contention handling functions for the XADD
 * (atomic count) implementation.
 *
 * The uncontended fast paths are a single atomic operation on
 * sem->count; this file implements the slow paths taken on contention:
 * queueing and waking readers and writers, optimistic spinning for
 * writers, and write-lock downgrade.
 */
#include <linux/rwsem.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/debug.h>
#include <linux/osq_lock.h>

#include "rwsem.h"

/*
 * Guide to the rw_semaphore's count field (a signed long):
 *
 *  - Each active or pending reader adds RWSEM_ACTIVE_READ_BIAS (+1).
 *  - A writer adds RWSEM_ACTIVE_WRITE_BIAS, which is
 *    RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS.
 *  - RWSEM_WAITING_BIAS (a large negative value, e.g. 0xffff0000 on
 *    32-bit) is added once when the wait list goes non-empty and
 *    removed again when it empties.
 *
 * Common states:
 *
 *  count == 0                   lock is free, nobody waiting
 *  count > 0                    'count' readers active, nobody waiting
 *  count == RWSEM_WAITING_BIAS  waiters queued but none active; a
 *                               writer may steal the lock with cmpxchg
 *  other negative values        a writer is active, or readers are
 *                               active with waiters queued
 *
 * Readers acquire by atomically adding ACTIVE_READ_BIAS and succeed if
 * the result is positive.  Writers acquire by atomically adding
 * ACTIVE_WRITE_BIAS and succeed only if the result is exactly
 * ACTIVE_WRITE_BIAS (nobody else active or waiting); otherwise they
 * fall into the slow paths below.
 */
/*
 * Initialize the semaphore; with CONFIG_DEBUG_LOCK_ALLOC this also
 * registers the lock's lockdep class.
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	sem->owner = NULL;
	osq_lock_init(&sem->osq);
#endif
}

EXPORT_SYMBOL(__init_rwsem);
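
/*
 * Example (sketch, not part of this file): typical rwsem usage from
 * client code; "my_sem" is a hypothetical lock.  init_rwsem() expands
 * to __init_rwsem() with a lockdep class key for the call site:
 *
 *	static DECLARE_RWSEM(my_sem);	(or: init_rwsem(&my_sem);)
 *
 *	down_read(&my_sem);		(shared, reader side)
 *	...read shared state...
 *	up_read(&my_sem);
 *
 *	down_write(&my_sem);		(exclusive, writer side)
 *	...modify shared state...
 *	up_write(&my_sem);
 *
 * The down_*()/up_*() fast paths are a single atomic op on sem->count;
 * only contended calls reach the rwsem_*_failed()/rwsem_wake() slow
 * paths below.
 */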

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
};

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

/*
 * Handle the lock release when processes blocked on it can now run:
 * - there must be someone on the wait queue;
 * - the caller must hold sem->wait_lock;
 * - tasks are only marked for wakeup here; the caller must later call
 *   wake_up_q() to actually wake them, preferably after dropping
 *   wait_lock;
 * - woken readers are removed from the list and have waiter->task
 *   zeroed;
 * - a writer at the head of the queue is woken only for
 *   RWSEM_WAKE_ANY.
 */
static void __rwsem_mark_wake(struct rw_semaphore *sem,
			      enum rwsem_wake_type wake_type,
			      struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark the writer at the front of the queue for
			 * wakeup.  Until the task is actually awoken later
			 * by the caller, other writers are able to steal
			 * the lock.  Readers, on the other hand, will block
			 * as they will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
		}

		return;
	}

	/*
	 * Writers might steal the lock before we grant it to the next
	 * reader.  We prefer to do the first reader grant before counting
	 * readers so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		adjustment = RWSEM_ACTIVE_READ_BIAS;
 try_reader_grant:
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
			/*
			 * If the count is still less than RWSEM_WAITING_BIAS
			 * after removing the adjustment, it is assumed that
			 * a writer has stolen the lock.  We have to undo our
			 * reader grant.
			 */
			if (atomic_long_add_return(-adjustment, &sem->count) <
			    RWSEM_WAITING_BIAS)
				return;

			/* Last active locker left.  Retry waking readers. */
			goto try_reader_grant;
		}
		/*
		 * Set it to reader-owned to give spinners an early
		 * indication that readers now have the lock.
		 */
		__rwsem_set_reader_owned(sem, waiter->task);
	}

	/*
	 * Grant read locks to all the readers at the front of the queue.
	 * We know that woken will be at least 1, as we accounted for it
	 * above.  Note that we increment the 'active part' of the count by
	 * the number of readers before waking any processes up.
	 */
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		struct task_struct *tsk;

		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			break;

		woken++;
		tsk = waiter->task;

		get_task_struct(tsk);
		list_del(&waiter->list);
		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_failed() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wake up.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add_safe(wake_q, tsk);
	}

	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
	if (list_empty(&sem->wait_list)) {
		/* Hit the end of the list above: no more waiters. */
		adjustment -= RWSEM_WAITING_BIAS;
	}

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);
}
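
/*
 * Worked example of the adjustment arithmetic above (a sketch using
 * the usual 32-bit constants from rwsem.h: ACTIVE_READ_BIAS ==
 * 0x00000001, WAITING_BIAS == 0xffff0000 == -0x10000; the 64-bit
 * values differ only in width).  Suppose three readers sleep behind a
 * write holder, so count == ACTIVE_WRITE_BIAS + WAITING_BIAS ==
 * 0xfffe0001.  up_write() subtracts ACTIVE_WRITE_BIAS, leaving
 * 0xffff0000, and calls rwsem_wake().  The reader grant adds 1 (count
 * 0xffff0001; oldcount == WAITING_BIAS, so the grant sticks), the loop
 * wakes all three readers, and the final adjustment is
 * 3*1 - 1 - WAITING_BIAS == 2 + 0x10000, bringing count to
 * 0x00000003: three active readers, no waiters.
 */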

/*
 * Wait for the read lock to be granted.
 */
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer, this reader can exit the slowpath and return
		 * immediately as its RWSEM_ACTIVE_READ_BIAS has already
		 * been set in the count.
		 */
		if (atomic_long_read(&sem->count) >= 0) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
		adjustment += RWSEM_WAITING_BIAS;
	}
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	/*
	 * If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers!
	 */
	if (count == RWSEM_WAITING_BIAS ||
	    (count > RWSEM_WAITING_BIAS &&
	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	/* wait to be given the lock */
	while (true) {
		set_current_state(state);
		if (!waiter.task)
			break;
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			break;
		}
		schedule();
	}

	__set_current_state(TASK_RUNNING);
	return sem;
out_nolock:
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list))
		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
	return ERR_PTR(-EINTR);
}

__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
	return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);

__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
	return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);
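
/*
 * Example (sketch, not part of this file): the generic read fast path
 * that funnels into the slow paths above looks roughly like this; see
 * the rwsem fast-path implementation for the authoritative version:
 *
 *	static inline void __down_read(struct rw_semaphore *sem)
 *	{
 *		if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0))
 *			rwsem_down_read_failed(sem);
 *	}
 *
 * The reader bias is thus already in sem->count on entry to
 * __rwsem_down_read_failed_common(), which is why 'adjustment' starts
 * at -RWSEM_ACTIVE_READ_BIAS.
 */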

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 */
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
	/*
	 * Avoid trying to acquire the write lock if count isn't
	 * RWSEM_WAITING_BIAS, i.e. if anybody is actively locking.
	 */
	if (count != RWSEM_WAITING_BIAS)
		return false;

	/*
	 * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS.  If
	 * there are other tasks on the wait list, we need to add on
	 * WAITING_BIAS.
	 */
	count = list_is_singular(&sem->wait_list) ?
			RWSEM_ACTIVE_WRITE_BIAS :
			RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;

	if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
							== RWSEM_WAITING_BIAS) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}
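
/*
 * Worked example (a sketch, again assuming the usual 32-bit constants):
 * with waiters queued and nothing active, count == RWSEM_WAITING_BIAS
 * (0xffff0000).  If our waiter is the only entry, the cmpxchg swaps in
 * ACTIVE_WRITE_BIAS (0xffff0001), claiming the lock and clearing the
 * waiting bias in one step; otherwise it swaps in ACTIVE_WRITE_BIAS +
 * WAITING_BIAS (0xfffe0001) so the remaining waiters stay accounted
 * for.
 */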

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire the write lock before the writer has been put on the
 * wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long old, count = atomic_long_read(&sem->count);

	while (true) {
		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
			return false;

		old = atomic_long_cmpxchg_acquire(&sem->count, count,
				count + RWSEM_ACTIVE_WRITE_BIAS);
		if (old == count) {
			rwsem_set_owner(sem);
			return true;
		}

		count = old;
	}
}

static inline bool owner_on_cpu(struct task_struct *owner)
{
	/*
	 * Skip spinning if the lock holder is not currently running on a
	 * CPU, or if its CPU has itself been preempted (e.g. a preempted
	 * vCPU): spinning would then only burn cycles.
	 */
	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	bool ret = true;

	BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));

	if (need_resched())
		return false;

	rcu_read_lock();
	owner = READ_ONCE(sem->owner);
	if (owner) {
		ret = is_rwsem_owner_spinnable(owner) &&
		      owner_on_cpu(owner);
	}
	rcu_read_unlock();
	return ret;
}

/*
 * Return true only if we can still spin on the owner field of the rwsem.
 */
static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner = READ_ONCE(sem->owner);

	if (!is_rwsem_owner_spinnable(owner))
		return false;

	rcu_read_lock();
	while (owner && (READ_ONCE(sem->owner) == owner)) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking that sem->owner still matches owner.  If that
		 * fails, owner might point to freed memory; if it still
		 * matches, the rcu_read_lock() ensures the memory stays
		 * valid.
		 */
		barrier();

		/*
		 * Abort spinning when need_resched() is set, or when the
		 * owner is not running or its CPU has been preempted.
		 */
		if (need_resched() || !owner_on_cpu(owner)) {
			rcu_read_unlock();
			return false;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	/*
	 * If there is a new owner, or the owner is no longer set, keep
	 * spinning as long as the new owner state is still spinnable.
	 */
	return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!rwsem_can_spin_on_owner(sem))
		goto done;

	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes.  Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock, as we can't determine if they are
	 *     actively running or not.
	 */
	while (rwsem_spin_on_owner(sem)) {
		/*
		 * Try to acquire the lock.
		 */
		if (rwsem_try_write_lock_unqueued(sem)) {
			taken = true;
			break;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field.  If
		 * we're an RT task, that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!sem->owner && (need_resched() || rt_task(current)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded.  We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	return taken;
}

/*
 * Return true if the rwsem has an active spinner.
 */
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
	return osq_is_locked(&sem->osq);
}

#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
	return false;
}
#endif

/*
 * Wait until we successfully acquire the write lock.
 */
static inline struct rw_semaphore *
__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
{
	long count;
	bool waiting = true; /* any queued threads before us */
	struct rwsem_waiter waiter;
	struct rw_semaphore *ret = sem;
	DEFINE_WAKE_Q(wake_q);

	/* undo write bias from down_write operation, stop active locking */
	count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_optimistic_spin(sem))
		return sem;

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	if (list_empty(&sem->wait_list))
		waiting = false;

	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	if (waiting) {
		count = atomic_long_read(&sem->count);

		/*
		 * If there were already threads queued before us and there are
		 * no active writers, the lock must be read owned; so we try to
		 * wake any read locks that were queued ahead of us.
		 */
		if (count > RWSEM_WAITING_BIAS) {
			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
			/*
			 * The wakeup is normally called _after_ the wait_lock
			 * is released, but given that we are proactively waking
			 * readers we can deal with the wake_q overhead here;
			 * it is comparable to releasing and re-taking the
			 * wait_lock before the rwsem_try_write_lock() attempt
			 * below.
			 */
			wake_up_q(&wake_q);

			/*
			 * Reinitialize wake_q after use so it can safely be
			 * reused on the out_nolock path.
			 */
			wake_q_init(&wake_q);
		}

	} else
		count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);

	/* wait until we successfully acquire the lock */
	set_current_state(state);
	while (true) {
		if (rwsem_try_write_lock(count, sem))
			break;
		raw_spin_unlock_irq(&sem->wait_lock);

		/* Block until there are no active lockers. */
		do {
			if (signal_pending_state(state, current))
				goto out_nolock;

			schedule();
			set_current_state(state);
		} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);

		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);
	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);

	return ret;

out_nolock:
	__set_current_state(TASK_RUNNING);
	raw_spin_lock_irq(&sem->wait_lock);
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list))
		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	else
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	return ERR_PTR(-EINTR);
}

__visible struct rw_semaphore * __sched
rwsem_down_write_failed(struct rw_semaphore *sem)
{
	return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_write_failed);

__visible struct rw_semaphore * __sched
rwsem_down_write_failed_killable(struct rw_semaphore *sem)
{
	return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_write_failed_killable);
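
/*
 * Example (sketch, not part of this file): the generic write fast path
 * that reaches the slow paths above is roughly
 *
 *	static inline void __down_write(struct rw_semaphore *sem)
 *	{
 *		long tmp = atomic_long_add_return_acquire(
 *				RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
 *		if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
 *			rwsem_down_write_failed(sem);
 *	}
 *
 * which is why __rwsem_down_write_failed_common() starts by subtracting
 * RWSEM_ACTIVE_WRITE_BIAS again: the caller's bias must not count as
 * active locking while the writer sleeps.
 */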

/*
 * Handle waking up waiters on the semaphore:
 * - up_read/up_write has decremented the active part of count if we
 *   come here.
 */
__visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	/*
	 * A writer in the optimistic-spin slowpath releases the OSQ
	 * (osq_unlock()) and only afterwards updates sem->count; the
	 * unlock path updates sem->count and only afterwards (here)
	 * inspects the OSQ via rwsem_has_spinner().  If the unlocker's
	 * load of the OSQ state were reordered before its count access,
	 * it could see a stale "spinner present" while the spinner has
	 * already queued itself and gone to sleep, and the wakeup would
	 * be lost.  The smp_rmb() keeps the OSQ load ordered after the
	 * count access.
	 */
	smp_rmb();

	/*
	 * If a spinner is present, it is not necessary to do the wakeup:
	 * the spinner will either take the lock and handle the waiters
	 * itself, or fall into the slowpath and observe them.  Do the
	 * wakeup only if the trylock succeeds, to minimize wait_lock
	 * contention, which could otherwise delay the unlock operation
	 * too much.
	 */
	if (rwsem_has_spinner(sem)) {
		/*
		 * The smp_rmb() here is to make sure that the spinner
		 * state is consulted before reading the wait_lock.
		 */
		smp_rmb();
		if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
			return sem;
		goto locked;
	}
	raw_spin_lock_irqsave(&sem->wait_lock, flags);
locked:

	if (!list_empty(&sem->wait_list))
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}
EXPORT_SYMBOL(rwsem_wake);
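
/*
 * Example (sketch, not part of this file): the generic unlock fast
 * path that gets here is roughly
 *
 *	static inline void __up_write(struct rw_semaphore *sem)
 *	{
 *		if (unlikely(atomic_long_sub_return_release(
 *				RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0))
 *			rwsem_wake(sem);
 *	}
 *
 * A negative result after dropping the active bias means the waiting
 * bias is set, i.e. the wait list is (or very recently was) non-empty.
 */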

/*
 * Downgrade a write lock into a read lock:
 * - the caller has incremented the waiting part of count and discovered
 *   it is still negative, i.e. there are waiters queued;
 * - just wake up any readers at the front of the queue.
 */
__visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}
EXPORT_SYMBOL(rwsem_downgrade_wake);
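
/*
 * Example (sketch, not part of this file): downgrade_write() lets a
 * writer become a reader without a full unlock/lock window:
 *
 *	down_write(&sem);
 *	...publish an update...
 *	downgrade_write(&sem);	(may end up in rwsem_downgrade_wake())
 *	...keep reading the published state...
 *	up_read(&sem);
 */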