/*
 * R/W semaphores: contention handling functions (slow paths) for the
 * xadd-based rwsem implementation.
 */
#include <linux/rwsem.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/debug.h>
#include <linux/osq_lock.h>

#include "rwsem.h"
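/*
 * Quick guide to the sem->count encoding used below (see the RWSEM_*
 * bias definitions for the exact values):
 *
 *  - RWSEM_UNLOCKED_VALUE (0): the lock is free and nobody is queued.
 *  - Each active or lock-attempting reader adds RWSEM_ACTIVE_READ_BIAS.
 *  - A writer holding or attempting the lock adds RWSEM_ACTIVE_WRITE_BIAS.
 *  - RWSEM_WAITING_BIAS is present while the wait_list is non-empty.
 *
 * A count of exactly RWSEM_WAITING_BIAS therefore means "waiters are
 * queued but the lock itself is not held", which is the state a queued
 * or lock-stealing writer looks for.
 */

/*
 * Initialize an rwsem:
 */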
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	sem->owner = NULL;
	osq_lock_init(&sem->osq);
#endif
}

EXPORT_SYMBOL(__init_rwsem);

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
};

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};
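/*
 * Handle waking up waiter(s) blocked on the semaphore:
 * - the caller must hold sem->wait_lock and the wait_list must be non-empty
 * - woken tasks are only queued on wake_q; the caller must invoke
 *   wake_up_q() itself, preferably after dropping the wait_lock
 * - a writer at the head of the queue is marked for wakeup only when
 *   wake_type is RWSEM_WAKE_ANY; otherwise the readers at the head of
 *   the queue are granted the read lock and marked for wakeup
 */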
static void __rwsem_mark_wake(struct rw_semaphore *sem,
			      enum rwsem_wake_type wake_type,
			      struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark the writer at the front of the queue for
			 * wakeup.  Until that task is actually awoken by the
			 * caller, other writers are still able to steal the
			 * lock; readers will block as they see the queued
			 * writer.
			 */
			wake_q_add(wake_q, waiter->task);
		}

		return;
	}

	/*
	 * Writers might steal the lock before we grant it to the next
	 * reader.  Do the first reader grant before counting readers, so we
	 * can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		adjustment = RWSEM_ACTIVE_READ_BIAS;
 try_reader_grant:
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
			/*
			 * A writer stole the lock before we could grant it;
			 * undo the reader grant.  If the count stays below
			 * RWSEM_WAITING_BIAS the writer still holds the lock,
			 * so leave the wakeup to it.
			 */
			if (atomic_long_add_return(-adjustment, &sem->count) <
			    RWSEM_WAITING_BIAS)
				return;

			/* Last active locker left.  Retry waking readers. */
			goto try_reader_grant;
		}
		/*
		 * Setting reader ownership here is not strictly necessary,
		 * but it gives spinners an early indication that the
		 * readers now hold the lock.
		 */
		rwsem_set_reader_owned(sem);
	}

	/*
	 * Grant the read lock to the readers at the front of the queue.
	 * Note that the 'active part' of the count is incremented by the
	 * number of readers before any of them is actually woken up.
	 */
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		struct task_struct *tsk;

		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			break;

		woken++;
		tsk = waiter->task;

		wake_q_add(wake_q, tsk);
		list_del(&waiter->list);
		/*
		 * Clearing waiter->task must be the last access to the
		 * waiter: once it is NULL, the sleeping reader may return
		 * and free its on-stack waiter.  wake_q_add() above already
		 * holds a reference on the task, so the deferred wakeup
		 * cannot race with do_exit().
		 */
		smp_store_release(&waiter->task, NULL);
	}

	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
	if (list_empty(&sem->wait_list)) {
		/* hit end of list above */
		adjustment -= RWSEM_WAITING_BIAS;
	}

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);
}
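/*
 * Wait for the read lock to be granted
 */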
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list))
		adjustment += RWSEM_WAITING_BIAS;
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	/*
	 * If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers!
	 */
	if (count == RWSEM_WAITING_BIAS ||
	    (count > RWSEM_WAITING_BIAS &&
	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	/* wait to be given the lock */
	while (true) {
		set_current_state(state);
		if (!waiter.task)
			break;
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			break;
		}
		schedule();
	}

	__set_current_state(TASK_RUNNING);
	return sem;
out_nolock:
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list))
		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
	return ERR_PTR(-EINTR);
}

__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
	return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);

__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
	return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);
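/*
 * This function must be called with the sem->wait_lock held to prevent
 * races between checking the rwsem wait list and setting sem->count
 * accordingly.
 */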
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
	/*
	 * Only try to acquire the write lock when the count is exactly
	 * RWSEM_WAITING_BIAS, i.e. there are waiters but no active lockers.
	 */
	if (count != RWSEM_WAITING_BIAS)
		return false;

	/*
	 * Acquire the lock by setting the count to RWSEM_ACTIVE_WRITE_BIAS.
	 * If other tasks remain on the wait list, RWSEM_WAITING_BIAS must
	 * stay in as well.
	 */
	count = list_is_singular(&sem->wait_list) ?
			RWSEM_ACTIVE_WRITE_BIAS :
			RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;

	if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
							== RWSEM_WAITING_BIAS) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
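/*
 * Try to acquire the write lock before the writer has been put on the
 * wait queue.
 */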
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long old, count = atomic_long_read(&sem->count);

	while (true) {
		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
			return false;

		old = atomic_long_cmpxchg_acquire(&sem->count, count,
					count + RWSEM_ACTIVE_WRITE_BIAS);
		if (old == count) {
			rwsem_set_owner(sem);
			return true;
		}

		count = old;
	}
}

static inline bool owner_on_cpu(struct task_struct *owner)
{
	/*
	 * Spinning is pointless if the lock holder is not currently running
	 * on a CPU, or if its (virtual) CPU has been preempted.
	 */
	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	bool ret = true;

	BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));

	if (need_resched())
		return false;

	rcu_read_lock();
	owner = READ_ONCE(sem->owner);
	if (owner) {
		ret = is_rwsem_owner_spinnable(owner) &&
		      owner_on_cpu(owner);
	}
	rcu_read_unlock();
	return ret;
}
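/*
 * Spin while the current owner of the rwsem is running on a CPU.  Return
 * true if optimistic spinning is still worthwhile (the owner field changed
 * or was cleared), false if the spinner should give up and queue itself.
 */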
static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner = READ_ONCE(sem->owner);

	if (!is_rwsem_owner_spinnable(owner))
		return false;

	rcu_read_lock();
	while (owner && (READ_ONCE(sem->owner) == owner)) {
		/*
		 * Ensure the owner->on_cpu dereference happens _after_
		 * re-checking that sem->owner still matches owner.  If it no
		 * longer matches, owner might point to freed memory; while
		 * it matches, the rcu_read_lock() keeps the memory valid.
		 */
		barrier();

		/*
		 * Stop spinning if we need to reschedule or if the owner is
		 * no longer running on a CPU.
		 */
		if (need_resched() || !owner_on_cpu(owner)) {
			rcu_read_unlock();
			return false;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	/*
	 * If there is a new owner, or the owner field was cleared, continue
	 * optimistic spinning.
	 */
	return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!rwsem_can_spin_on_owner(sem))
		goto done;

	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes.  Spinning is stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock, as we can't tell whether they are
	 *     actively running or not.
	 */
	while (rwsem_spin_on_owner(sem)) {
		/*
		 * Try to acquire the lock
		 */
		if (rwsem_try_write_lock_unqueued(sem)) {
			taken = true;
			break;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field.  If
		 * we're an RT task, spinning here would live-lock because we
		 * would never let the owner complete.
		 */
		if (!sem->owner && (need_resched() || rt_task(current)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier that forces
		 * everything in this loop to be reloaded.  No memory
		 * barriers are needed: we will eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	return taken;
}
/*
 * Return true if the rwsem has active spinner(s)
 */
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
	return osq_is_locked(&sem->osq);
}

#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
	return false;
}
#endif
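/*
 * Wait until we successfully acquire the write lock
 */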
static inline struct rw_semaphore *
__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
{
	long count;
	bool waiting = true; /* any queued threads before us */
	struct rwsem_waiter waiter;
	struct rw_semaphore *ret = sem;
	DEFINE_WAKE_Q(wake_q);

	/* undo write bias from down_write operation, stop active locking */
	count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_optimistic_spin(sem))
		return sem;

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	if (list_empty(&sem->wait_list))
		waiting = false;

	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	if (waiting) {
		count = atomic_long_read(&sem->count);

		/*
		 * If there were already threads queued before us and there
		 * are no active writers, the lock must be read owned; so we
		 * try to wake any read locks that were queued ahead of us.
		 */
		if (count > RWSEM_WAITING_BIAS) {
			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
			/*
			 * wake_up_q() is normally called after dropping the
			 * wait_lock, but since we are proactively waking
			 * readers here, the extra cost with the lock held is
			 * acceptable.
			 */
			wake_up_q(&wake_q);

			/*
			 * Reinitialize wake_q after use.
			 */
			wake_q_init(&wake_q);
		}

	} else
		count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);

	/* wait until we successfully acquire the lock */
	set_current_state(state);
	while (true) {
		if (rwsem_try_write_lock(count, sem))
			break;
		raw_spin_unlock_irq(&sem->wait_lock);

		/* Block until there are no active lockers. */
		do {
			if (signal_pending_state(state, current))
				goto out_nolock;

			schedule();
			set_current_state(state);
		} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);

		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);
	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);

	return ret;

out_nolock:
	__set_current_state(TASK_RUNNING);
	raw_spin_lock_irq(&sem->wait_lock);
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list))
		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	else
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	return ERR_PTR(-EINTR);
}

__visible struct rw_semaphore * __sched
rwsem_down_write_failed(struct rw_semaphore *sem)
{
	return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_write_failed);

__visible struct rw_semaphore * __sched
rwsem_down_write_failed_killable(struct rw_semaphore *sem)
{
	return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_write_failed_killable);
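/*
 * Handle waking up a waiter on the semaphore:
 * - up_read/up_write has decremented the active part of count if we come here
 */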
__visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	/*
	 * A writer that failed its optimistic spin releases the OSQ and then
	 * queues itself, updating sem->count.  If our caller's count update
	 * observed that queueing, the rwsem_has_spinner() check below must
	 * also observe the OSQ as unlocked, otherwise the wakeup of the
	 * newly queued writer could be missed.  The smp_rmb() orders the
	 * caller's count operation before the OSQ read.
	 */
	smp_rmb();

	/*
	 * If a spinner is present, it is not necessary to do the wakeup;
	 * only do it if the wait_lock trylock succeeds, to minimize
	 * wait_lock contention in the unlock path.
	 */
	if (rwsem_has_spinner(sem)) {
		/*
		 * Make sure the spinner state is consulted before reading
		 * the wait_lock.
		 */
		smp_rmb();
		if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
			return sem;
		goto locked;
	}
	raw_spin_lock_irqsave(&sem->wait_lock, flags);
locked:

	if (!list_empty(&sem->wait_list))
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}
EXPORT_SYMBOL(rwsem_wake);
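/*
 * Downgrade a write lock into a read lock:
 * - the caller has already adjusted the count for one active reader and
 *   found it still negative, i.e. waiters remain queued
 * - just wake up any readers at the front of the queue
 */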
__visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}
EXPORT_SYMBOL(rwsem_downgrade_wake);