// SPDX-License-Identifier: GPL-2.0
/*
 * kernel/locking/rwsem.c: R/W semaphores, public implementation
 */
#include <linux/rh_kabi.h>

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include RH_KABI_HIDE_INCLUDE(<linux/sched/signal.h>)
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>

#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"

#define __init_rwsem ___init_rwsem

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set:
 *  - Bit 0: RWSEM_READER_OWNED - the rwsem is owned by readers
 *  - Bit 1: RWSEM_NONSPINNABLE - cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer into the
 * owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct pointer
 * into the owner field with the RWSEM_READER_OWNED bit set. On unlock, the
 * owner field is largely left untouched, so for a free or reader-owned
 * rwsem the owner value may contain stale information about the last reader
 * that acquired it. That is mainly useful for debugging, e.g. when the
 * system appears to hang on a reader-owned rwsem.
 */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_NONSPINNABLE	(1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(unsigned long) sem->magic,			\
		atomic_long_read(&(sem)->owner), (long)current,	\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit  63   - read fail bit
 *
 * On 32-bit architectures, the layout is the same except that the reader
 * count occupies bits 8-30 and the read fail bit is bit 31.
 *
 * It is not likely that the most significant bit (read fail bit) will ever
 * be set. This guard bit is still checked anyway in the down_read()
 * fastpath just in case more of the reader bits are needed for other
 * purposes in the future.
 *
 * atomic_long_fetch_add() is used to obtain the reader lock, whereas
 * atomic_long_cmpxchg() is used to obtain the writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared:
 * 1) rwsem_mark_wake() for readers.
 * 2) rwsem_try_write_lock() for writers.
 * 3) The error path of rwsem_down_write_slowpath().
 *
 * For all the above cases, wait_lock is held when setting or clearing the
 * handoff bit, making sure that there is only one attempt to grab the
 * rwsem and that it cannot become a perpetual step-by-step delay.
 */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
				 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
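
/*
 * Illustrative sketch (not built): how a couple of lock states map onto
 * the count word under the bit layout above. The values and the function
 * name are assumptions made purely for this example, derived from the
 * #defines in this file.
 */
#if 0
static __always_unused void rwsem_count_encoding_example(void)
{
	/* three active readers, no writer, no waiters: 3 << 8 == 0x300 */
	long three_readers = 3 * RWSEM_READER_BIAS;

	/* a writer holds the lock and at least one task is queued: 0x1 | 0x2 */
	long writer_with_waiters = RWSEM_WRITER_LOCKED | RWSEM_FLAG_WAITERS;

	/* both states count as "locked" as far as RWSEM_LOCK_MASK is concerned */
	WARN_ON(!(three_readers & RWSEM_LOCK_MASK));
	WARN_ON(!(writer_with_waiters & RWSEM_LOCK_MASK));
}
#endif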

/*
 * The owner field is read concurrently by optimistic spinners without
 * holding wait_lock, so it is always accessed with the atomic_long_*()
 * helpers to avoid load/store tearing.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
	atomic_long_set(&sem->owner, (long)current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
	atomic_long_set(&sem->owner, 0);
}

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
	return atomic_long_read(&sem->owner) & flags;
}

/*
 * The task_struct pointer of the last owning reader will be left in the
 * owner field. Note that it only indicates that the task owned the rwsem
 * at some point; it may no longer be an owner when the field is examined,
 * so take it with a grain of salt. The nonspinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
		(atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);

	atomic_long_set(&sem->owner, val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
	__rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
#ifdef CONFIG_DEBUG_RWSEMS
	/*
	 * Check the count to see if it is write-locked.
	 */
	long count = atomic_long_read(&sem->count);

	if (count & RWSEM_WRITER_MASK)
		return false;
#endif
	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, clear the owner field on a reader
 * unlock if it still contains the current task's pointer, so that stale
 * owner information isn't reported in debug output. The cmpxchg cost is
 * only paid in debug kernels.
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
	unsigned long val = atomic_long_read(&sem->owner);

	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
		if (atomic_long_try_cmpxchg(&sem->owner, &val,
					    val & RWSEM_OWNER_FLAGS_MASK))
			return;
	}
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bit if the RWSEM_READER_OWNED flag remains
 * set. Otherwise, the operation will be aborted.
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	do {
		if (!(owner & RWSEM_READER_OWNED))
			break;
		if (owner & RWSEM_NONSPINNABLE)
			break;
	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
					  owner | RWSEM_NONSPINNABLE));
}

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
{
	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

	if (WARN_ON_ONCE(*cntp < 0))
		rwsem_set_nonspinnable(sem);

	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
		rwsem_set_reader_owned(sem);
		return true;
	}

	return false;
}

static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}

/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
	return (struct task_struct *)
		(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner field.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
}
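
/*
 * Illustrative sketch (not built): the owner word packs a task_struct
 * pointer together with the two flag bits. This only works because
 * task_struct allocations are aligned well beyond 4 bytes, leaving the low
 * bits free. The function name below is invented for the example.
 */
#if 0
static __always_unused void rwsem_owner_packing_example(struct rw_semaphore *sem)
{
	unsigned long flags;
	struct task_struct *owner;

	/* a reader stores its own task pointer plus RWSEM_READER_OWNED */
	__rwsem_set_reader_owned(sem, current);

	/* unpacking recovers both halves independently */
	owner = rwsem_owner_flags(sem, &flags);
	WARN_ON(owner != current);
	WARN_ON(!(flags & RWSEM_READER_OWNED));
}
#endif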

/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED bit isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a readers-owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after the release.
 */

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
	sem->magic = sem;
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
	atomic_long_set(&sem->owner, 0L);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);
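
/*
 * Illustrative sketch (not built): typical embedding of an rwsem in a data
 * structure. The structure and function names are invented for the example;
 * only init_rwsem()/down_read()/up_read()/down_write()/up_write() are the
 * real API.
 */
#if 0
struct example_cache {
	struct rw_semaphore lock;
	unsigned long nr_entries;
};

static void example_cache_init(struct example_cache *c)
{
	init_rwsem(&c->lock);		/* ends up in __init_rwsem() above */
	c->nr_entries = 0;
}

static unsigned long example_cache_count(struct example_cache *c)
{
	unsigned long n;

	down_read(&c->lock);		/* many readers may hold this at once */
	n = c->nr_entries;
	up_read(&c->lock);
	return n;
}

static void example_cache_add(struct example_cache *c)
{
	down_write(&c->lock);		/* writers are exclusive */
	c->nr_entries++;
	up_write(&c->lock);
}
#endif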

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
	unsigned long timeout;
};
#define rwsem_first_waiter(sem) \
	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

enum writer_wait_state {
	WRITER_NOT_FIRST,	/* Writer is not first in wait list */
	WRITER_FIRST,		/* Writer is first in wait list     */
	WRITER_HANDOFF		/* Writer is first & handoff needed */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)

/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP	0x100

/*
 * Handle the lock release when processes blocked on it can now run:
 * - if we come here from up_xxxx(), the RWSEM_FLAG_WAITERS bit must be set
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup; the caller must later invoke wake_up_q()
 *   to actually wake up the blocked task(s), preferably after releasing
 *   the wait_lock
 * - woken waiters are removed from the list after having task zeroed
 * - writers are only marked for wakeup when wake_type is RWSEM_WAKE_ANY
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
			    enum rwsem_wake_type wake_type,
			    struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;
	struct list_head wlist;

	lockdep_assert_held(&sem->wait_lock);

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = rwsem_first_waiter(sem);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark the writer at the front of the queue for
			 * wakeup. Until the task is actually awoken later
			 * by the caller, other writers are able to steal
			 * the lock. Readers, on the other hand, will block
			 * as they will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
			lockevent_inc(rwsem_wake_writer);
		}

		return;
	}

	/*
	 * No reader wakeup if there are too many of them already.
	 */
	if (unlikely(atomic_long_read(&sem->count) < 0))
		return;

	/*
	 * Writers might steal the lock before we grant it to the next
	 * reader. We prefer to do the first reader grant before counting
	 * readers so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		struct task_struct *owner;

		adjustment = RWSEM_READER_BIAS;
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
			/*
			 * When we've been waiting "too" long (for writers
			 * to give up the lock), request a HANDOFF to force
			 * the issue.
			 */
			if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
			    time_after(jiffies, waiter->timeout)) {
				adjustment -= RWSEM_FLAG_HANDOFF;
				lockevent_inc(rwsem_rlock_handoff);
			}

			atomic_long_add(-adjustment, &sem->count);
			return;
		}
		/*
		 * Set it to reader-owned to give spinners an early
		 * indication that readers now have the lock.
		 */
		owner = waiter->task;
		__rwsem_set_reader_owned(sem, owner);
	}

	/*
	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in
	 * the queue; we know at least one will be woken as accounted for
	 * above. The 'active part' of the count is incremented by the number
	 * of readers before any of them is actually woken.
	 *
	 * This is an adaptation of phase-fair R/W locks: during a reader
	 * phase (first waiter is a reader) all queued readers are eligible
	 * to acquire the lock at the same time irrespective of their order
	 * in the queue, while writers acquire it in queue order.
	 *
	 * The wakeup is done in two passes so that a to-be-woken reader
	 * cannot decrement the count before it has been incremented on its
	 * behalf: waiters are first moved to a private list while the count
	 * is adjusted, and only then actually woken.
	 */
	INIT_LIST_HEAD(&wlist);
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			continue;

		woken++;
		list_move_tail(&waiter->list, &wlist);

		/*
		 * Limit # of readers that can be woken up per wakeup call.
		 */
		if (woken >= MAX_READERS_WAKEUP)
			break;
	}

	adjustment = woken * RWSEM_READER_BIAS - adjustment;
	lockevent_cond_inc(rwsem_wake_reader, woken);
	if (list_empty(&sem->wait_list)) {
		/* hit end of list above */
		adjustment -= RWSEM_FLAG_WAITERS;
	}

	/*
	 * When we've woken a reader, we no longer need to force writers
	 * to give up the lock and we can clear HANDOFF.
	 */
	if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
		adjustment -= RWSEM_FLAG_HANDOFF;

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);

	/* 2nd pass */
	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
		struct task_struct *tsk;

		tsk = waiter->task;
		get_task_struct(tsk);

		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_slowpath() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wake up.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add_safe(wake_q, tsk);
	}
}

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
 * bit is set or the lock is acquired with the handoff bit cleared.
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
					enum writer_wait_state wstate)
{
	long count, new;

	lockdep_assert_held(&sem->wait_lock);

	count = atomic_long_read(&sem->count);
	do {
		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

		if (has_handoff && wstate == WRITER_NOT_FIRST)
			return false;

		new = count;

		if (count & RWSEM_LOCK_MASK) {
			if (has_handoff || (wstate != WRITER_HANDOFF))
				return false;

			new |= RWSEM_FLAG_HANDOFF;
		} else {
			new |= RWSEM_WRITER_LOCKED;
			new &= ~RWSEM_FLAG_HANDOFF;

			if (list_is_singular(&sem->wait_list))
				new &= ~RWSEM_FLAG_WAITERS;
		}
	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));

	/*
	 * We have either acquired the lock with handoff bit cleared or
	 * set the handoff bit.
	 */
	if (new & RWSEM_FLAG_HANDOFF)
		return false;

	rwsem_set_owner(sem);
	return true;
}

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
					count | RWSEM_WRITER_LOCKED)) {
			rwsem_set_owner(sem);
			lockevent_inc(rwsem_opt_lock);
			return true;
		}
	}
	return false;
}

static inline bool owner_on_cpu(struct task_struct *owner)
{
	/*
	 * Skip spinning if the owner is not running on a CPU or its CPU
	 * is preempted (lock holder preemption issue).
	 */
	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	unsigned long flags;
	bool ret = true;

	if (need_resched()) {
		lockevent_inc(rwsem_opt_fail);
		return false;
	}

	preempt_disable();
	rcu_read_lock();
	owner = rwsem_owner_flags(sem, &flags);
	/*
	 * Don't check the read-owner as the entry may be stale.
	 */
	if ((flags & RWSEM_NONSPINNABLE) ||
	    (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
		ret = false;
	rcu_read_unlock();
	preempt_enable();

	lockevent_cond_inc(rwsem_opt_fail, !ret);
	return ret;
}

/*
 * The rwsem_spin_on_owner() function returns the following four values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 */
enum owner_state {
	OWNER_NULL		= 1 << 0,
	OWNER_WRITER		= 1 << 1,
	OWNER_READER		= 1 << 2,
	OWNER_NONSPINNABLE	= 1 << 3,
};
#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)

static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
{
	if (flags & RWSEM_NONSPINNABLE)
		return OWNER_NONSPINNABLE;

	if (flags & RWSEM_READER_OWNED)
		return OWNER_READER;

	return owner ? OWNER_WRITER : OWNER_NULL;
}

static noinline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *new, *owner;
	unsigned long flags, new_flags;
	enum owner_state state;

	owner = rwsem_owner_flags(sem, &flags);
	state = rwsem_owner_state(owner, flags);
	if (state != OWNER_WRITER)
		return state;

	rcu_read_lock();
	for (;;) {
		/*
		 * When a waiting writer set the handoff flag, it may spin
		 * on the owner as well. Once that writer acquires the lock,
		 * we can spin on it. So we don't need to quit even when the
		 * handoff bit is set.
		 */
		new = rwsem_owner_flags(sem, &new_flags);
		if ((new != owner) || (new_flags != flags)) {
			state = rwsem_owner_state(new, new_flags);
			break;
		}

		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking sem->owner still matches owner. If that fails,
		 * owner might point to freed memory; if it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();

		if (need_resched() || !owner_on_cpu(owner)) {
			state = OWNER_NONSPINNABLE;
			break;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	return state;
}

/*
 * Calculate the reader-owned rwsem spinning threshold for a writer.
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is intentionally simple.
 */
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);
	int readers = count >> RWSEM_READER_SHIFT;
	u64 delta;

	if (readers > 30)
		readers = 30;
	delta = (20 + readers) * NSEC_PER_USEC / 2;

	return sched_clock() + delta;
}
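
/*
 * Illustrative sketch (not built): the threshold formula above in numbers.
 * With 2 readers the writer's spin budget is (20 + 2) / 2 = 11us; with 30
 * or more readers the cap gives (20 + 30) / 2 = 25us. The helper name is
 * invented for this example.
 */
#if 0
static __always_unused u64 rwsem_rspin_delta_example(int readers)
{
	if (readers > 30)
		readers = 30;
	return (20 + readers) * NSEC_PER_USEC / 2;	/* spin budget in ns */
}
#endif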

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;
	int prev_owner_state = OWNER_NULL;
	int loop = 0;
	u64 rspin_threshold = 0;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock and spinning time has exceeded limit.
	 */
	for (;;) {
		enum owner_state owner_state;

		owner_state = rwsem_spin_on_owner(sem);
		if (!(owner_state & OWNER_SPINNABLE))
			break;

		/*
		 * Try to acquire the lock
		 */
		taken = rwsem_try_write_lock_unqueued(sem);

		if (taken)
			break;

		/*
		 * Time-based reader-owned rwsem optimistic spinning
		 */
		if (owner_state == OWNER_READER) {
			/*
			 * Re-initialize rspin_threshold every time the owner
			 * state changes from non-reader to reader. This
			 * allows a writer to steal the lock between two
			 * reader phases and have the threshold reset at the
			 * beginning of the second reader phase.
			 */
			if (prev_owner_state != OWNER_READER) {
				if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
					break;
				rspin_threshold = rwsem_rspin_threshold(sem);
				loop = 0;
			}

			/*
			 * Check the time threshold only once every 16
			 * iterations to avoid calling sched_clock() too
			 * frequently.
			 */
			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
				rwsem_set_nonspinnable(sem);
				lockevent_inc(rwsem_opt_nospin);
				break;
			}
		}

		/*
		 * An RT task cannot do optimistic spinning if it cannot be
		 * sure the lock holder is running: if both happen to run on
		 * the same CPU a live-lock may result. A NULL owner,
		 * however, may just mean the previous owner is in the middle
		 * of releasing the lock, or that a new (possibly spinnable)
		 * writer has just slipped in, so the RT task retries once
		 * more before giving up. If the owner is a writer, the
		 * need_resched() check is done inside rwsem_spin_on_owner();
		 * otherwise it has to be done here.
		 */
		if (owner_state != OWNER_WRITER) {
			if (need_resched())
				break;
			if (rt_task(current) &&
			   (prev_owner_state != OWNER_WRITER))
				break;
		}
		prev_owner_state = owner_state;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	lockevent_cond_inc(rwsem_opt_fail, !taken);
	return taken;
}

/*
 * Clear the RWSEM_NONSPINNABLE bit in the owner field if it is set, so
 * that optimistic spinning is re-enabled.
 */
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
	if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
		atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}

#else
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline void clear_nonspinnable(struct rw_semaphore *sem) { }

static inline int
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	return 0;
}
#define OWNER_NULL	1
#endif

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
{
	long adjustment = -RWSEM_READER_BIAS;
	long rcnt = (count >> RWSEM_READER_SHIFT);
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);
	bool wake = false;

	/*
	 * To prevent a constant stream of readers from starving a sleeping
	 * waiter, don't attempt optimistic lock stealing if the lock is
	 * currently owned by readers.
	 */
	if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
	    (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
		goto queue;

	/*
	 * Reader optimistic lock stealing.
	 */
	if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
		rwsem_set_reader_owned(sem);
		lockevent_inc(rwsem_rlock_steal);

		/*
		 * Wake up other readers in the wait queue if it is
		 * the first reader.
		 */
		if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (!list_empty(&sem->wait_list))
				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
						&wake_q);
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
		}
		return sem;
	}

queue:
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer or has the handoff bit set, this reader can
		 * exit the slowpath and return immediately as its
		 * RWSEM_READER_BIAS has already been set in the count.
		 */
		if (!(atomic_long_read(&sem->count) &
		     (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
			/* Provide lock ACQUIRE */
			smp_acquire__after_ctrl_dep();
			raw_spin_unlock_irq(&sem->wait_lock);
			rwsem_set_reader_owned(sem);
			lockevent_inc(rwsem_rlock_fast);
			return sem;
		}
		adjustment += RWSEM_FLAG_WAITERS;
	}
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	/*
	 * If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers !
	 */
	if (!(count & RWSEM_LOCK_MASK)) {
		clear_nonspinnable(sem);
		wake = true;
	}
	if (wake || (!(count & RWSEM_WRITER_MASK) &&
		    (adjustment & RWSEM_FLAG_WAITERS)))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(state);
		if (!smp_load_acquire(&waiter.task)) {
			/* Matches rwsem_mark_wake()'s smp_store_release(). */
			break;
		}
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
			break;
		}
		schedule();
		lockevent_inc(rwsem_sleep_reader);
	}

	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock);
	return sem;

out_nolock:
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list)) {
		atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
				   &sem->count);
	}
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock_fail);
	return ERR_PTR(-EINTR);
}

/*
 * Wait until we successfully acquire the write lock
 */
static struct rw_semaphore *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
	long count;
	enum writer_wait_state wstate;
	struct rwsem_waiter waiter;
	struct rw_semaphore *ret = sem;
	DEFINE_WAKE_Q(wake_q);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
		/* rwsem_optimistic_spin() implies ACQUIRE on success */
		return sem;
	}

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;

	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock */
	if (wstate == WRITER_NOT_FIRST) {
		count = atomic_long_read(&sem->count);

		/*
		 * If there were already threads queued before us and:
		 *  1) there are no active locks, wake the front
		 *     queued process(es) as the handoff bit might be set.
		 *  2) there are no active writers and some readers, the lock
		 *     must be read owned; so we try to wake any read lock
		 *     waiters that were queued ahead of us.
		 */
		if (count & RWSEM_WRITER_MASK)
			goto wait;

		rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
					? RWSEM_WAKE_READERS
					: RWSEM_WAKE_ANY, &wake_q);

		if (!wake_q_empty(&wake_q)) {
			/*
			 * We want to minimize wait_lock hold time especially
			 * when a large number of readers are to be woken up.
			 */
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
			wake_q_init(&wake_q);	/* Used again, reinit */
			raw_spin_lock_irq(&sem->wait_lock);
		}
	} else {
		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
	}

wait:
	/* wait until we successfully acquire the lock */
	set_current_state(state);
	for (;;) {
		if (rwsem_try_write_lock(sem, wstate)) {
			/* rwsem_try_write_lock() implies ACQUIRE on success */
			break;
		}

		raw_spin_unlock_irq(&sem->wait_lock);

		/*
		 * After setting the handoff bit and failing to acquire
		 * the lock, attempt to spin on owner to accelerate lock
		 * transfer. If the previous owner is a on-cpu writer and it
		 * has just released the lock, OWNER_NULL will be returned.
		 * In this case, we attempt to acquire the lock again
		 * without sleeping.
		 */
		if (wstate == WRITER_HANDOFF &&
		    rwsem_spin_on_owner(sem) == OWNER_NULL)
			goto trylock_again;

		/* Block until there are no active lockers. */
		for (;;) {
			if (signal_pending_state(state, current))
				goto out_nolock;

			schedule();
			lockevent_inc(rwsem_sleep_writer);
			set_current_state(state);
			/*
			 * If HANDOFF bit is set, unconditionally do
			 * a trylock.
			 */
			if (wstate == WRITER_HANDOFF)
				break;

			if ((wstate == WRITER_NOT_FIRST) &&
			    (rwsem_first_waiter(sem) == &waiter))
				wstate = WRITER_FIRST;

			count = atomic_long_read(&sem->count);
			if (!(count & RWSEM_LOCK_MASK))
				break;

			/*
			 * The setting of the handoff bit is deferred
			 * until rwsem_try_write_lock() is called.
			 */
			if ((wstate == WRITER_FIRST) && (rt_task(current) ||
			    time_after(jiffies, waiter.timeout))) {
				wstate = WRITER_HANDOFF;
				lockevent_inc(rwsem_wlock_handoff);
				break;
			}
		}
trylock_again:
		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);
	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);
	lockevent_inc(rwsem_wlock);

	return ret;

out_nolock:
	__set_current_state(TASK_RUNNING);
	raw_spin_lock_irq(&sem->wait_lock);
	list_del(&waiter.list);

	if (unlikely(wstate == WRITER_HANDOFF))
		atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);

	if (list_empty(&sem->wait_list))
		atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
	else
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);
	lockevent_inc(rwsem_wlock_fail);

	return ERR_PTR(-EINTR);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * lock for reading
 */
static inline int __down_read_common(struct rw_semaphore *sem, int state)
{
	long count;

	if (!rwsem_read_trylock(sem, &count)) {
		if (IS_ERR(rwsem_down_read_slowpath(sem, count, state)))
			return -EINTR;
		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
	}
	return 0;
}

static inline void __down_read(struct rw_semaphore *sem)
{
	__down_read_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_read_interruptible(struct rw_semaphore *sem)
{
	return __down_read_common(sem, TASK_INTERRUPTIBLE);
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
	return __down_read_common(sem, TASK_KILLABLE);
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);

	/*
	 * Optimize for the case when the rwsem is not locked at all.
	 */
	tmp = RWSEM_UNLOCKED_VALUE;
	do {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
					tmp + RWSEM_READER_BIAS)) {
			rwsem_set_reader_owned(sem);
			return 1;
		}
	} while (!(tmp & RWSEM_READ_FAILED_MASK));
	return 0;
}

/*
 * lock for writing
 */
static inline int __down_write_common(struct rw_semaphore *sem, int state)
{
	if (unlikely(!rwsem_write_trylock(sem))) {
		if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
			return -EINTR;
	}

	return 0;
}

static inline void __down_write(struct rw_semaphore *sem)
{
	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_write_killable(struct rw_semaphore *sem)
{
	return __down_write_common(sem, TASK_KILLABLE);
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	return rwsem_write_trylock(sem);
}

/*
 * unlock after reading
 */
static inline void __up_read(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);

	rwsem_clear_reader_owned(sem);
	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
		      RWSEM_FLAG_WAITERS)) {
		clear_nonspinnable(sem);
		rwsem_wake(sem);
	}
}

/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
	/*
	 * sem->owner may differ from current if the ownership is transferred
	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
	 */
	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
			    !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);

	rwsem_clear_owner(sem);
	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
		rwsem_wake(sem);
}

/*
 * downgrade write lock to read lock
 */
static inline void __downgrade_write(struct rw_semaphore *sem)
{
	long tmp;

	/*
	 * When downgrading from exclusive to shared ownership,
	 * anything inside the write-locked region cannot leak
	 * into the read side. In contrast, anything in the
	 * read-locked region is ok to be re-ordered into the
	 * write side. As such, rely on RELEASE semantics.
	 */
	DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
	tmp = atomic_long_fetch_add_release(
		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
	rwsem_set_reader_owned(sem);
	if (tmp & RWSEM_FLAG_WAITERS)
		rwsem_downgrade_wake(sem);
}

#else

#define RT_MUTEX_BUILD_MUTEX
#include "rtmutex.c"

#define rwbase_set_and_save_current_state(state)	\
	set_current_state(state)

#define rwbase_restore_current_state()			\
	__set_current_state(TASK_RUNNING)

#define rwbase_rtmutex_lock_state(rtm, state)		\
	__rt_mutex_lock(rtm, state)

#define rwbase_rtmutex_slowlock_locked(rtm, state)	\
	__rt_mutex_slowlock_locked(rtm, NULL, state)

#define rwbase_rtmutex_unlock(rtm)			\
	__rt_mutex_unlock(rtm)

#define rwbase_rtmutex_trylock(rtm)			\
	__rt_mutex_trylock(rtm)

#define rwbase_signal_pending_state(state, current)	\
	signal_pending_state(state, current)

#define rwbase_schedule()				\
	schedule()

#include "rwbase_rt.c"

void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
	init_rwbase_rt(&(sem)->rwbase);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
}
EXPORT_SYMBOL(__init_rwsem);

static inline void __down_read(struct rw_semaphore *sem)
{
	rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
}

static inline int __down_read_interruptible(struct rw_semaphore *sem)
{
	return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
	return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
	return rwbase_read_trylock(&sem->rwbase);
}

static inline void __up_read(struct rw_semaphore *sem)
{
	rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
}

static inline void __sched __down_write(struct rw_semaphore *sem)
{
	rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
}

static inline int __sched __down_write_killable(struct rw_semaphore *sem)
{
	return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	return rwbase_write_trylock(&sem->rwbase);
}

static inline void __up_write(struct rw_semaphore *sem)
{
	rwbase_write_unlock(&sem->rwbase);
}

static inline void __downgrade_write(struct rw_semaphore *sem)
{
	rwbase_write_downgrade(&sem->rwbase);
}

/* Debug stubs for the common API */
#define DEBUG_RWSEMS_WARN_ON(c, sem)

static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
}

static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
	int count = atomic_read(&sem->rwbase.readers);

	return count < 0 && count != READER_BIAS;
}

#endif

/*
 * lock for reading
 */
void __sched down_read(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read);

int __sched down_read_interruptible(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
		rwsem_release(&sem->dep_map, _RET_IP_);
		return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL(down_read_interruptible);

int __sched down_read_killable(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
		rwsem_release(&sem->dep_map, _RET_IP_);
		return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL(down_read_killable);

/*
 * trylock for reading -- returns 1 if successful, 0 if contention
 */
int down_read_trylock(struct rw_semaphore *sem)
{
	int ret = __down_read_trylock(sem);

	if (ret == 1)
		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
	return ret;
}
EXPORT_SYMBOL(down_read_trylock);
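
/*
 * Illustrative sketch (not built): down_read_trylock() is the right call
 * when blocking is not an option, e.g. in a best-effort diagnostic path.
 * The function and variable names here are invented for the example.
 */
#if 0
static bool example_try_snapshot(struct rw_semaphore *sem, int *shared, int *out)
{
	if (!down_read_trylock(sem))
		return false;		/* contended, caller falls back */
	*out = *shared;
	up_read(sem);
	return true;
}
#endif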

/*
 * lock for writing
 */
void __sched down_write(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(down_write);

/*
 * lock for writing
 */
int __sched down_write_killable(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
				  __down_write_killable)) {
		rwsem_release(&sem->dep_map, _RET_IP_);
		return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL(down_write_killable);

/*
 * trylock for writing -- returns 1 if successful, 0 if contention
 */
int down_write_trylock(struct rw_semaphore *sem)
{
	int ret = __down_write_trylock(sem);

	if (ret == 1)
		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL(down_write_trylock);

/*
 * release a read lock
 */
void up_read(struct rw_semaphore *sem)
{
	rwsem_release(&sem->dep_map, _RET_IP_);
	__up_read(sem);
}
EXPORT_SYMBOL(up_read);

/*
 * release a write lock
 */
void up_write(struct rw_semaphore *sem)
{
	rwsem_release(&sem->dep_map, _RET_IP_);
	__up_write(sem);
}
EXPORT_SYMBOL(up_write);

/*
 * downgrade write lock to read lock
 */
void downgrade_write(struct rw_semaphore *sem)
{
	lock_downgrade(&sem->dep_map, _RET_IP_);
	__downgrade_write(sem);
}
EXPORT_SYMBOL(downgrade_write);
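
/*
 * Illustrative sketch (not built): the usual downgrade pattern. A writer
 * publishes an update, then downgrades so other readers can proceed while
 * it keeps a consistent read-side view. Names are invented for the example.
 */
#if 0
static void example_publish_and_read(struct rw_semaphore *sem, int *shared, int val)
{
	down_write(sem);
	*shared = val;			/* exclusive update */
	downgrade_write(sem);		/* now a reader; other readers may enter */
	pr_info("published %d\n", *shared);
	up_read(sem);			/* release the read lock we were left with */
}
#endif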

#ifdef CONFIG_DEBUG_LOCK_ALLOC

void down_read_nested(struct rw_semaphore *sem, int subclass)
{
	might_sleep();
	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read_nested);

int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
{
	might_sleep();
	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
		rwsem_release(&sem->dep_map, _RET_IP_);
		return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL(down_read_killable_nested);

void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
{
	might_sleep();
	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(_down_write_nest_lock);

void down_read_non_owner(struct rw_semaphore *sem)
{
	might_sleep();
	__down_read(sem);
	__rwsem_set_reader_owned(sem, NULL);
}
EXPORT_SYMBOL(down_read_non_owner);

void down_write_nested(struct rw_semaphore *sem, int subclass)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(down_write_nested);

int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
				  __down_write_killable)) {
		rwsem_release(&sem->dep_map, _RET_IP_);
		return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL(down_write_killable_nested);

void up_read_non_owner(struct rw_semaphore *sem)
{
	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
	__up_read(sem);
}
EXPORT_SYMBOL(up_read_non_owner);

#endif