/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/rt.h>
#include <linux/osq_lock.h>

#include "rwsem.h"

/*
 * Guide to the rw_semaphore's count field for common values.
 * (32-bit case illustrated, similar for 64-bit)
 *
 * 0x00000000	rwsem is unlocked, and no one is waiting for the lock or
 *		attempting to read-lock or write-lock it.
 *
 * 0x0000000X	X readers active or attempting the lock, no writer waiting
 *		(X * RWSEM_ACTIVE_BIAS).
 *
 * 0xffff0000	readers or writers are queued but none is active or in the
 *		process of attempting the lock (RWSEM_WAITING_BIAS).  A
 *		writer may try to steal the lock for this count by adding
 *		RWSEM_ACTIVE_WRITE_BIAS with cmpxchg and checking the old
 *		count.
 *
 * 0xffff000X	X readers active or attempting the lock with waiters queued
 *		(X * RWSEM_ACTIVE_BIAS + RWSEM_WAITING_BIAS); or one writer
 *		active or attempting the lock and X-1 readers active or
 *		attempting it, with no waiters queued.
 *
 * 0xfffe0001	one writer active or attempting the lock, with waiters
 *		queued (RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS).
 *
 * Readers attempt to lock by adding RWSEM_ACTIVE_READ_BIAS in down_read()
 * and checking that the resulting count is positive; writers add
 * RWSEM_ACTIVE_WRITE_BIAS in down_write() and check that the active part
 * of the old count was zero.  On failure both fall through to the
 * slowpath functions below.
 */
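
/*
 * Worked sequence (added illustration, not from the original source):
 * starting unlocked (0x00000000), a down_read() moves the count to
 * 0x00000001 and a second reader to 0x00000002.  A down_write() then
 * adds RWSEM_ACTIVE_WRITE_BIAS, sees the active part was non-zero,
 * backs its bias out in the slowpath and queues itself, leaving
 * 0xffff0002.  As the readers call up_read() the count falls through
 * 0xffff0001 to 0xffff0000, at which point the final up_read() calls
 * rwsem_wake() and the queued writer is woken to take the lock.
 */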
/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	sem->count = RWSEM_UNLOCKED_VALUE;
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	sem->owner = NULL;
	osq_lock_init(&sem->osq);
#endif
}

EXPORT_SYMBOL(__init_rwsem);

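/*
 * Usage sketch (added illustration; "stats_sem" is a hypothetical name):
 * statically declared rwsems use DECLARE_RWSEM(), which expands to
 * __RWSEM_INITIALIZER(); dynamically allocated ones go through the
 * init_rwsem() wrapper, which supplies the lockdep class key before
 * calling __init_rwsem():
 *
 *	static DECLARE_RWSEM(stats_sem);
 *
 *	down_read(&stats_sem);
 *	...read shared state; the uncontended path is one atomic add...
 *	up_read(&stats_sem);
 *
 * Only when the fast-path atomic operation observes contention do the
 * rwsem_down_read_failed()/rwsem_down_write_failed() slowpaths below run.
 */
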
enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
};

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if wake_type is RWSEM_WAKE_ANY
 */
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
{
	struct rwsem_waiter *waiter;
	struct task_struct *tsk;
	struct list_head *next;
	long oldcount, woken, loop, adjustment;

	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY)
			/* Wake writer at the front of the queue, but do not
			 * grant it the lock yet as we want other writers
			 * to be able to steal it.  Readers, on the other
			 * hand, will block as they will notice the queued
			 * writer.
			 */
			wake_up_process(waiter->task);
		goto out;
	}

	/* Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	adjustment = 0;
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		adjustment = RWSEM_ACTIVE_READ_BIAS;
 try_reader_grant:
		oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
			/* A writer stole the lock. Undo our reader grant. */
			if (rwsem_atomic_update(-adjustment, sem) &
						RWSEM_ACTIVE_MASK)
				goto out;
			/* Last active locker left. Retry waking readers. */
			goto try_reader_grant;
		}
	}

	/* Grant an infinite number of read locks to the readers at the front
	 * of the queue.  Note we increment the 'active part' of the count by
	 * the number of readers before waking any processes up.
	 */
	woken = 0;
	do {
		woken++;

		if (waiter->list.next == &sem->wait_list)
			break;

		waiter = list_entry(waiter->list.next,
					struct rwsem_waiter, list);

	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);

	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
	if (waiter->type != RWSEM_WAITING_FOR_WRITE)
		/* hit end of list above */
		adjustment -= RWSEM_WAITING_BIAS;

	if (adjustment)
		rwsem_atomic_add(adjustment, sem);

	next = sem->wait_list.next;
	loop = woken;
	do {
		waiter = list_entry(next, struct rwsem_waiter, list);
		next = waiter->list.next;
		tsk = waiter->task;
		/*
		 * Make sure we do not wakeup the next reader before
		 * setting the nil condition to grant the next reader;
		 * otherwise we could miss the wakeup on the other
		 * side and end up sleeping again. See the pairing
		 * in rwsem_down_read_failed().
		 */
		smp_mb();
		waiter->task = NULL;
		wake_up_process(tsk);
		put_task_struct(tssk);
	} while (--loop);

	sem->wait_list.next = next;
	next->prev = &sem->wait_list;

 out:
	return sem;
}

/*
 * Wait for the read lock to be granted
 */
__visible
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
	struct rwsem_waiter waiter;
	struct task_struct *tsk = current;

	/* set up my own style of waitqueue */
	waiter.task = tsk;
	waiter.type = RWSEM_WAITING_FOR_READ;
	get_task_struct(tsk);

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list))
		adjustment += RWSEM_WAITING_BIAS;
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = rwsem_atomic_update(adjustment, sem);

	/* If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers !
	 */
	if (count == RWSEM_WAITING_BIAS ||
	    (count > RWSEM_WAITING_BIAS &&
	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

	raw_spin_unlock_irq(&sem->wait_lock);

	/* wait to be given the lock */
	while (true) {
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		if (!waiter.task)
			break;
		schedule();
	}

	__set_task_state(tsk, TASK_RUNNING);
	return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);

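/*
 * Worked values for the read slowpath (added illustration): the reader
 * already added RWSEM_ACTIVE_READ_BIAS in the fast path.  If it is the
 * first waiter, the update above applies -RWSEM_ACTIVE_READ_BIAS +
 * RWSEM_WAITING_BIAS.  A resulting count of exactly 0xffff0000
 * (RWSEM_WAITING_BIAS) means no lockers remain and the front of the
 * queue must be woken; 0xffff0001 means one task still holds or is
 * attempting the lock, so if we queued first we wake waiters to join
 * any existing readers, and otherwise we simply go to sleep.
 */
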
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
	/*
	 * Try acquiring the write lock. Check count first in order
	 * to reduce unnecessary expensive cmpxchg() operations.
	 */
	if (count == RWSEM_WAITING_BIAS &&
	    cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS,
		    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
		if (!list_is_singular(&sem->wait_list))
			rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}

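/*
 * Worked values (added illustration): count == RWSEM_WAITING_BIAS
 * (0xffff0000 on 32-bit) says waiters are queued but nobody holds the
 * lock, so the cmpxchg above moves the count straight to
 * RWSEM_ACTIVE_WRITE_BIAS (0xffff0001: one active writer, no waiters).
 * If this writer is not the only waiter, the waiting bias is re-added
 * immediately, giving 0xfffe0001 (one active writer, waiters queued).
 */
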
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long old, count = READ_ONCE(sem->count);

	while (true) {
		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
			return false;

		old = cmpxchg_acquire(&sem->count, count,
				      count + RWSEM_ACTIVE_WRITE_BIAS);
		if (old == count) {
			rwsem_set_owner(sem);
			return true;
		}

		count = old;
	}
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	bool ret = true;

	if (need_resched())
		return false;

	rcu_read_lock();
	owner = READ_ONCE(sem->owner);
	if (!owner) {
		long count = READ_ONCE(sem->count);
		/*
		 * If sem->owner is not set, yet we have just recently entered
		 * the slowpath with the lock being active, then there is a
		 * possibility reader(s) may have the lock. To be safe, bail
		 * spinning in these situations.
		 */
		if (count & RWSEM_ACTIVE_MASK)
			ret = false;
		goto done;
	}

	ret = owner->on_cpu;
done:
	rcu_read_unlock();
	return ret;
}

static noinline
bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
{
	long count;

	rcu_read_lock();
	while (sem->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking that sem->owner still matches owner. If that
		 * fails, owner might point to free()d memory; if it still
		 * matches, the rcu_read_lock() ensures the memory stays
		 * valid.
		 */
		barrier();

		/* abort spinning when need_resched or owner is not running */
		if (!owner->on_cpu || need_resched()) {
			rcu_read_unlock();
			return false;
		}

		cpu_relax_lowlatency();
	}
	rcu_read_unlock();

	if (READ_ONCE(sem->owner))
		return true; /* new owner, continue spinning */

	/*
	 * When the owner is not set, the lock could be free or
	 * held by readers. Check the counter to verify the
	 * state.
	 */
	count = READ_ONCE(sem->count);
	return (count == 0 || count == RWSEM_WAITING_BIAS);
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	bool taken = false;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!rwsem_can_spin_on_owner(sem))
		goto done;

	if (!osq_lock(&sem->osq))
		goto done;

	while (true) {
		owner = READ_ONCE(sem->owner);
		if (owner && !rwsem_spin_on_owner(sem, owner))
			break;

		/* wait_lock will be acquired if write_lock is obtained */
		if (rwsem_try_write_lock_unqueued(sem)) {
			taken = true;
			break;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(current)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax_lowlatency();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	return taken;
}

/*
 * Return true if the rwsem has active spinner
 */
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
	return osq_is_locked(&sem->osq);
}

#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
{
	return false;
}
#endif

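/*
 * Design note (added commentary): the OSQ is an MCS-style queue that
 * admits one optimistic spinner at a time; other would-be spinners wait
 * on per-CPU queue nodes instead of all polling sem->owner and the
 * shared count.  rwsem_has_spinner() is also what lets rwsem_wake()
 * below skip the wait-queue wakeup when a spinner is about to take the
 * lock anyway.
 */
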
/*
 * Wait until we successfully acquire the write lock
 */
__visible
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
	long count;
	bool waiting = true; /* any queued threads before us */
	struct rwsem_waiter waiter;

	/* undo write bias from down_write operation, stop active locking */
	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_optimistic_spin(sem))
		return sem;

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	if (list_empty(&sem->wait_list))
		waiting = false;

	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	if (waiting) {
		count = READ_ONCE(sem->count);

		/*
		 * If there were already threads queued before us and there are
		 * no active writers, the lock must be read owned; so we try to
		 * wake any read locks that were queued ahead of us.
		 */
		if (count > RWSEM_WAITING_BIAS)
			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);

	} else
		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);

	/* wait until we successfully acquire the lock */
	set_current_state(TASK_UNINTERRUPTIBLE);
	while (true) {
		if (rwsem_try_write_lock(count, sem))
			break;
		raw_spin_unlock_irq(&sem->wait_lock);

		/* Block until there are no active lockers. */
		do {
			schedule();
			set_current_state(TASK_UNINTERRUPTIBLE);
		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);

		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);

	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);

	return sem;
}
EXPORT_SYMBOL(rwsem_down_write_failed);

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
__visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;

	/*
	 * If a spinner is present, it is not necessary to do the wakeup:
	 * the spinner will observe the released count and take the lock
	 * shortly on its own.  Try to do the wakeup only if the wait_lock
	 * trylock succeeds, to minimize spinlock contention which may
	 * introduce too much delay in the unlock operation.
	 *
	 *    spinning writer		up_write/up_read caller
	 *    ---------------		-----------------------
	 * [S]   osq_unlock()		[L]   osq
	 *	 MB			      RMB
	 * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
	 *
	 * A spinner that gives up does osq_unlock() and then queues itself
	 * under the wait_lock, so if we observe the osq unlocked here,
	 * either there is no spinner or the old spinner will serialize
	 * with us on the wait_lock and be woken through the list normally.
	 */
	if (rwsem_has_spinner(sem)) {
		/*
		 * The smp_rmb() here is to make sure that the spinner
		 * state is consulted before reading the wait_lock.
		 */
		smp_rmb();
		if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
			return sem;
		goto locked;
	}
	raw_spin_lock_irqsave(&sem->wait_lock, flags);
locked:

	/* do nothing if list empty */
	if (!list_empty(&sem->wait_list))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

	return sem;
}
EXPORT_SYMBOL(rwsem_wake);

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
__visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	/* do nothing if list empty */
	if (!list_empty(&sem->wait_list))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

	return sem;
}
EXPORT_SYMBOL(rwsem_downgrade_wake);

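/*
 * Usage sketch (added illustration; "sem" stands for any caller-owned
 * rwsem): downgrade_write() converts a held write lock into a read lock
 * without a release/reacquire window, letting queued readers in while
 * the caller keeps reading:
 *
 *	down_write(&sem);
 *	...modify shared state...
 *	downgrade_write(&sem);
 *	...continue with read-side access; queued readers at the head of
 *	   the wait list are woken via rwsem_downgrade_wake() above...
 *	up_read(&sem);
 */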