/*
 *  Fast Userspace Mutexes (which I call "Futexes!").
 *  (C) Rusty Russell, IBM 2002
 *
 *  Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
 *  (C) Copyright 2003 Red Hat Inc, All Rights Reserved
 *
 *  Removed page pinning, fix privately mapped COW pages and other cleanups
 *  (C) Copyright 2003, 2004 Jamie Lokier
 *
 *  Robust futex support started by Ingo Molnar
 *  (C) Copyright 2006 Red Hat Inc, All Rights Reserved
 *  Thanks to Thomas Gleixner for suggestions, analysis and fixes.
 *
 *  PI-futex support started by Ingo Molnar and Thomas Gleixner
 *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 *  PRIVATE futexes by Eric Dumazet
 *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
 *
 *  Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
 *  Copyright (C) IBM Corporation, 2009
 *  Thanks to Thomas Gleixner for conceptual design and careful reviews.
 *
 *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
 *  enough at me, Linus for the original (flawed) idea, Matthew
 *  Kirkwood for proof-of-concept implementation.
 *
 *  "The futexes are also cursed."
 *  "But they come in a choice of three flavours!"
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/export.h>
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
#include <linux/ptrace.h>
#include <linux/sched/rt.h>
#include <linux/hugetlb.h>
#include <linux/freezer.h>

#include <asm/futex.h>

#include "locking/rtmutex_common.h"

int __read_mostly futex_cmpxchg_enabled;

#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
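
/*
 * Sizing note: the hash table is a fixed, compile-time array of
 * 1 << FUTEX_HASHBITS buckets (256 normally, 16 with CONFIG_BASE_SMALL).
 * All futexes in the system share these buckets, so unrelated futexes
 * that hash to the same bucket contend on the same bucket lock.
 */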

/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SHARED		0x01
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04

/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	atomic_t refcount;

	union futex_key key;
};

/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list
 * via the rt_mutex code. See unqueue_me_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me() */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};

/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};

static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];

/*
 * We hash on the keys returned from get_futex_key (see below).
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}
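
/*
 * Example: a process-private futex keys on (current->mm, page-aligned
 * address), with the offset within the page doubling as the jhash2()
 * seed above, so every thread of a process maps a given uaddr to the
 * same bucket.
 */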

/*
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}

/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		atomic_inc(&key->private.mm->mm_count);
		break;
	}
}

/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}

/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in *key on success.
 *
 * For shared mappings, it's (page->index, file_inode(vma->vm_file),
 * offset_within_page).  For private mappings, it's (uaddr, current->mm).
 * We can usually work out the index without swapping in the page.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma().
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/* serialize against __split_huge_page_splitting() */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * page_head is valid pointer but we must pin
			 * it before taking the PG_lock and/or
			 * PG_compound_lock. The moment we re-enable
			 * irqs __split_huge_page_refcount() can rescue
			 * page_head from under us. We can't take the
			 * page lock or PG_compound_lock on a page that
			 * could be freed from under us.
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);

	/*
	 * If page_head->mapping is NULL, then it cannot be a PageAnon
	 * page; but it might be the ZERO_PAGE or in the gate area or
	 * in a special mapping (all cases which we are happy to fail);
	 * or it may have been a good file page when get_user_pages_fast
	 * found it, but truncated or holepunched or subjected to
	 * invalidate_complete_page2 before we got the page lock (also
	 * cases which we are happy to fail).  And we hold a reference,
	 * so refcount care in invalidate_complete_page's remove_mapping
	 * prevents drop_caches from setting mapping to NULL beneath us.
	 *
	 * The case we do have to guard against is when memory pressure made
	 * shmem_writepage move it from filecache to swapcache beneath us:
	 * an unlikely race, but we do need to retry for page_head->mapping.
	 */
	if (!page_head->mapping) {
		int shmem_swizzled = PageSwapCache(page_head);
		unlock_page(page_head);
		put_page(page_head);
		if (shmem_swizzled)
			goto again;
		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page_head)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = basepage_index(page);
	}

	get_futex_key_refs(key);

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}

static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}

/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}

/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}

static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
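
/*
 * Note on the pagefault_disable() bracket above and below: these helpers
 * run with a hash bucket spinlock held, so the user access must not
 * sleep. If the page is not resident, the access fails with -EFAULT
 * instead of faulting it in; callers then drop their locks, fault the
 * page in (e.g. via fault_in_user_writeable() or get_user()) and retry.
 */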

static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}

/*
 * PI code:
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	atomic_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}

static struct futex_pi_state * alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
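
/*
 * Design note: each task caches at most one free futex_pi_state in
 * current->pi_state_cache. refill_pi_state_cache() fills the slot before
 * any hash bucket lock is taken, so the PI paths never allocate while
 * holding a spinlock, and free_pi_state() recycles into the same slot.
 */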

/*
 * Look up the task based on what TID userspace gave us.
 * We dont trust it.
 */
static struct task_struct * futex_find_get_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (p)
		get_task_struct(p);

	rcu_read_unlock();

	return p;
}

/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for some
 * applications.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}

static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * When pi_state->owner is NULL then the owner died
			 * and another waiter is on the fly. pi_state->owner
			 * is fixed up by the task which acquires
			 * pi_state->rt_mutex.
			 *
			 * We do not check for pid == 0 which can happen when
			 * the owner died and robust_list handling already
			 * took over.
			 */
			if (pid && pi_state->owner) {
				/*
				 * Bail out if user space manipulated the
				 * futex value.
				 */
				if (pid != task_pid_vnr(pi_state->owner))
					return -EINVAL;
			}

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}

/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  0 - ready to wait;
 *  1 - acquired the lock;
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, force_take = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
	 * the locks. It will most likely not succeed.
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * Should we force take the futex? See below.
	 */
	if (unlikely(force_take)) {
		/*
		 * Keep the OWNER_DIED and the WAITERS bit and set the
		 * new TID value.
		 */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		force_take = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to owner died take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * No owner found for this futex. Check if the
			 * OWNER_DIED bit is set to figure out whether
			 * this is a robust futex or not.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * We simply start over in case of a robust
			 * futex. The code above will take the futex
			 * and return happy.
			 */
			if (!(curval & FUTEX_TID_MASK)) {
				force_take = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}

/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
}

/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed.
 */
static void wake_futex(struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	/*
	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
	 * a non-futex wake up happens on another CPU then the task
	 * might exit and p would dereference a non-existing task
	 * struct. Prevent this by holding a reference on p across the
	 * wake up.
	 */
	get_task_struct(p);

	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;

	wake_up_state(p, TASK_NORMAL);
	put_task_struct(p);
}

static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always
	 * kept enabled while there is PI state around. We cleanup the
	 * owner died bit, because we are the owner.
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}

static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 uninitialized_var(oldval);

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit has not to be preserved here. We are the owner:
	 */
	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
		return -EFAULT;
	if (oldval != uval)
		return -EAGAIN;

	return 0;
}

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}
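
/*
 * Ordering both bucket locks by ascending address above means two
 * concurrent callers working on the same pair of buckets always take
 * them in the same order, so an ABBA deadlock cannot occur;
 * spin_lock_nested() tells lockdep the nested acquisition is intended.
 */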

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}

/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	put_futex_key(&key);
out:
	return ret;
}

/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head;
	struct futex_q *this, *next;
	int ret, op_ret;

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	head = &hb1->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		head = &hb2->chain;

		op_ret = 0;
		plist_for_each_entry_safe(this, next, head, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}

/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}

/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}

/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  0 - failed to acquire the lock atomically;
 *  1 - acquired the lock;
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1)
		requeue_pi_wake_futex(top_waiter, key2, hb2);

	return ret;
}

/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
 * uaddr2 atomically on behalf of the top waiter.
 *
 * Return:
 * >=0 - on success, the number of tasks requeued or woken;
 *  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head1;
	struct futex_q *this, *next;
	u32 curval2;

	if (requeue_pi) {
		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * prior to requeueing the remaining waiters. However, the top
		 * waiter is woken via the rt_mutex (either by the atomic lock
		 * acquisition or by futex_unlock_pi()), so restrict nr_wake
		 * to 1 to keep the accounting of woken vs. requeued tasks
		 * sane.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	if (pi_state != NULL) {
		/*
		 * We will have to lookup the pi_state again, so free this one
		 * to keep the accounting correct.
		 */
		free_pi_state(pi_state);
		pi_state = NULL;
	}

	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it.
		 */
		if (ret == 1) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			ret = get_futex_value_locked(&curval2, uaddr2);
			if (!ret)
				ret = lookup_pi_state(curval2, hb2, &key2,
						      &pi_state);
		}

		switch (ret) {
		case 0:
			break;
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/* The owner was exiting, try again. */
			double_unlock_hb(hb1, hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	head1 = &hb1->chain;
	plist_for_each_entry_safe(this, next, head1, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 *
		 * We should never be requeueing a futex_q with a pi_state,
		 * which is awaiting a futex_unlock_pi().
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter) ||
		    this->pi_state) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			wake_futex(this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/* Prepare the waiter to take the rt_mutex. */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task, 1);
			if (ret == 1) {
				/* We got the lock. */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/* -EDEADLK */
				this->pi_state = NULL;
				free_pi_state(pi_state);
				goto out_unlock;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	if (pi_state != NULL)
		free_pi_state(pi_state);
	return ret ? ret : task_count;
}

/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);
	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}

static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
}

/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the
 * unqueue state is implicit in the state of the woken task (see
 * futex_wait_requeue_pi() for an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}

/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *   1 - if the futex_q was still queued (and we removed it);
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}

/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}

/*
 * Fixup the pi_state owner with the new owner.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}

static long futex_wait_restart(struct restart_block *restart);

/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  1 - success, lock taken;
 *  0 - success, lock not taken;
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}

/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using set_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}

/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *  0 - uaddr contains val and hb has been locked;
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(q, *hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(q, *hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}

static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int ret;

	if (!bitset)
		return -EINVAL;
	q.bitset = bitset;

	if (abs_time) {
		to = &timeout;

		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

retry:
	/*
	 * Prepare to wait on uaddr. On success, holds hb lock and increments
	 * q.key refs.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/* queue_me and wait for wakeup, timeout, or a signal. */
	futex_wait_queue_me(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	ret = 0;
	/* unqueue_me() drops q.key ref */
	if (!unqueue_me(&q))
		goto out;
	ret = -ETIMEDOUT;
	if (to && !to->task)
		goto out;

	/*
	 * We expect signal_pending(current), but we might be the
	 * victim of a spurious wakeup as well.
	 */
	if (!signal_pending(current))
		goto retry;

	ret = -ERESTARTSYS;
	if (!abs_time)
		goto out;

	restart = &current_thread_info()->restart_block;
	restart->fn = futex_wait_restart;
	restart->futex.uaddr = uaddr;
	restart->futex.val = val;
	restart->futex.time = abs_time->tv64;
	restart->futex.bitset = bitset;
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

	ret = -ERESTART_RESTARTBLOCK;

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}

static long futex_wait_restart(struct restart_block *restart)
{
	u32 __user *uaddr = restart->futex.uaddr;
	ktime_t t, *tp = NULL;

	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
		t.tv64 = restart->futex.time;
		tp = &t;
	}
	restart->fn = do_no_restart_syscall;

	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp, restart->futex.bitset);
}

/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block, it does PI, etc. (Due to
 * races the kernel might see a 0 value of the futex too.)
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Task is exiting and we just wait for the
			 * exit to complete.
			 */
			queue_unlock(&q, hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(&q, hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(&q, hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}
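
/*
 * Illustrative userspace fast path for a PI futex (a sketch using the
 * raw syscall interface; glibc's PTHREAD_PRIO_INHERIT mutexes follow the
 * same protocol):
 *
 *	lock:	if (cmpxchg(&futex, 0, gettid()) != 0)
 *			syscall(SYS_futex, &futex, FUTEX_LOCK_PI, 0, NULL);
 *	unlock:	if (cmpxchg(&futex, gettid(), 0) != gettid())
 *			syscall(SYS_futex, &futex, FUTEX_UNLOCK_PI);
 *
 * Only contended transitions enter the kernel, which is why both
 * futex_lock_pi() above and futex_unlock_pi() below retry the atomic
 * transition before doing any real work.
 */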

/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	u32 uval, vpid = task_pid_vnr(current);
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * again. If it succeeds then we can return without waking
	 * anyone else up:
	 */
	if (!(uval & FUTEX_OWNER_DIED) &&
	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == vpid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

out:
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}

/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Return:
 *  0 = no early wakeup detected;
 * <0 = -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to uaddr, as the requeue paths
	 * take the same lock.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}

/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the futex at uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
 *		the same type, no requeueing from private to shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.  This ensures the rt_mutex maintains an owner when
 * it has waiters; without one, the pi logic would not know which task to
 * boost/deboost, if there was a need to.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via the following:
 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
 * 2) wakeup on uaddr2 after a requeue
 * 3) signal
 * 4) timeout
 *
 * If 3, cleanup and return -ERESTARTNOINTR.
 *
 * If 2, we may then block on trying to take the rt_mutex and return via:
 * 5) successful lock
 * 6) signal
 * 7) timeout
 * 8) other lock acquisition failure
 *
 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
 *
 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
 *
 * Return:
 *  0 - On success;
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code. The
	 * futex_requeue dropped our key1 reference and incremented our key2
	 * reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up by futex_unlock_pi(), a timeout, or a
		 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
		 * the pi_state.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}
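
/*
 * Usage sketch (illustrative): a threading library registers the list
 * head once per thread at startup, roughly
 *
 *	set_robust_list(&self->robust_head, sizeof(self->robust_head));
 *
 * and thereafter links each robust mutex it acquires into that list
 * purely in userspace; the kernel only walks it at thread exit (see
 * exit_robust_list() below).
 */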

/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ))
		goto err_unlock;

	head = p->robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}

/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification - if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any).
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * We are not holding a lock here, but we want to have
		 * the pagefault_disable/enable() protection because
		 * we want to handle the fault gracefully. If the
		 * access fails we try to fault in the futex with R/W
		 * verification via get_user_pages. get_user() above
		 * does not guarantee R/W access. If that fails we
		 * give up and leave the futex locked.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}

/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     unsigned int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}
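
/*
 * Example: if userspace stored the entry value 0x7f0000001001, bit 0
 * tags it as a PI futex, so the caller gets the real pointer
 * 0x7f0000001000 and *pi = 1. List entries are at least word-aligned,
 * which is what frees bit 0 for this tag.
 */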

/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		if (!futex_cmpxchg_enabled)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		return futex_lock_pi(uaddr, flags, val, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, 0, timeout, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
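
/*
 * Illustrative userspace usage of plain (non-PI) futexes; a minimal
 * sketch using the raw syscall interface, with cmpxchg() standing in
 * for a compiler atomic builtin:
 *
 *	lock:	while (cmpxchg(&futex, 0, 1) != 0)
 *			syscall(SYS_futex, &futex, FUTEX_WAIT, 1, NULL, NULL, 0);
 *	unlock:	futex = 0;
 *		syscall(SYS_futex, &futex, FUTEX_WAKE, 1, NULL, NULL, 0);
 *
 * FUTEX_WAIT blocks only while the word still contains the expected
 * value (1 here); see the ordering discussion in futex_wait_setup().
 */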

SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

static int __init futex_init(void)
{
	u32 curval;
	int i;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * operation, so we cannot know at compile time whether it is
	 * available. We probe it here: a -EFAULT on the NULL pointer
	 * means the operation is implemented and merely faulted on the
	 * user access; anything else (typically -ENOSYS) means the
	 * architecture does not provide it, and the futex ops that
	 * depend on it stay disabled.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;

	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(futex_init);