1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63#include <linux/sched/rt.h>
64#include <linux/hugetlb.h>
65#include <linux/freezer.h>
66
67#include <asm/futex.h>
68
69#include "rtmutex_common.h"
70
71int __read_mostly futex_cmpxchg_enabled;
72
73#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
74
75
76
77
78
79#define FLAGS_SHARED 0x01
80#define FLAGS_CLOCKRT 0x02
81#define FLAGS_HAS_TIMEOUT 0x04
82
83
84
85
/*
 * Priority Inheritance state attached to a futex owner task.  One of these
 * exists per PI futex that has kernel state; it ties the user space futex
 * value to an in-kernel rt_mutex so waiters can boost the owner's priority.
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;	/* task that owns the user futex */
	atomic_t refcount;		/* users of this pi_state object */

	union futex_key key;		/* the futex this state belongs to */
};
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list node on the hash bucket chain
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock protecting this entry
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state (PI futexes only)
 * @rt_waiter:		rt_mutex waiter storage, used with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};
137
/* Template used to initialize an on-stack futex_q before use. */
static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me()*/
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
143
144
145
146
147
148
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};
153
154static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
155
156
157
158
159static struct futex_hash_bucket *hash_futex(union futex_key *key)
160{
161 u32 hash = jhash2((u32*)&key->both.word,
162 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
163 key->both.offset);
164 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
165}
166
167
168
169
170static inline int match_futex(union futex_key *key1, union futex_key *key2)
171{
172 return (key1 && key2
173 && key1->both.word == key2->both.word
174 && key1->both.ptr == key2->both.ptr
175 && key1->both.offset == key2->both.offset);
176}
177
178
179
180
181
182
/**
 * get_futex_key_refs() - Take a reference to the resource addressed by a key
 * @key:	futex key to take a reference on
 *
 * Take a reference to the resource addressed by a key.  Can be called while
 * holding spinlocks.  A private futex (neither INODE nor MMSHARED bit set)
 * needs no reference.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		/* shared, file-backed futex: pin the inode */
		ihold(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		/* shared anonymous futex: pin the mm */
		atomic_inc(&key->private.mm->mm_count);
		break;
	}
}
197
198
199
200
201
/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in *key on success.
 *
 * For shared mappings the key is based on the page's inode and offset;
 * for private mappings it is (uaddr, current->mm).
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma()
	 */
	if (!fshared) {
		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/* serialize against __split_huge_page_splitting() */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * page_head is valid pointer but we must pin
			 * it before taking the PG_lock and/or
			 * PG_compound_lock. The moment we re-enable
			 * irqs __split_huge_page_splitting() can
			 * return and the head page can be freed from
			 * under us. We can't take the PG_lock and/or
			 * PG_compound_lock on a page that could be
			 * freed from under us.
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);

	/*
	 * If page_head->mapping is NULL, then it cannot be a PageAnon
	 * page; but it might be the ZERO_PAGE, in the gate area or in a
	 * special mapping, or a good file page got truncated/invalidated
	 * after get_user_pages_fast found it but before we took the page
	 * lock - all cases in which we are happy to fail.
	 *
	 * The case we do have to guard against is when memory pressure made
	 * shmem_writepage move it from filecache to swapcache beneath us:
	 * an unlikely race, but we do need to retry for page_head->mapping.
	 */
	if (!page_head->mapping) {
		int shmem_swizzled = PageSwapCache(page_head);
		unlock_page(page_head);
		put_page(page_head);
		if (shmem_swizzled)
			goto again;
		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page_head)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = basepage_index(page);
	}

	get_futex_key_refs(key);

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}
380
/* Release the reference taken by get_futex_key(). */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
385
386
387
388
389
390
391
392
393
394
395
396
397
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
410
411
412
413
414
415
416
417
/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futexes in @hb)
 *
 * Must be called with the hb lock held.  Returns NULL when no waiter
 * matches @key.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}
429
/*
 * cmpxchg the user space futex value with pagefaults disabled.  The
 * caller holds the hash bucket lock, so a fault must not be serviced
 * here; it is reported to the caller instead (non-zero return).
 */
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
441
/*
 * Read the user space futex value with pagefaults disabled (caller
 * holds the hash bucket lock).  Returns -EFAULT when the access would
 * have faulted; the caller must drop its locks and fault the page in.
 */
static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}
452
453
454
455
456
457static int refill_pi_state_cache(void)
458{
459 struct futex_pi_state *pi_state;
460
461 if (likely(current->pi_state_cache))
462 return 0;
463
464 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
465
466 if (!pi_state)
467 return -ENOMEM;
468
469 INIT_LIST_HEAD(&pi_state->list);
470
471 pi_state->owner = NULL;
472 atomic_set(&pi_state->refcount, 1);
473 pi_state->key = FUTEX_KEY_INIT;
474
475 current->pi_state_cache = pi_state;
476
477 return 0;
478}
479
/*
 * Hand out the cached pi_state.  refill_pi_state_cache() must have been
 * called beforehand, so this cannot fail.
 */
static struct futex_pi_state * alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}
489
/*
 * Drop a reference on a pi_state; on the final put, detach it from the
 * owner and either free it or stash it back into current's cache.
 */
static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
520
521
522
523
524
525static struct task_struct * futex_find_get_task(pid_t pid)
526{
527 struct task_struct *p;
528
529 rcu_read_lock();
530 p = find_task_by_vpid(pid);
531 if (p)
532 get_task_struct(p);
533
534 rcu_read_unlock();
535
536 return p;
537}
538
539
540
541
542
543
/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
593
/*
 * Look up (or create) the pi_state for the futex at *key, given the
 * user space value uval.  On success *ps holds a referenced pi_state.
 */
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * When pi_state->owner is NULL then the owner died
			 * and another waiter is on the fly. pi_state->owner
			 * is fixed up by the task which acquires
			 * pi_state->rt_mutex.
			 *
			 * We do not check for pid == 0 which can happen when
			 * the owner died and robust_list handling already set
			 * FUTEX_OWNER_DIED:
			 */
			if (pid && pi_state->owner) {
				/*
				 * Bail out if user space manipulated the
				 * futex value.
				 */
				if (pid != task_pid_vnr(pi_state->owner))
					return -EINVAL;
			}

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  0 - ready to wait;
 *  1 - acquired the lock;
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, force_take = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
	 * the locks. It will most likely not succeed.
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * Should we force take the futex? See below.
	 */
	if (unlikely(force_take)) {
		/*
		 * Keep the OWNER_DIED and the WAITERS bit and set the
		 * new TID value.
		 */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		force_take = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to forced take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * We failed to find an owner for this
			 * futex. So we have no pi_state to block
			 * on. This can happen in two cases:
			 *
			 * 1) The owner died
			 * 2) A stale FUTEX_WAITERS bit
			 *
			 * Re-read the futex value.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * If the owner died or we have a stale
			 * WAITERS bit the owner TID in the user space
			 * futex is 0.
			 */
			if (!(curval & FUTEX_TID_MASK)) {
				force_take = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}
823
824
825
826
827
828
829
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
}
841
842
843
844
845
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed.
 */
static void wake_futex(struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	/*
	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
	 * a non-futex wake up happens on another CPU then the task
	 * might exit and p would dereference a non-existing task
	 * struct. Prevent this by holding a reference on p across the
	 * wake up.
	 */
	get_task_struct(p);

	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;

	wake_up_state(p, TASK_NORMAL);
	put_task_struct(p);
}
875
/*
 * Wake the top waiter of a PI futex and hand the pi_state (and thus the
 * rt_mutex and the user space TID) over to it.  Called from unlock_pi
 * with the hash bucket lock held.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. (The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.)
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	/* Detach the pi_state from the old owner ... */
	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	/* ... and attach it to the new one. */
	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}
939
940static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
941{
942 u32 uninitialized_var(oldval);
943
944
945
946
947
948 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
949 return -EFAULT;
950 if (oldval != uval)
951 return -EAGAIN;
952
953 return 0;
954}
955
956
957
958
/*
 * Express the locking dependencies for lockdep: always take the two
 * bucket locks in address order, annotating the inner one as nested.
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}
971
/* Release both bucket locks; hb1 and hb2 may be the same bucket. */
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
979
980
981
982
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key)) {
			/* Waking a PI futex here is an error. */
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	put_futex_key(&key);
out:
	return ret;
}
1025
1026
1027
1028
1029
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address (uaddr1), after atomically performing the
 * op on uaddr2 and, depending on its result, waking on uaddr2 too.
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head;
	struct futex_q *this, *next;
	int ret, op_ret;

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		/* Fault the page in and retry the atomic op. */
		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	head = &hb1->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		head = &hb2->chain;

		op_ret = 0;
		plist_for_each_entry_safe(this, next, head, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
1125
1126
1127
1128
1129
1130
1131
1132
/**
 * requeue_futex() - Requeue a futex_q from one hash bucket to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash bucket
 * @hb2:	the target hash bucket
 * @key2:	the new key for the requeued futex_q
 *
 * Both hash bucket locks must be held by the caller.
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  0 - failed to acquire the lock atomically;
 *  1 - acquired the lock;
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1)
		requeue_pi_wake_futex(top_waiter, key2, hb2);

	return ret;
}
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1265 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1266 u32 *cmpval, int requeue_pi)
1267{
1268 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1269 int drop_count = 0, task_count = 0, ret;
1270 struct futex_pi_state *pi_state = NULL;
1271 struct futex_hash_bucket *hb1, *hb2;
1272 struct plist_head *head1;
1273 struct futex_q *this, *next;
1274 u32 curval2;
1275
1276 if (requeue_pi) {
1277
1278
1279
1280
1281 if (refill_pi_state_cache())
1282 return -ENOMEM;
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293 if (nr_wake != 1)
1294 return -EINVAL;
1295 }
1296
1297retry:
1298 if (pi_state != NULL) {
1299
1300
1301
1302
1303 free_pi_state(pi_state);
1304 pi_state = NULL;
1305 }
1306
1307 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1308 if (unlikely(ret != 0))
1309 goto out;
1310 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1311 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1312 if (unlikely(ret != 0))
1313 goto out_put_key1;
1314
1315 hb1 = hash_futex(&key1);
1316 hb2 = hash_futex(&key2);
1317
1318retry_private:
1319 double_lock_hb(hb1, hb2);
1320
1321 if (likely(cmpval != NULL)) {
1322 u32 curval;
1323
1324 ret = get_futex_value_locked(&curval, uaddr1);
1325
1326 if (unlikely(ret)) {
1327 double_unlock_hb(hb1, hb2);
1328
1329 ret = get_user(curval, uaddr1);
1330 if (ret)
1331 goto out_put_keys;
1332
1333 if (!(flags & FLAGS_SHARED))
1334 goto retry_private;
1335
1336 put_futex_key(&key2);
1337 put_futex_key(&key1);
1338 goto retry;
1339 }
1340 if (curval != *cmpval) {
1341 ret = -EAGAIN;
1342 goto out_unlock;
1343 }
1344 }
1345
1346 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1347
1348
1349
1350
1351
1352
1353 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1354 &key2, &pi_state, nr_requeue);
1355
1356
1357
1358
1359
1360
1361
1362 if (ret == 1) {
1363 WARN_ON(pi_state);
1364 drop_count++;
1365 task_count++;
1366 ret = get_futex_value_locked(&curval2, uaddr2);
1367 if (!ret)
1368 ret = lookup_pi_state(curval2, hb2, &key2,
1369 &pi_state);
1370 }
1371
1372 switch (ret) {
1373 case 0:
1374 break;
1375 case -EFAULT:
1376 double_unlock_hb(hb1, hb2);
1377 put_futex_key(&key2);
1378 put_futex_key(&key1);
1379 ret = fault_in_user_writeable(uaddr2);
1380 if (!ret)
1381 goto retry;
1382 goto out;
1383 case -EAGAIN:
1384
1385 double_unlock_hb(hb1, hb2);
1386 put_futex_key(&key2);
1387 put_futex_key(&key1);
1388 cond_resched();
1389 goto retry;
1390 default:
1391 goto out_unlock;
1392 }
1393 }
1394
1395 head1 = &hb1->chain;
1396 plist_for_each_entry_safe(this, next, head1, list) {
1397 if (task_count - nr_wake >= nr_requeue)
1398 break;
1399
1400 if (!match_futex(&this->key, &key1))
1401 continue;
1402
1403
1404
1405
1406
1407
1408
1409
1410 if ((requeue_pi && !this->rt_waiter) ||
1411 (!requeue_pi && this->rt_waiter) ||
1412 this->pi_state) {
1413 ret = -EINVAL;
1414 break;
1415 }
1416
1417
1418
1419
1420
1421
1422 if (++task_count <= nr_wake && !requeue_pi) {
1423 wake_futex(this);
1424 continue;
1425 }
1426
1427
1428 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1429 ret = -EINVAL;
1430 break;
1431 }
1432
1433
1434
1435
1436
1437 if (requeue_pi) {
1438
1439 atomic_inc(&pi_state->refcount);
1440 this->pi_state = pi_state;
1441 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1442 this->rt_waiter,
1443 this->task, 1);
1444 if (ret == 1) {
1445
1446 requeue_pi_wake_futex(this, &key2, hb2);
1447 drop_count++;
1448 continue;
1449 } else if (ret) {
1450
1451 this->pi_state = NULL;
1452 free_pi_state(pi_state);
1453 goto out_unlock;
1454 }
1455 }
1456 requeue_futex(this, hb1, hb2, &key2);
1457 drop_count++;
1458 }
1459
1460out_unlock:
1461 double_unlock_hb(hb1, hb2);
1462
1463
1464
1465
1466
1467
1468
1469 while (--drop_count >= 0)
1470 drop_futex_key_refs(&key1);
1471
1472out_put_keys:
1473 put_futex_key(&key2);
1474out_put_key1:
1475 put_futex_key(&key1);
1476out:
1477 if (pi_state != NULL)
1478 free_pi_state(pi_state);
1479 return ret ? ret : task_count;
1480}
1481
1482
/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);
	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}
1494
/* Drop the hash bucket lock taken by queue_lock() without queueing. */
static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
}
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the
 * unqueue state is implicit in the state of woken task (see futex_wait_requeue_pi).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *   1 - if the futex_q was still queued (and we removed unqueued it);
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
1585
1586
1587
1588
1589
1590
/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}
1602
1603
1604
1605
1606
1607
1608
/*
 * Fixup the pi_state owner with the new owner.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
1700
1701static long futex_wait_restart(struct restart_block *restart);
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  1 - success, lock taken;
 *  0 - success, lock not taken;
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
1775
1776
1777
1778
1779
1780
1781
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using set_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *  0 - uaddr contains val and hb has been locked;
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(q, *hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(q, *hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}
1892
1893static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
1894 ktime_t *abs_time, u32 bitset)
1895{
1896 struct hrtimer_sleeper timeout, *to = NULL;
1897 struct restart_block *restart;
1898 struct futex_hash_bucket *hb;
1899 struct futex_q q = futex_q_init;
1900 int ret;
1901
1902 if (!bitset)
1903 return -EINVAL;
1904 q.bitset = bitset;
1905
1906 if (abs_time) {
1907 to = &timeout;
1908
1909 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
1910 CLOCK_REALTIME : CLOCK_MONOTONIC,
1911 HRTIMER_MODE_ABS);
1912 hrtimer_init_sleeper(to, current);
1913 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1914 current->timer_slack_ns);
1915 }
1916
1917retry:
1918
1919
1920
1921
1922 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
1923 if (ret)
1924 goto out;
1925
1926
1927 futex_wait_queue_me(hb, &q, to);
1928
1929
1930 ret = 0;
1931
1932 if (!unqueue_me(&q))
1933 goto out;
1934 ret = -ETIMEDOUT;
1935 if (to && !to->task)
1936 goto out;
1937
1938
1939
1940
1941
1942 if (!signal_pending(current))
1943 goto retry;
1944
1945 ret = -ERESTARTSYS;
1946 if (!abs_time)
1947 goto out;
1948
1949 restart = ¤t_thread_info()->restart_block;
1950 restart->fn = futex_wait_restart;
1951 restart->futex.uaddr = uaddr;
1952 restart->futex.val = val;
1953 restart->futex.time = abs_time->tv64;
1954 restart->futex.bitset = bitset;
1955 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
1956
1957 ret = -ERESTART_RESTARTBLOCK;
1958
1959out:
1960 if (to) {
1961 hrtimer_cancel(&to->timer);
1962 destroy_hrtimer_on_stack(&to->timer);
1963 }
1964 return ret;
1965}
1966
1967
1968static long futex_wait_restart(struct restart_block *restart)
1969{
1970 u32 __user *uaddr = restart->futex.uaddr;
1971 ktime_t t, *tp = NULL;
1972
1973 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1974 t.tv64 = restart->futex.time;
1975 tp = &t;
1976 }
1977 restart->fn = do_no_restart_syscall;
1978
1979 return (long)futex_wait(uaddr, restart->futex.flags,
1980 restart->futex.val, tp, restart->futex.bitset);
1981}
1982
1983
1984
1985
1986
1987
1988
1989
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed - it contended. Acquire the lock in the kernel via the
 * rtmutex-based priority-inheritance machinery.
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * The owner is exiting; just wait for the exit to
			 * complete and retry from scratch.
			 */
			queue_unlock(&q, hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	/* futex_lock_pi_atomic() must have set up the pi_state. */
	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * the rt_mutex and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the hb lock. */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(&q, hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	/* -EINTR must not leak to userspace for FUTEX_LOCK_PI. */
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(&q, hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	/* Shared mapping may have changed - redo the key lookup. */
	put_futex_key(&q.key);
	goto retry;
}
2105
2106
2107
2108
2109
2110
/*
 * Userspace attempted a TID -> 0 atomic transition of the futex value
 * and failed. This is the in-kernel slowpath: we look up the PI state
 * (if any) and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	u32 uval, vpid = task_pid_vnr(current);
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try the TID -> 0 atomic transition again under
	 * the hb lock. If it succeeds we can return without waking anyone
	 * else up. Skip the cmpxchg when the owner-died bit is set, in
	 * which case the state must be cleaned up via the waiter path.
	 */
	if (!(uval & FUTEX_OWNER_DIED) &&
	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == vpid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

out:
	return ret;

pi_faulted:
	/* Drop the locks before faulting in the user page, then retry. */
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213static inline
2214int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2215 struct futex_q *q, union futex_key *key2,
2216 struct hrtimer_sleeper *timeout)
2217{
2218 int ret = 0;
2219
2220
2221
2222
2223
2224
2225
2226
2227 if (!match_futex(&q->key, key2)) {
2228 WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
2229
2230
2231
2232
2233 plist_del(&q->list, &hb->chain);
2234
2235
2236 ret = -EWOULDBLOCK;
2237 if (timeout && !timeout->task)
2238 ret = -ETIMEDOUT;
2239 else if (signal_pending(current))
2240 ret = -ERESTARTNOINTR;
2241 }
2242 return ret;
2243}
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the PI futex at uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.)
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the PI futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.  This ensures the rt_mutex maintains an owner when
 * it has waiters; without one, the PI logic would not know which task to
 * boost/deboost if a waiter is required.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via the following:
 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
 * 2) wakeup on uaddr2 after a requeue
 * 3) signal
 * 4) timeout
 *
 * If 3, cleanup and return -ERESTARTNOINTR.
 *
 * If 2, we may then block on trying to take the rt_mutex and return via:
 * 5) successful lock
 * 6) signal
 * 7) timeout
 * 8) other lock acquisition failure
 *
 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
 *
 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
 *
 * Return:
 *  0 - On success;
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, it holds hb->lock and q
	 * holds a reference on q.key.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code.
	 * The futex_requeue dropped our key1 reference and incremented our
	 * key2 reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up in rt_mutex_finish_proxy_lock's
		 * wait-loop by futex_requeue() - complete the lock
		 * acquisition, started by the requeue code, on our behalf.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if
		 * we haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2445 size_t, len)
2446{
2447 if (!futex_cmpxchg_enabled)
2448 return -ENOSYS;
2449
2450
2451
2452 if (unlikely(len != sizeof(*head)))
2453 return -EINVAL;
2454
2455 current->robust_list = head;
2456
2457 return 0;
2458}
2459
2460
2461
2462
2463
2464
2465
2466SYSCALL_DEFINE3(get_robust_list, int, pid,
2467 struct robust_list_head __user * __user *, head_ptr,
2468 size_t __user *, len_ptr)
2469{
2470 struct robust_list_head __user *head;
2471 unsigned long ret;
2472 struct task_struct *p;
2473
2474 if (!futex_cmpxchg_enabled)
2475 return -ENOSYS;
2476
2477 rcu_read_lock();
2478
2479 ret = -ESRCH;
2480 if (!pid)
2481 p = current;
2482 else {
2483 p = find_task_by_vpid(pid);
2484 if (!p)
2485 goto err_unlock;
2486 }
2487
2488 ret = -EPERM;
2489 if (!ptrace_may_access(p, PTRACE_MODE_READ))
2490 goto err_unlock;
2491
2492 head = p->robust_list;
2493 rcu_read_unlock();
2494
2495 if (put_user(sizeof(*head), len_ptr))
2496 return -EFAULT;
2497 return put_user(head, head_ptr);
2498
2499err_unlock:
2500 rcu_read_unlock();
2501
2502 return ret;
2503}
2504
2505
2506
2507
2508
/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * thread-death.) The rest of the cleanup is done in
		 * userspace.
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * We are not holding a lock here, but we want to have
		 * the pagefault_disable/enable() protection because
		 * we want to handle the fault gracefully. If the
		 * access fails we try to fault in the futex with R/W
		 * verification via get_user_pages. get_user() above
		 * does not guarantee R/W access. If that fails we
		 * give up and leave the futex locked.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		/* Lost a cmpxchg race - re-read and retry. */
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}
2555
2556
2557
2558
2559static inline int fetch_robust_entry(struct robust_list __user **entry,
2560 struct robust_list __user * __user *head,
2561 unsigned int *pi)
2562{
2563 unsigned long uentry;
2564
2565 if (get_user(uentry, (unsigned long __user *)head))
2566 return -EFAULT;
2567
2568 *entry = (void __user *)(uentry & ~1UL);
2569 *pi = uentry & 1;
2570
2571 return 0;
2572}
2573
2574
2575
2576
2577
2578
2579
/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
					       curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}
2642
2643long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2644 u32 __user *uaddr2, u32 val2, u32 val3)
2645{
2646 int cmd = op & FUTEX_CMD_MASK;
2647 unsigned int flags = 0;
2648
2649 if (!(op & FUTEX_PRIVATE_FLAG))
2650 flags |= FLAGS_SHARED;
2651
2652 if (op & FUTEX_CLOCK_REALTIME) {
2653 flags |= FLAGS_CLOCKRT;
2654 if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
2655 return -ENOSYS;
2656 }
2657
2658 switch (cmd) {
2659 case FUTEX_LOCK_PI:
2660 case FUTEX_UNLOCK_PI:
2661 case FUTEX_TRYLOCK_PI:
2662 case FUTEX_WAIT_REQUEUE_PI:
2663 case FUTEX_CMP_REQUEUE_PI:
2664 if (!futex_cmpxchg_enabled)
2665 return -ENOSYS;
2666 }
2667
2668 switch (cmd) {
2669 case FUTEX_WAIT:
2670 val3 = FUTEX_BITSET_MATCH_ANY;
2671 case FUTEX_WAIT_BITSET:
2672 return futex_wait(uaddr, flags, val, timeout, val3);
2673 case FUTEX_WAKE:
2674 val3 = FUTEX_BITSET_MATCH_ANY;
2675 case FUTEX_WAKE_BITSET:
2676 return futex_wake(uaddr, flags, val, val3);
2677 case FUTEX_REQUEUE:
2678 return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
2679 case FUTEX_CMP_REQUEUE:
2680 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
2681 case FUTEX_WAKE_OP:
2682 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2683 case FUTEX_LOCK_PI:
2684 return futex_lock_pi(uaddr, flags, val, timeout, 0);
2685 case FUTEX_UNLOCK_PI:
2686 return futex_unlock_pi(uaddr, flags);
2687 case FUTEX_TRYLOCK_PI:
2688 return futex_lock_pi(uaddr, flags, 0, timeout, 1);
2689 case FUTEX_WAIT_REQUEUE_PI:
2690 val3 = FUTEX_BITSET_MATCH_ANY;
2691 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
2692 uaddr2);
2693 case FUTEX_CMP_REQUEUE_PI:
2694 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
2695 }
2696 return -ENOSYS;
2697}
2698
2699
2700SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2701 struct timespec __user *, utime, u32 __user *, uaddr2,
2702 u32, val3)
2703{
2704 struct timespec ts;
2705 ktime_t t, *tp = NULL;
2706 u32 val2 = 0;
2707 int cmd = op & FUTEX_CMD_MASK;
2708
2709 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
2710 cmd == FUTEX_WAIT_BITSET ||
2711 cmd == FUTEX_WAIT_REQUEUE_PI)) {
2712 if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
2713 return -EFAULT;
2714 if (!timespec_valid(&ts))
2715 return -EINVAL;
2716
2717 t = timespec_to_ktime(ts);
2718 if (cmd == FUTEX_WAIT)
2719 t = ktime_add_safe(ktime_get(), t);
2720 tp = &t;
2721 }
2722
2723
2724
2725
2726 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
2727 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
2728 val2 = (u32) (unsigned long) utime;
2729
2730 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2731}
2732
2733static int __init futex_init(void)
2734{
2735 u32 curval;
2736 int i;
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2749 futex_cmpxchg_enabled = 1;
2750
2751 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
2752 plist_head_init(&futex_queues[i].chain);
2753 spin_lock_init(&futex_queues[i].lock);
2754 }
2755
2756 return 0;
2757}
2758__initcall(futex_init);
2759