1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63#include <linux/sched/rt.h>
64
65#include <asm/futex.h>
66
67#include "rtmutex_common.h"
68
/*
 * Set at boot when the architecture provides a working
 * futex_atomic_cmpxchg_inatomic(); PI futex ops depend on it.
 */
int __read_mostly futex_cmpxchg_enabled;
70
/* Size of the global futex hash table: 2^4 or 2^8 buckets. */
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)

/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SHARED 0x01	/* key may be shared across processes */
#define FLAGS_CLOCKRT 0x02	/* timeout measured against CLOCK_REALTIME */
#define FLAGS_HAS_TIMEOUT 0x04	/* restart block carries a timeout */
80
81
82
83
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;	/* current lock owner */
	atomic_t refcount;

	union futex_key key;		/* futex this state belongs to */
};
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};
135
/* Default initializer for an on-stack futex_q. */
static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me()*/
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
141
142
143
144
145
146
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	spinlock_t lock;	/* protects @chain and queued futex_q state */
	struct plist_head chain;
};
151
/* The global futex hash table. */
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
153
154
155
156
157static struct futex_hash_bucket *hash_futex(union futex_key *key)
158{
159 u32 hash = jhash2((u32*)&key->both.word,
160 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
161 key->both.offset);
162 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
163}
164
165
166
167
168static inline int match_futex(union futex_key *key1, union futex_key *key2)
169{
170 return (key1 && key2
171 && key1->both.word == key2->both.word
172 && key1->both.ptr == key2->both.ptr
173 && key1->both.offset == key2->both.offset);
174}
175
176
177
178
179
180
/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode);	/* shared mapping: pin inode */
		break;
	case FUT_OFF_MMSHARED:
		atomic_inc(&key->private.mm->mm_count); /* private: pin mm */
		break;
	}
}
195
196
197
198
199
/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in *key on success.
 *
 * For shared mappings, it's (page->index, file_inode(vma->vm_file),
 * offset_within_page).  For private mappings, it's (uaddr, current->mm).
 * We can usually work out the index without swapping in the page.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma().
	 */
	if (!fshared) {
		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/* serialize against __split_huge_page_splitting() */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * page_head is valid pinned by the THP refcount:
			 * the tail page pin taken by __get_user_pages_fast()
			 * guarantees the head cannot be split or freed from
			 * under us, so transfer the pin to the head page.
			 * (with IRQs disabled, __split_huge_page_splitting()
			 * cannot run concurrently on this CPU)
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			/* Page got unmapped under us - retry the fault. */
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);

	/*
	 * If page_head->mapping is NULL, then it cannot be a PageAnon
	 * page; but it might be the ZERO_PAGE or in the gate area or
	 * in a special mapping (all cases which we are happy to fail);
	 * or it may have been a good file page when get_user_pages_fast
	 * found it, but truncated or holepunched or subjected to
	 * invalidate_complete_page2 before we got the page lock (also
	 * cases which we are happy to fail).  And we hold a reference,
	 * so refcount care in invalidate_complete_page's remove_mapping
	 * prevents drop_caches from setting mapping to NULL beneath us.
	 *
	 * The case we do have to guard against is when memory pressure made
	 * shmem_writepage move it from filecache to swapcache beneath us:
	 * an unlikely race, but we do need to retry for page_head->mapping.
	 */
	if (!page_head->mapping) {
		int shmem_swizzled = PageSwapCache(page_head);
		unlock_page(page_head);
		put_page(page_head);
		if (shmem_swizzled)
			goto again;
		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page_head)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		/*
		 * NOTE(review): using the compound head's index for a THP
		 * tail page gives a coarser key than the exact subpage;
		 * later kernels compute the basepage index here - confirm
		 * against the target kernel version.
		 */
		key->shared.pgoff = page_head->index;
	}

	get_futex_key_refs(key);

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}
378
/* Drop the key reference taken by get_futex_key(). */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
383
384
385
386
387
388
389
390
391
392
393
394
395
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
408
409
410
411
412
413
414
415
/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	/* chain is priority sorted; first match is the top waiter */
	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}
427
/*
 * cmpxchg the user-space futex word with page faults disabled; the
 * caller must handle -EFAULT by faulting the page in outside of any
 * spinlocks and retrying.
 */
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
439
/*
 * Read the user-space futex word with page faults disabled (callers
 * hold a hash bucket lock). Returns 0 or -EFAULT.
 */
static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}
450
451
452
453
454
/*
 * PI code:
 *
 * Pre-allocate one pi_state per task so that later allocation can
 * happen without holding any locks.
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	atomic_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}
477
/*
 * Hand out the pre-allocated pi_state. Callers must have run
 * refill_pi_state_cache() beforehand, hence the WARN_ON.
 */
static struct futex_pi_state * alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}
487
/*
 * Drop a pi_state reference; on the final put, unlink it from the
 * owner and either free it or stash it back in the per-task cache.
 */
static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
518
519
520
521
522
/*
 * Look up the task based on what TID userspace gave us.
 * We dont trust it. Returns the task with a reference held, or NULL.
 */
static struct task_struct * futex_find_get_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (p)
		get_task_struct(p);

	rcu_read_unlock();

	return p;
}
536
537
538
539
540
541
/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
591
/*
 * Look up (or create) the pi_state for the futex at @key whose user-space
 * value is @uval. Called with the hash bucket lock held.
 *
 * On success, *ps holds a pi_state with an extra reference.
 */
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * When pi_state->owner is NULL then the owner died
			 * and another waiter is on the fly. pi_state->owner
			 * is fixed up by the task which acquires
			 * pi_state->rt_mutex.
			 *
			 * We do not check for pid == 0 which can happen when
			 * the owner died and robust_list_exit() cleared the
			 * TID.
			 */
			if (pid && pi_state->owner) {
				/*
				 * Bail out if user space manipulated the
				 * futex value.
				 */
				if (pid != task_pid_vnr(pi_state->owner))
					return -EINVAL;
			}

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  0 - ready to wait;
 *  1 - acquired the lock;
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, force_take = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
	 * the locks. It will most likely not succeed.
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * Should we force take the futex? See below.
	 */
	if (unlikely(force_take)) {
		/*
		 * Keep the OWNER_DIED and the WAITERS bit and set the
		 * new TID value.
		 */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		force_take = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to forced take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * We failed to find an owner for this
			 * futex. So we have no pi_state to block
			 * on. This can happen in two cases:
			 *
			 * 1) The owner died
			 * 2) A stale FUTEX_WAITERS bit
			 *
			 * Re-read the futex value.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * If the owner died or we have a stale
			 * WAITERS bit the owner TID in the user space
			 * futex is 0.
			 */
			if (!(curval & FUTEX_TID_MASK)) {
				force_take = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}
821
822
823
824
825
826
827
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
}
839
840
841
842
843
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed.
 */
static void wake_futex(struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	/*
	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
	 * a non-futex wake up happens on another CPU then the task
	 * might exit and p would dereference a non-existing task
	 * struct. Prevent this by holding a reference on p across the
	 * wake up.
	 */
	get_task_struct(p);

	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;

	wake_up_state(p, TASK_NORMAL);
	put_task_struct(p);
}
873
/*
 * Wake the top waiter on a PI futex and hand the pi_state (and the
 * user-space TID field) over to the new owner. Caller holds the hash
 * bucket lock and must be the current pi_state owner.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. (The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.)
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	/* Move pi_state accounting from the old owner to the new one. */
	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}
937
/*
 * Release an uncontended PI futex by writing 0, verifying the value
 * did not change under us.
 */
static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 uninitialized_var(oldval);

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit has not to be preserved here. We are the owner:
	 */
	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
		return -EFAULT;
	if (oldval != uval)
		return -EAGAIN;

	return 0;
}
953
954
955
956
/*
 * Express the locking dependencies for lockdep:
 * always take the lower-addressed bucket lock first to avoid ABBA
 * deadlocks; same bucket is locked only once.
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}
969
/* Counterpart of double_lock_hb(); unlocks each bucket exactly once. */
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
977
978
979
980
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				/* PI futexes must use FUTEX_UNLOCK_PI */
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	put_futex_key(&key);
out:
	return ret;
}
1023
1024
1025
1026
1027
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 *
 * Performs an operation (op) on uaddr2, then wakes up to nr_wake
 * waiters on uaddr1 and, if the op result is true, up to nr_wake2
 * waiters on uaddr2.
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head;
	struct futex_q *this, *next;
	int ret, op_ret;

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		/* Fault in uaddr2 and retry the atomic op. */
		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	head = &hb1->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	/* op result true: also wake waiters on the second futex */
	if (op_ret > 0) {
		head = &hb2->chain;

		op_ret = 0;
		plist_for_each_entry_safe(this, next, head, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
1123
1124
1125
1126
1127
1128
1129
1130
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition by futex_requeue. Must be called with both q->lock_ptr
 * and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  0 - failed to acquire the lock atomically;
 *  1 - acquired the lock;
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unecessarily as it will force the subsequent unlock to enter
	 * the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1)
		requeue_pi_wake_futex(top_waiter, key2, hb2);

	return ret;
}
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1263 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1264 u32 *cmpval, int requeue_pi)
1265{
1266 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1267 int drop_count = 0, task_count = 0, ret;
1268 struct futex_pi_state *pi_state = NULL;
1269 struct futex_hash_bucket *hb1, *hb2;
1270 struct plist_head *head1;
1271 struct futex_q *this, *next;
1272 u32 curval2;
1273
1274 if (requeue_pi) {
1275
1276
1277
1278
1279 if (refill_pi_state_cache())
1280 return -ENOMEM;
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291 if (nr_wake != 1)
1292 return -EINVAL;
1293 }
1294
1295retry:
1296 if (pi_state != NULL) {
1297
1298
1299
1300
1301 free_pi_state(pi_state);
1302 pi_state = NULL;
1303 }
1304
1305 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1306 if (unlikely(ret != 0))
1307 goto out;
1308 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1309 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1310 if (unlikely(ret != 0))
1311 goto out_put_key1;
1312
1313 hb1 = hash_futex(&key1);
1314 hb2 = hash_futex(&key2);
1315
1316retry_private:
1317 double_lock_hb(hb1, hb2);
1318
1319 if (likely(cmpval != NULL)) {
1320 u32 curval;
1321
1322 ret = get_futex_value_locked(&curval, uaddr1);
1323
1324 if (unlikely(ret)) {
1325 double_unlock_hb(hb1, hb2);
1326
1327 ret = get_user(curval, uaddr1);
1328 if (ret)
1329 goto out_put_keys;
1330
1331 if (!(flags & FLAGS_SHARED))
1332 goto retry_private;
1333
1334 put_futex_key(&key2);
1335 put_futex_key(&key1);
1336 goto retry;
1337 }
1338 if (curval != *cmpval) {
1339 ret = -EAGAIN;
1340 goto out_unlock;
1341 }
1342 }
1343
1344 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1345
1346
1347
1348
1349
1350
1351 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1352 &key2, &pi_state, nr_requeue);
1353
1354
1355
1356
1357
1358
1359
1360 if (ret == 1) {
1361 WARN_ON(pi_state);
1362 drop_count++;
1363 task_count++;
1364 ret = get_futex_value_locked(&curval2, uaddr2);
1365 if (!ret)
1366 ret = lookup_pi_state(curval2, hb2, &key2,
1367 &pi_state);
1368 }
1369
1370 switch (ret) {
1371 case 0:
1372 break;
1373 case -EFAULT:
1374 double_unlock_hb(hb1, hb2);
1375 put_futex_key(&key2);
1376 put_futex_key(&key1);
1377 ret = fault_in_user_writeable(uaddr2);
1378 if (!ret)
1379 goto retry;
1380 goto out;
1381 case -EAGAIN:
1382
1383 double_unlock_hb(hb1, hb2);
1384 put_futex_key(&key2);
1385 put_futex_key(&key1);
1386 cond_resched();
1387 goto retry;
1388 default:
1389 goto out_unlock;
1390 }
1391 }
1392
1393 head1 = &hb1->chain;
1394 plist_for_each_entry_safe(this, next, head1, list) {
1395 if (task_count - nr_wake >= nr_requeue)
1396 break;
1397
1398 if (!match_futex(&this->key, &key1))
1399 continue;
1400
1401
1402
1403
1404
1405
1406
1407
1408 if ((requeue_pi && !this->rt_waiter) ||
1409 (!requeue_pi && this->rt_waiter) ||
1410 this->pi_state) {
1411 ret = -EINVAL;
1412 break;
1413 }
1414
1415
1416
1417
1418
1419
1420 if (++task_count <= nr_wake && !requeue_pi) {
1421 wake_futex(this);
1422 continue;
1423 }
1424
1425
1426 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1427 ret = -EINVAL;
1428 break;
1429 }
1430
1431
1432
1433
1434
1435 if (requeue_pi) {
1436
1437 atomic_inc(&pi_state->refcount);
1438 this->pi_state = pi_state;
1439 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1440 this->rt_waiter,
1441 this->task, 1);
1442 if (ret == 1) {
1443
1444 requeue_pi_wake_futex(this, &key2, hb2);
1445 drop_count++;
1446 continue;
1447 } else if (ret) {
1448
1449 this->pi_state = NULL;
1450 free_pi_state(pi_state);
1451 goto out_unlock;
1452 }
1453 }
1454 requeue_futex(this, hb1, hb2, &key2);
1455 drop_count++;
1456 }
1457
1458out_unlock:
1459 double_unlock_hb(hb1, hb2);
1460
1461
1462
1463
1464
1465
1466
1467 while (--drop_count >= 0)
1468 drop_futex_key_refs(&key1);
1469
1470out_put_keys:
1471 put_futex_key(&key2);
1472out_put_key1:
1473 put_futex_key(&key1);
1474out:
1475 if (pi_state != NULL)
1476 free_pi_state(pi_state);
1477 return ret ? ret : task_count;
1478}
1479
1480
/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);
	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}
1492
/* Drop the hash bucket lock taken by queue_lock() without queueing. */
static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
}
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
 * an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *   1 - if the futex_q was still queued (and we removed unqueued it);
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
1583
1584
1585
1586
1587
1588
/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. *q->lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}
1600
1601
1602
1603
1604
1605
1606
/*
 * Fixup the pi_state owner with the new owner.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
1698
1699static long futex_wait_restart(struct restart_block *restart);
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  1 - success, lock taken;
 *  0 - success, lock not taken;
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
1773
1774
1775
1776
1777
1778
1779
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using set_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			schedule();
	}
	__set_current_state(TASK_RUNNING);
}
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *  0 - uaddr contains val and hb has been locked;
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(q, *hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(q, *hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}
1890
1891static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
1892 ktime_t *abs_time, u32 bitset)
1893{
1894 struct hrtimer_sleeper timeout, *to = NULL;
1895 struct restart_block *restart;
1896 struct futex_hash_bucket *hb;
1897 struct futex_q q = futex_q_init;
1898 int ret;
1899
1900 if (!bitset)
1901 return -EINVAL;
1902 q.bitset = bitset;
1903
1904 if (abs_time) {
1905 to = &timeout;
1906
1907 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
1908 CLOCK_REALTIME : CLOCK_MONOTONIC,
1909 HRTIMER_MODE_ABS);
1910 hrtimer_init_sleeper(to, current);
1911 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1912 current->timer_slack_ns);
1913 }
1914
1915retry:
1916
1917
1918
1919
1920 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
1921 if (ret)
1922 goto out;
1923
1924
1925 futex_wait_queue_me(hb, &q, to);
1926
1927
1928 ret = 0;
1929
1930 if (!unqueue_me(&q))
1931 goto out;
1932 ret = -ETIMEDOUT;
1933 if (to && !to->task)
1934 goto out;
1935
1936
1937
1938
1939
1940 if (!signal_pending(current))
1941 goto retry;
1942
1943 ret = -ERESTARTSYS;
1944 if (!abs_time)
1945 goto out;
1946
1947 restart = ¤t_thread_info()->restart_block;
1948 restart->fn = futex_wait_restart;
1949 restart->futex.uaddr = uaddr;
1950 restart->futex.val = val;
1951 restart->futex.time = abs_time->tv64;
1952 restart->futex.bitset = bitset;
1953 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
1954
1955 ret = -ERESTART_RESTARTBLOCK;
1956
1957out:
1958 if (to) {
1959 hrtimer_cancel(&to->timer);
1960 destroy_hrtimer_on_stack(&to->timer);
1961 }
1962 return ret;
1963}
1964
1965
1966static long futex_wait_restart(struct restart_block *restart)
1967{
1968 u32 __user *uaddr = restart->futex.uaddr;
1969 ktime_t t, *tp = NULL;
1970
1971 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1972 t.tv64 = restart->futex.time;
1973 tp = &t;
1974 }
1975 restart->fn = do_no_restart_syscall;
1976
1977 return (long)futex_wait(uaddr, restart->futex.flags,
1978 restart->futex.val, tp, restart->futex.bitset);
1979}
1980
1981
1982
1983
1984
1985
1986
1987
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed.  The kernel side here does the whole locking operation:
 * if there are waiters then it will block as a consequence of relying
 * on rt-mutexes, it does PI, etc.  (Due to races the kernel might see
 * a 0 value of the futex too.)
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * The owner task is exiting; drop locks, give it a
			 * chance to complete the exit, and retry.
			 */
			queue_unlock(&q, hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(&q, hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	/* -EINTR is not exposed to userspace for this op; force a restart. */
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(&q, hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}
2103
2104
2105
2106
2107
2108
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	u32 uval, vpid = task_pid_vnr(current);
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try the TID -> 0 atomic transition again under
	 * the hash-bucket lock.  If it succeeds there were no waiters and
	 * we can return without waking anyone.  Skipped when OWNER_DIED is
	 * set, since then the transition must preserve that bit.
	 */
	if (!(uval & FUTEX_OWNER_DIED) &&
	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == vpid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

out:
	return ret;

pi_faulted:
	/* Drop the lock before faulting the page in, then restart. */
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
/*
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket the futex_q was originally enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Determine whether the task was woken on the initial futex as opposed to
 * the requeue target futex.  Must be called with the hb lock held.
 *
 * Return:
 *  0 = no early wakeup detected (the task was requeued);
 * <0 = -ETIMEDOUT, -ERESTARTNOINTR, or -EWOULDBLOCK (spurious wakeup)
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * The requeue code updates q->key to key2 when moving the entry,
	 * so a key that still differs from key2 means we were woken on
	 * uaddr and are still queued on hb.  Holding the hb lock avoids
	 * racing with a requeue in progress.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
/*
 * futex_wait_requeue_pi() - Wait on uaddr and take the PI futex at uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
 *		the same type, no requeueing from private to shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the PI futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.
 *
 * Return:
 *  0 - On success;
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code.
	 *
	 * Check if the requeue code acquired the second futex for us.
	 */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up by futex_unlock_pi(), a timeout, or a
		 * signal.  Acquire the rt_mutex via the proxy waiter that the
		 * requeue code installed on our behalf.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If the fixup faulted and was unable to handle the fault, unlock
	 * the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2443 size_t, len)
2444{
2445 if (!futex_cmpxchg_enabled)
2446 return -ENOSYS;
2447
2448
2449
2450 if (unlikely(len != sizeof(*head)))
2451 return -EINVAL;
2452
2453 current->robust_list = head;
2454
2455 return 0;
2456}
2457
2458
2459
2460
2461
2462
2463
2464SYSCALL_DEFINE3(get_robust_list, int, pid,
2465 struct robust_list_head __user * __user *, head_ptr,
2466 size_t __user *, len_ptr)
2467{
2468 struct robust_list_head __user *head;
2469 unsigned long ret;
2470 struct task_struct *p;
2471
2472 if (!futex_cmpxchg_enabled)
2473 return -ENOSYS;
2474
2475 rcu_read_lock();
2476
2477 ret = -ESRCH;
2478 if (!pid)
2479 p = current;
2480 else {
2481 p = find_task_by_vpid(pid);
2482 if (!p)
2483 goto err_unlock;
2484 }
2485
2486 ret = -EPERM;
2487 if (!ptrace_may_access(p, PTRACE_MODE_READ))
2488 goto err_unlock;
2489
2490 head = p->robust_list;
2491 rcu_read_unlock();
2492
2493 if (put_user(sizeof(*head), len_ptr))
2494 return -EFAULT;
2495 return put_user(head, head_ptr);
2496
2497err_unlock:
2498 rcu_read_unlock();
2499
2500 return ret;
2501}
2502
2503
2504
2505
2506
/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so.  Returns 0 on success,
 * -1 on an unrecoverable userspace access fault.
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest.  Set the OWNER_DIED bit atomically
		 * via cmpxchg, preserving the WAITERS bit and
		 * clearing the TID field.
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * We are not holding a lock here, but we want to have
		 * the pagefault_disable/enable() protection because
		 * we want to handle the fault gracefully.  If the
		 * access fails we try to fault in the futex with R/W
		 * verification via get_user_pages.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		/* Value changed under us: re-read and re-check ownership. */
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here.  The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}
2553
2554
2555
2556
2557static inline int fetch_robust_entry(struct robust_list __user **entry,
2558 struct robust_list __user * __user *head,
2559 unsigned int *pi)
2560{
2561 unsigned long uentry;
2562
2563 if (get_user(uentry, (unsigned long __user *)head))
2564 return -EFAULT;
2565
2566 *entry = (void __user *)(uentry & ~1UL);
2567 *pi = uentry & 1;
2568
2569 return 0;
2570}
2571
2572
2573
2574
2575
2576
2577
/*
 * Walk curr->robust_list, handling futexes this task held at exit
 * time: mark each held futex OWNER_DIED and wake one waiter.  The
 * list lives entirely in userspace, so every access can fault and
 * the walk is bounded (ROBUST_LIST_LIMIT) to guard against cyclic
 * or malicious lists.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
					       curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}
2640
/*
 * do_futex() - Dispatch a futex operation to the matching implementation.
 * @uaddr:   primary futex address
 * @op:      opcode, including FUTEX_PRIVATE_FLAG / FUTEX_CLOCK_REALTIME bits
 * @val:     opcode-specific value (expected value, number to wake, ...)
 * @timeout: absolute timeout for wait-style ops, or NULL
 * @uaddr2:  secondary futex address for requeue/wake-op variants
 * @val2:    opcode-specific second value
 * @val3:    opcode-specific third value (e.g. the wakeup bitset)
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	/* CLOCK_REALTIME timeouts are only meaningful for these two ops. */
	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	/* All PI ops require a working futex cmpxchg (see futex_init()). */
	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		if (!futex_cmpxchg_enabled)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fallthrough: FUTEX_WAIT is WAIT_BITSET with all bits set */
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fallthrough: FUTEX_WAKE is WAKE_BITSET with all bits set */
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		return futex_lock_pi(uaddr, flags, val, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, 0, timeout, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
2696
2697
/* Syscall entry point: convert the user timespec and dispatch to do_futex(). */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	/* Only the wait-style commands interpret utime as a timeout. */
	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		/* FUTEX_WAIT takes a relative timeout; make it absolute. */
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
2730
2731static int __init futex_init(void)
2732{
2733 u32 curval;
2734 int i;
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2747 futex_cmpxchg_enabled = 1;
2748
2749 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
2750 plist_head_init(&futex_queues[i].chain);
2751 spin_lock_init(&futex_queues[i].lock);
2752 }
2753
2754 return 0;
2755}
2756__initcall(futex_init);
2757