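/*
 * Fast Userspace Mutexes (futexes): kernel support for user-space
 * locking, including hashing of futex keys, the wait/wake and requeue
 * operations, priority-inheritance (PI) futexes and robust-futex exit
 * cleanup.
 */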
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/export.h>
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>

#include <asm/futex.h>

#include "rtmutex_common.h"

int __read_mostly futex_cmpxchg_enabled;

#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)

/*
 * Futex flags used to encode options to functions and preserve them
 * across restarts.
 */
#define FLAGS_SHARED		0x01
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04

/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	atomic_t refcount;

	union futex_key key;
};
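
/*
 * struct futex_q - the hashed futex queue entry, one per waiting task.
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
 * we can wake only the relevant ones (hashed queues may be shared):
 * @list is priority-sorted within a hash bucket, @task is the waiting
 * task, @lock_ptr points at the hash bucket lock and @key identifies
 * the futex the task is blocked on.  A futex_q counts as woken once
 * plist_node_empty(&q->list) is true or q->lock_ptr == NULL.
 * @pi_state, @rt_waiter and @requeue_pi_key are used only by the PI
 * and requeue-PI operations; @bitset selects waiters for the
 * bitmasked wakeup variants.
 */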
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me() */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
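
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for
 * each task waiting on a futex.
 */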
struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};

static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];

/*
 * We hash on the keys returned from get_futex_key (see below).
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}

/*
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}

/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		atomic_inc(&key->private.mm->mm_count);
		break;
	}
}

/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}
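
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Returns a negative error code or 0.  The key words are stored in
 * *key on success.  For shared mappings the key is based on the inode
 * and the page offset; for private mappings it is based on the mm and
 * the virtual address.  lock_page() might sleep, so the caller must
 * not hold a spinlock.
 */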
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma().
	 */
	if (!fshared) {
		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/* serialize against __split_huge_page_splitting() */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * page_head is valid pointer but we must pin
			 * it before taking the PG_lock and/or
			 * PG_compound_lock.  The moment we re-enable
			 * irqs __split_huge_page_splitting() can
			 * return and the head page can be freed from
			 * under us.  We can't take the PG_lock and/or
			 * PG_compound_lock on a page that could be
			 * freed from under us.
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);

	/*
	 * If page_head->mapping is NULL, then it cannot be a PageAnon
	 * page; but it might be the ZERO_PAGE, in the gate area or in
	 * a special mapping (all cases which we are happy to fail);
	 * or it may have been a good file page when get_user_pages_fast
	 * found it, but truncated or holepunched before we got the
	 * page lock (also cases which we are happy to fail).
	 *
	 * The case we do have to guard against is when memory pressure
	 * made shmem_writepage move it from filecache to swapcache
	 * beneath us: an unlikely race, but we do need to retry for
	 * page_head->mapping.
	 */
	if (!page_head->mapping) {
		int shmem_swizzled = PageSwapCache(page_head);
		unlock_page(page_head);
		put_page(page_head);
		if (shmem_swizzled)
			goto again;
		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page_head)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = page_head->index;
	}

	get_futex_key_refs(key);

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}

static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
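
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address.  We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */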
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
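
/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */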
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}

static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}

static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}

/*
 * PI code:
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	atomic_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}

static struct futex_pi_state *alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}

/*
 * Look up the task based on what TID userspace gave us.
 * We dont trust it.
 */
static struct task_struct *futex_find_get_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (p)
		get_task_struct(p);

	rcu_read_unlock();

	return p;
}

/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}

static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * When pi_state->owner is NULL then the owner died
			 * and another waiter is on the fly. pi_state->owner
			 * is fixed up by the task which acquires
			 * pi_state->rt_mutex.
			 *
			 * We do not check for pid == 0 which can happen when
			 * the owner died and robust_list_exit() cleared the
			 * TID.
			 */
			if (pid && pi_state->owner) {
				/*
				 * Bail out if user space manipulated the
				 * futex value.
				 */
				if (pid != task_pid_vnr(pi_state->owner))
					return -EINVAL;
			}

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
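
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Returns:
 *  0 - ready to wait
 *  1 - acquired the lock
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */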
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, ownerdied = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
	 * the locks. It will most likely not succeed.
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if (unlikely((curval & FUTEX_TID_MASK) == vpid))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * There are two cases, where a futex might have no owner (the
	 * owner TID is 0): OWNER_DIED. We take over the futex in this
	 * case. We also do an unconditional take over, when the owner
	 * of the futex died.
	 *
	 * This is safe as we are protected by the hash bucket lock !
	 */
	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
		/* Keep the OWNER_DIED bit */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		ownerdied = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to owner died take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * No owner found for this futex. Check if the
			 * OWNER_DIED bit is set to figure out whether
			 * this is a robust futex or not.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * We simply start over in case of a robust
			 * futex. The code above will take the futex
			 * and return happy.
			 */
			if (curval & FUTEX_OWNER_DIED) {
				ownerdied = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}
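
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */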
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
}

/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed.
 */
static void wake_futex(struct futex_q *q)
{
	struct task_struct *p = q->task;

	/*
	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
	 * a non-futex wake up happens on another CPU then the task
	 * might exit and p would dereference a non-existing task
	 * struct. Prevent this by holding a reference on p across the
	 * wake up.
	 */
	get_task_struct(p);

	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;

	wake_up_state(p, TASK_NORMAL);
	put_task_struct(p);
}

static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;

	if (!pi_state)
		return -EINVAL;

	/*
	 * We release only a lock we actually own:
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. (The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.)
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}

static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 uninitialized_var(oldval);

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit has not to be preserved here. We are the owner:
	 */
	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
		return -EFAULT;
	if (oldval != uval)
		return -EAGAIN;

	return 0;
}

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
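
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */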
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	put_futex_key(&key);
out:
	return ret;
}
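
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */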
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head;
	struct futex_q *this, *next;
	int ret, op_ret;

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	head = &hb1->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, &key1)) {
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		head = &hb2->chain;

		op_ret = 0;
		plist_for_each_entry_safe(this, next, head, list) {
			if (match_futex(&this->key, &key2)) {
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

	double_unlock_hb(hb1, hb2);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
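
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */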
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
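
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */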
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
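
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Returns:
 *  0 - failed to acquire the lock atomically
 *  1 - acquired the lock
 * <0 - error
 */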
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1)
		requeue_pi_wake_futex(top_waiter, key2, hb2);

	return ret;
}
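
/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
 * uaddr2 atomically on behalf of the top waiter.
 *
 * Returns:
 * >=0 - on success, the number of tasks requeued or woken
 *  <0 - on error
 */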
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head1;
	struct futex_q *this, *next;
	u32 curval2;

	if (requeue_pi) {
		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	if (pi_state != NULL) {
		/*
		 * We will have to lookup the pi_state again, so free this one
		 * to keep the accounting correct.
		 */
		free_pi_state(pi_state);
		pi_state = NULL;
	}

	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather than in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it.
		 */
		if (ret == 1) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			ret = get_futex_value_locked(&curval2, uaddr2);
			if (!ret)
				ret = lookup_pi_state(curval2, hb2, &key2,
						      &pi_state);
		}

		switch (ret) {
		case 0:
			break;
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/* The owner was exiting, try again. */
			double_unlock_hb(hb1, hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	head1 = &hb1->chain;
	plist_for_each_entry_safe(this, next, head1, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			wake_futex(this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/* Prepare the waiter to take the rt_mutex. */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task, 1);
			if (ret == 1) {
				/* We got the lock. */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/* -EDEADLK */
				this->pi_state = NULL;
				free_pi_state(pi_state);
				goto out_unlock;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	if (pi_state != NULL)
		free_pi_state(pi_state);
	return ret ? ret : task_count;
}

/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);
	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}

static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
}
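
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here.  A call
 * to queue_me() is typically paired with exactly one call to unqueue_me().
 * The exceptions involve the PI related operations, which may use
 * unqueue_me_pi() or nothing if the unqueue is done as part of the wake
 * process and the unqueue state is implicit in the state of the woken
 * task (see futex_wait_requeue_pi() for an example).
 */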
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}
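
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller.  A call to unqueue_me()
 * must be paired with exactly one earlier call to queue_me().
 *
 * Returns:
 *   1 - if the futex_q was still queued (and we removed it)
 *   0 - if the futex_q was already removed by the waking thread
 */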
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}

/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}

/*
 * Fixup the pi_state owner with the new owner.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}

static long futex_wait_restart(struct restart_block *restart);
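
/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock.  Must be called with the hb lock held.
 *
 * Returns:
 *  1 - success, lock taken
 *  0 - success, lock not taken
 * <0 - on error (-EFAULT)
 */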
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
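
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */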
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using set_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			schedule();
	}
	__set_current_state(TASK_RUNNING);
}
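
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Returns:
 *  0 - uaddr contains val and hb has been locked
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */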
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(q, *hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(q, *hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}

static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int ret;

	if (!bitset)
		return -EINVAL;
	q.bitset = bitset;

	if (abs_time) {
		to = &timeout;

		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

retry:
	/*
	 * Prepare to wait on uaddr. On success, holds hb lock and increments
	 * q.key refs.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/* queue_me and wait for wakeup, timeout, or a signal. */
	futex_wait_queue_me(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	ret = 0;
	/* unqueue_me() drops q.key ref */
	if (!unqueue_me(&q))
		goto out;
	ret = -ETIMEDOUT;
	if (to && !to->task)
		goto out;

	/*
	 * We expect signal_pending(current), but we might be the
	 * victim of a spurious wakeup as well.
	 */
	if (!signal_pending(current))
		goto retry;

	ret = -ERESTARTSYS;
	if (!abs_time)
		goto out;

	restart = &current_thread_info()->restart_block;
	restart->fn = futex_wait_restart;
	restart->futex.uaddr = uaddr;
	restart->futex.val = val;
	restart->futex.time = abs_time->tv64;
	restart->futex.bitset = bitset;
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

	ret = -ERESTART_RESTARTBLOCK;

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}

static long futex_wait_restart(struct restart_block *restart)
{
	u32 __user *uaddr = restart->futex.uaddr;
	ktime_t t, *tp = NULL;

	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
		t.tv64 = restart->futex.time;
		tp = &t;
	}
	restart->fn = do_no_restart_syscall;

	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp, restart->futex.bitset);
}

/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block, it does PI, etc.
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Task is exiting and we just wait for the
			 * exit to complete.
			 */
			queue_unlock(&q, hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(&q, hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(&q, hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}

/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	u32 uval, vpid = task_pid_vnr(current);
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * here again. If it succeeds then we can return without waking
	 * anyone else up:
	 */
	if (!(uval & FUTEX_OWNER_DIED) &&
	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == vpid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex(&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

out:
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}

/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket the futex_q was originally enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Returns:
 *  0 - no early wakeup detected
 * <0 - -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to uaddr as the requeue side will
	 * detect that the requeue_pi_key changed.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
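
/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the futex at uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.); both futexes
 *		must be of the same type, no requeueing from private to
 *		shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2, which must be PI aware and distinct from uaddr.  The normal wakeup
 * happens on uaddr2, completing the acquisition of the rt_mutex prior to
 * returning to userspace.  This ensures the rt_mutex maintains an owner when
 * it has waiters; without one, the pi logic would not know which task to
 * boost/deboost if there was a need to.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via one of: wakeup on uaddr2 after an atomic lock acquisition by
 * futex_requeue(), wakeup on uaddr2 after a requeue, signal, or timeout.
 * On signal before requeue we clean up and return -ERESTARTNOINTR; after
 * requeue a signal yields -EWOULDBLOCK (a restart would do the same), and a
 * timeout yields -ETIMEDOUT.
 *
 * Returns:
 *  0 - On success
 * <0 - On error
 */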
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code. The
	 * futex_requeue dropped our key1 reference and incremented our key2
	 * reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up by futex_unlock_pi(), a timeout, or a
		 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
		 * the pi_state.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
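
/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Userspace registers (once per thread) a linked list of locks it
 * holds.  On thread exit, the kernel walks this list, marks any lock
 * still held there as dead (FUTEX_OWNER_DIED) and wakes up one waiter
 * so the death can be detected in userspace.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */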
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}
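
/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */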
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	const struct cred *cred = current_cred(), *pcred;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	if (!pid)
		head = current->robust_list;
	else {
		struct task_struct *p;

		ret = -ESRCH;
		rcu_read_lock();
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
		ret = -EPERM;
		pcred = __task_cred(p);
		/* If victim is in different user_ns, then uids are not
		   comparable, so we must have CAP_SYS_PTRACE */
		if (cred->user->user_ns != pcred->user->user_ns) {
			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
				goto err_unlock;
			goto ok;
		}
		/* If victim is in same user_ns, then uids are comparable */
		if (cred->euid != pcred->euid &&
		    cred->euid != pcred->uid &&
		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
			goto err_unlock;
ok:
		head = p->robust_list;
		rcu_read_unlock();
	}

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}

/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any).
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * We are not holding a lock here, but we want to have
		 * the pagefault_disable/enable() protection because
		 * we want to handle the fault gracefully. If the
		 * access fails we try to fault in the futex with R/W
		 * verification via get_user_pages. get_user() above
		 * does not guarantee R/W access. If that fails we
		 * give up and leave the futex locked.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}

/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     unsigned int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}

/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAIT_BITSET:
		ret = futex_wait(uaddr, flags, val, timeout, val3);
		break;
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAKE_BITSET:
		ret = futex_wake(uaddr, flags, val, val3);
		break;
	case FUTEX_REQUEUE:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
		break;
	case FUTEX_CMP_REQUEUE:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
		break;
	case FUTEX_WAKE_OP:
		ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
		break;
	case FUTEX_LOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
		break;
	case FUTEX_UNLOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_unlock_pi(uaddr, flags);
		break;
	case FUTEX_TRYLOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
		break;
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					    uaddr2);
		break;
	case FUTEX_CMP_REQUEUE_PI:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
		break;
	default:
		ret = -ENOSYS;
	}
	return ret;
}

SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

static int __init futex_init(void)
{
	u32 curval;
	int i;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * operation: a cmpxchg on a NULL user pointer is expected to
	 * fault with -EFAULT when the operation is implemented, so
	 * -EFAULT here means the cmpxchg works and the PI and robust
	 * operations can be enabled.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;

	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(futex_init);