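/*
 * Fast userspace mutexes ("futexes"): hashed in-kernel wait queues,
 * robust-futex cleanup on task exit, and priority-inheritance (PI)
 * futex support.
 */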
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/module.h>
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>

#include <asm/futex.h>

#include "rtmutex_common.h"

int __read_mostly futex_cmpxchg_enabled;

#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)

/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SHARED		0x01
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04

/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	atomic_t refcount;

	union futex_key key;
};

/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list
 * via the rt_mutex code. See unqueue_me_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me() */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};

/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};

static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];

/*
 * We hash on the keys returned from get_futex_key (see below).
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}

/*
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}

/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		atomic_inc(&key->private.mm->mm_count);
		break;
	}
}

/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}

/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored.
 *
 * Returns a negative error code or 0.
 * The key words are stored in *key on success.
 *
 * For shared mappings, it's (page->index, inode, offset_within_page).
 * For private mappings, it's (uaddr, current->mm).
 * We can usually work out the index without swapping in the page.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma().
	 */
	if (!fshared) {
		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, 1, &page);
	if (err < 0)
		return err;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/* serialize against __split_huge_page_splitting() */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * page_head is valid pointer but we must pin
			 * it before taking the PG_lock and/or
			 * PG_compound_lock. The moment we re-enable
			 * irqs __split_huge_page_splitting() can
			 * return and the head page can be freed from
			 * under us. We can't take the PG_lock and/or
			 * PG_compound_lock on a page that could be
			 * freed from under us.
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);
	if (!page_head->mapping) {
		unlock_page(page_head);
		put_page(page_head);
		goto again;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page_head)) {
		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = page_head->index;
	}

	get_futex_key_refs(key);

	unlock_page(page_head);
	put_page(page_head);
	return 0;
}

static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}

/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages(current, mm, (unsigned long)uaddr,
			     1, 1, 0, NULL, NULL);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}

/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}

static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
{
	u32 curval;

	pagefault_disable();
	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
	pagefault_enable();

	return curval;
}

static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}

/*
 * PI code:
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	atomic_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}

static struct futex_pi_state * alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}

/*
 * Look up the task based on what TID userspace gave us.
 * We dont trust it.
 */
static struct task_struct * futex_find_get_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (p)
		get_task_struct(p);

	rcu_read_unlock();

	return p;
}

/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}

static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * When pi_state->owner is NULL then the owner died
			 * and another waiter is on the fly. pi_state->owner
			 * is fixed up by the task which acquires
			 * pi_state->rt_mutex.
			 *
			 * We do not check for pid == 0 which can happen when
			 * the owner died and robust_list_exit() cleared the
			 * TID.
			 */
			if (pid && pi_state->owner) {
				/*
				 * Bail out if user space manipulated the
				 * futex value.
				 */
				if (pid != task_pid_vnr(pi_state->owner))
					return -EINVAL;
			}

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}

/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Returns:
 *  0 - ready to wait
 *  1 - acquired the lock
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, ownerdied = 0;
	u32 uval, newval, curval;

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
	 * the locks. It will most likely not succeed.
	 */
	newval = task_pid_vnr(task);
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);

	if (unlikely(curval == -EFAULT))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * There are two cases, where a futex might have no owner (the
	 * owner TID is 0): OWNER_DIED. We take over the futex in this
	 * case. We also do an unconditional take over, when the owner
	 * of the futex died.
	 *
	 * This is safe as we are protected by the hash bucket lock !
	 */
	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
		/* Keep the OWNER_DIED bit */
		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
		ownerdied = 0;
		lock_taken = 1;
	}

	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

	if (unlikely(curval == -EFAULT))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to owner died take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * No owner found for this futex. Check if the
			 * OWNER_DIED bit is set to figure out whether
			 * this is a robust futex or not.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * We simply start over in case of a robust
			 * futex. The code above will take the futex
			 * and return happy.
			 */
			if (curval & FUTEX_OWNER_DIED) {
				ownerdied = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}

/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed.
 */
static void wake_futex(struct futex_q *q)
{
	struct task_struct *p = q->task;

	/*
	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
	 * a non-futex wake up happens on another CPU then the task
	 * might exit and p would dereference a non-existing task
	 * struct. Prevent this by holding a reference on p across the
	 * wake up.
	 */
	get_task_struct(p);

	plist_del(&q->list, &q->list.plist);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;

	wake_up_state(p, TASK_NORMAL);
	put_task_struct(p);
}

static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 curval, newval;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. (The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.)
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

		if (curval == -EFAULT)
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}

static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 oldval;

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit has not to be preserved here. We are the owner:
	 */
	oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);

	if (oldval == -EFAULT)
		return oldval;
	if (oldval != uval)
		return -EAGAIN;

	return 0;
}

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}

/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	put_futex_key(&key);
out:
	return ret;
}

/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head;
	struct futex_q *this, *next;
	int ret, op_ret;

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	head = &hb1->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key1)) {
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		head = &hb2->chain;

		op_ret = 0;
		plist_for_each_entry_safe(this, next, head, list) {
			if (match_futex (&this->key, &key2)) {
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

	double_unlock_hb(hb1, hb2);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}

/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
#ifdef CONFIG_DEBUG_PI_LIST
		q->list.plist.spinlock = &hb2->lock;
#endif
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}

/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	WARN_ON(plist_node_empty(&q->list));
	plist_del(&q->list, &q->list.plist);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;
#ifdef CONFIG_DEBUG_PI_LIST
	q->list.plist.spinlock = &hb->lock;
#endif

	wake_up_state(q->task, TASK_NORMAL);
}

/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Returns:
 *  0 - failed to acquire the lock atomically
 *  1 - acquired the lock
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1)
		requeue_pi_wake_futex(top_waiter, key2, hb2);

	return ret;
}

/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
 * uaddr2 atomically on behalf of the top waiter.
 *
 * Returns:
 * >=0 - on success, the number of tasks requeued or woken
 *  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head1;
	struct futex_q *this, *next;
	u32 curval2;

	if (requeue_pi) {
		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	if (pi_state != NULL) {
		/*
		 * We will have to lookup the pi_state again, so free this one
		 * to keep the accounting correct.
		 */
		free_pi_state(pi_state);
		pi_state = NULL;
	}

	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it.
		 */
		if (ret == 1) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			ret = get_futex_value_locked(&curval2, uaddr2);
			if (!ret)
				ret = lookup_pi_state(curval2, hb2, &key2,
						      &pi_state);
		}

		switch (ret) {
		case 0:
			break;
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/* The owner was exiting, try again. */
			double_unlock_hb(hb1, hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	head1 = &hb1->chain;
	plist_for_each_entry_safe(this, next, head1, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			wake_futex(this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/* Prepare the waiter to take the rt_mutex. */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task, 1);
			if (ret == 1) {
				/* We got the lock. */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/* -EDEADLK */
				this->pi_state = NULL;
				free_pi_state(pi_state);
				goto out_unlock;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	if (pi_state != NULL)
		free_pi_state(pi_state);
	return ret ? ret : task_count;
}

/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);
	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}

static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
}

/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the
 * unqueue state is implicit in the state of the woken task (see
 * futex_wait_requeue_pi() for an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
#ifdef CONFIG_DEBUG_PI_LIST
	q->list.plist.spinlock = &hb->lock;
#endif
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}

/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Returns:
 *   1 - if the futex_q was still queued (and we unqueued it)
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		WARN_ON(plist_node_empty(&q->list));
		plist_del(&q->list, &q->list.plist);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}

/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	WARN_ON(plist_node_empty(&q->list));
	plist_del(&q->list, &q->list.plist);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}

/*
 * Fixup the pi_state owner with the new owner.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, curval, newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

		if (curval == -EFAULT)
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}

static long futex_wait_restart(struct restart_block *restart);

/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Returns:
 *  1 - success, lock taken
 *  0 - success, lock not taken
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late. We can access the rt_mutex_owner without
		 * locking, as the other task is now blocked on the hash bucket
		 * lock. Fix the state up.
		 */
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}

/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using set_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			schedule();
	}
	__set_current_state(TASK_RUNNING);
}

/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Returns:
 *  0 - uaddr contains val and hb has been locked
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * block on a futex for which the owner has already called
	 * futex_wake().
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(q, *hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(q, *hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}

static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int ret;

	if (!bitset)
		return -EINVAL;
	q.bitset = bitset;

	if (abs_time) {
		to = &timeout;

		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

retry:
	/*
	 * Prepare to wait on uaddr. On success, holds hb lock and increments
	 * q.key refs.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/* queue_me and wait for wakeup, timeout, or a signal. */
	futex_wait_queue_me(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	ret = 0;
	/* unqueue_me() drops q.key ref */
	if (!unqueue_me(&q))
		goto out;
	ret = -ETIMEDOUT;
	if (to && !to->task)
		goto out;

	/*
	 * We expect signal_pending(current), but we might be the
	 * victim of a spurious wakeup as well.
	 */
	if (!signal_pending(current))
		goto retry;

	ret = -ERESTARTSYS;
	if (!abs_time)
		goto out;

	restart = &current_thread_info()->restart_block;
	restart->fn = futex_wait_restart;
	restart->futex.uaddr = uaddr;
	restart->futex.val = val;
	restart->futex.time = abs_time->tv64;
	restart->futex.bitset = bitset;
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

	ret = -ERESTART_RESTARTBLOCK;

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}


static long futex_wait_restart(struct restart_block *restart)
{
	u32 __user *uaddr = restart->futex.uaddr;
	ktime_t t, *tp = NULL;

	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
		t.tv64 = restart->futex.time;
		tp = &t;
	}
	restart->fn = do_no_restart_syscall;

	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp, restart->futex.bitset);
}

/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block, it does PI, etc. (Due to
 * races the kernel might see a 0 value of the futex too.)
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Task is exiting and we just wait for the
			 * exit to complete.
			 */
			queue_unlock(&q, hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it
	 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(&q, hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(&q, hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}

/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	u32 uval;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * again. If it succeeds then we can return without waking
	 * anyone else up:
	 */
	if (!(uval & FUTEX_OWNER_DIED))
		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);


	if (unlikely(uval == -EFAULT))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == task_pid_vnr(current)))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

out:
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
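
/*
 * For orientation, a hedged sketch (illustrative only, not part of the
 * kernel sources) of the userspace side of the PI protocol whose slow
 * paths futex_lock_pi() and futex_unlock_pi() implement: the futex word
 * holds 0 when free and the owner's TID when held, and the kernel is
 * entered only when the uncontended cmpxchg fails. The function names
 * below are hypothetical.
 */
#if 0
#include <linux/futex.h>
#include <stdatomic.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static void pi_lock(_Atomic uint32_t *word)
{
	uint32_t expected = 0;
	uint32_t tid = (uint32_t)syscall(SYS_gettid);

	/* Fast path: 0 -> TID. On contention, let the kernel block us. */
	if (!atomic_compare_exchange_strong(word, &expected, tid))
		syscall(SYS_futex, word, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

static void pi_unlock(_Atomic uint32_t *word)
{
	uint32_t expected = (uint32_t)syscall(SYS_gettid);

	/* Fast path: TID -> 0. Fails if FUTEX_WAITERS got set meanwhile. */
	if (!atomic_compare_exchange_strong(word, &expected, 0))
		syscall(SYS_futex, word, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}
#endif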

/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Returns:
 *  0 - no early wakeup detected
 * <0 - -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to uaddr (see the paths in
	 * futex_requeue() for details).
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &q->list.plist);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}

/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the futex at uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
 *		the same type, no requeueing from private to shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware.  Normal wakeup will wake on uaddr2 and
 * complete the acquisition of the rt_mutex prior to returning to userspace.
 * This ensures the rt_mutex maintains an owner when it has waiters; without
 * one, the pi logic would not know which task to boost/deboost, if there
 * was a need to.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via the following:
 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
 * 2) wakeup on uaddr2 after a requeue
 * 3) signal
 * 4) timeout
 *
 * If 3, cleanup and return -ERESTARTNOINTR.
 *
 * If 2, we may then block on trying to take the rt_mutex and return via:
 * 5) successful lock
 * 6) signal
 * 7) timeout
 * 8) other lock acquisition failure
 *
 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
 *
 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
 *
 * Returns:
 *  0 - On success
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code. The
	 * futex_requeue dropped our key1 reference and incremented our key2
	 * reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up in futex_requeue(), we have to
		 * acquire the rt_mutex to have it become ours.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace. Note
	 * that pi_mutex is only set on the requeued path above; a fault from
	 * fixup_pi_state_owner() can reach here with pi_mutex still NULL.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}

/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	const struct cred *cred = current_cred(), *pcred;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	if (!pid)
		head = current->robust_list;
	else {
		struct task_struct *p;

		ret = -ESRCH;
		rcu_read_lock();
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
		ret = -EPERM;
		pcred = __task_cred(p);
		if (cred->euid != pcred->euid &&
		    cred->euid != pcred->uid &&
		    !capable(CAP_SYS_PTRACE))
			goto err_unlock;
		head = p->robust_list;
		rcu_read_unlock();
	}

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}

/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, nval, mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * dying on a non-robust futex.)
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);

		if (nval == -EFAULT)
			return -1;

		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}

/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     unsigned int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}

/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}
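
/*
 * For orientation, a hedged sketch (illustrative only, not part of the
 * kernel sources) of the userspace registration that exit_robust_list()
 * walks: each thread publishes a robust_list_head once via
 * sys_set_robust_list() and thereafter links held locks into it. The
 * struct and function names below are hypothetical.
 */
#if 0
#include <linux/futex.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

struct my_robust_lock {
	struct robust_list list;	/* linked into the per-thread list */
	uint32_t futex;			/* the futex word itself */
};

static __thread struct robust_list_head robust_head;

static void robust_list_register(void)
{
	robust_head.list.next = &robust_head.list;	/* empty circular list */
	/* distance from a list entry to its futex word: */
	robust_head.futex_offset = offsetof(struct my_robust_lock, futex)
				 - offsetof(struct my_robust_lock, list);
	robust_head.list_op_pending = NULL;
	syscall(SYS_set_robust_list, &robust_head, sizeof(robust_head));
}
#endif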

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAIT_BITSET:
		ret = futex_wait(uaddr, flags, val, timeout, val3);
		break;
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAKE_BITSET:
		ret = futex_wake(uaddr, flags, val, val3);
		break;
	case FUTEX_REQUEUE:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
		break;
	case FUTEX_CMP_REQUEUE:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
		break;
	case FUTEX_WAKE_OP:
		ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
		break;
	case FUTEX_LOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
		break;
	case FUTEX_UNLOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_unlock_pi(uaddr, flags);
		break;
	case FUTEX_TRYLOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
		break;
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					    uaddr2);
		break;
	case FUTEX_CMP_REQUEUE_PI:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
		break;
	default:
		ret = -ENOSYS;
	}
	return ret;
}


SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
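
/*
 * For orientation, a hedged sketch (illustrative only, not part of the
 * kernel sources) of the basic wait/wake pairing that sys_futex()
 * dispatches to futex_wait() and futex_wake(). FUTEX_WAIT blocks only
 * while *word still equals val, and may return spuriously, so callers
 * re-check their condition in a loop. The function names below are
 * hypothetical.
 */
#if 0
#include <linux/futex.h>
#include <stdatomic.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static void futex_wait_on(_Atomic uint32_t *word, uint32_t val)
{
	/* The kernel re-checks *word == val under the hash bucket lock. */
	while (atomic_load(word) == val)
		syscall(SYS_futex, word, FUTEX_WAIT, val, NULL, NULL, 0);
}

static void futex_wake_one(_Atomic uint32_t *word)
{
	syscall(SYS_futex, word, FUTEX_WAKE, 1, NULL, NULL, 0);
}
#endif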

static int __init futex_init(void)
{
	u32 curval;
	int i;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * functionality. We want to know that before we call in any
	 * of the complex code paths. Also we want to prevent
	 * registration of robust lists in that case. NULL is
	 * guaranteed to fault and we get -EFAULT on functional
	 * implementation, the non-functional ones will return
	 * -ENOSYS.
	 */
	curval = cmpxchg_futex_value_locked(NULL, 0, 0);
	if (curval == -EFAULT)
		futex_cmpxchg_enabled = 1;

	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
		plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(futex_init);