/*
 * Fast Userspace Mutexes ("futexes").
 *
 * Kernel support for FUTEX_WAIT/FUTEX_WAKE and their bitset variants,
 * FUTEX_WAKE_OP, (PI-)requeueing, robust futexes and PI (priority
 * inheritance) futexes, built on a hashed table of wait queues.
 */
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/module.h>
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>

#include <asm/futex.h>

#include "rtmutex_common.h"

int __read_mostly futex_cmpxchg_enabled;

#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)

/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SHARED		0x01
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04

/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	atomic_t refcount;

	union futex_key key;
};

/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list
 * via the rt_mutex code. See unqueue_me_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me() */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};

/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};

static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];

/*
 * We hash on the keys returned from get_futex_key (see below).
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}
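
/*
 * Example (illustrative only, not from the original source): for a
 * process-private futex at userspace address uaddr, get_futex_key()
 * below produces roughly
 *
 *	key.private.mm      = current->mm;
 *	key.private.address = uaddr & PAGE_MASK;
 *	key.both.offset     = uaddr % PAGE_SIZE;
 *
 * so all waiters on the same word, in the same mm, hash to the same
 * bucket above and serialize on that bucket's spinlock.
 */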

/*
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}

/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		atomic_inc(&key->private.mm->mm_count);
		break;
	}
}

/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}

/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Returns a negative error code or 0.
 * The key words are stored in *key on success.
 *
 * For shared mappings, the key is (inode, pgoff, offset_within_page).
 * For private mappings, it is (current->mm, address, offset_within_page).
 * We can usually work out the index without swapping in the page.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma().
	 */
	if (!fshared) {
		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/* serialize against __split_huge_page_splitting() */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * page_head is valid pointer but we must pin
			 * it before taking the PG_lock and/or
			 * PG_compound_lock. The moment we re-enable
			 * irqs __split_huge_page_splitting() can
			 * return and the head page can be freed from
			 * under us. We can't take the PG_lock and/or
			 * PG_compound_lock on a page that could be
			 * freed from under us.
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);
	if (!page_head->mapping) {
		unlock_page(page_head);
		put_page(page_head);
		/*
		 * ZERO_PAGE pages don't have a mapping. Avoid a busy loop
		 * trying to find one. RW mapping would have COW'd (and thus
		 * have a mapping) so this page is RO and won't ever change.
		 */
		if (page_head == ZERO_PAGE(address))
			return -EFAULT;
		goto again;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page_head)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = page_head->index;
	}

	get_futex_key_refs(key);

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}

static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}

/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling fixup_user_fault() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}

/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}

static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}

static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}

/*
 * PI code:
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	atomic_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}

static struct futex_pi_state *alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already:
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}

/*
 * Look up the task based on what TID userspace gave us.
 * We dont trust it.
 */
static struct task_struct *futex_find_get_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (p)
		get_task_struct(p);

	rcu_read_unlock();

	return p;
}

/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the
 *  day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}

static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * When pi_state->owner is NULL then the owner died
			 * and another waiter is on the fly. pi_state->owner
			 * is fixed up by the task which acquires
			 * pi_state->rt_mutex.
			 *
			 * We do not check for pid == 0 which can happen when
			 * the owner died and robust_list_exit() cleared the
			 * TID.
			 */
			if (pid && pi_state->owner) {
				/*
				 * Bail out if user space manipulated the
				 * futex value.
				 */
				if (pid != task_pid_vnr(pi_state->owner))
					return -EINVAL;
			}

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * its owner.
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}

/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Returns:
 *  0 - ready to wait
 *  1 - acquired the lock
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, ownerdied = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), but handle the
	 * lock acquisition failures in a non-atomic manner:
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if (unlikely((curval & FUTEX_TID_MASK) == vpid))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * There are two cases, where a futex might have no owner (the
	 * owner TID is 0): OWNER_DIED. We take over the futex in this
	 * case. We also do an unconditional take over, when the owner
	 * of the futex died.
	 *
	 * This is safe as we are protected by the hash bucket lock !
	 */
	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
		/* Keep the OWNER_DIED bit */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		ownerdied = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to owner died take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * No owner found for this futex. Check if the
			 * OWNER_DIED bit is set to figure out whether
			 * this is a robust futex or not.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * We simply start over in case of a robust
			 * futex. The code above will take the futex
			 * and return happy.
			 */
			if (curval & FUTEX_OWNER_DIED) {
				ownerdied = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}

/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
}

/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed.
 */
static void wake_futex(struct futex_q *q)
{
	struct task_struct *p = q->task;

	/*
	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
	 * a non-futex wake up happens on another CPU then the task
	 * might exit and p would dereference a non-existing task
	 * struct. Prevent this by holding a reference on p across the
	 * wake up.
	 */
	get_task_struct(p);

	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;

	wake_up_state(p, TASK_NORMAL);
	put_task_struct(p);
}

static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 curval, newval;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. (The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.)
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}

static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 oldval;

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit has not to be preserved here. We are the owner:
	 */
	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
		return -EFAULT;
	if (oldval != uval)
		return -EAGAIN;

	return 0;
}

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}

/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	put_futex_key(&key);
out:
	return ret;
}

/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head;
	struct futex_q *this, *next;
	int ret, op_ret;

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	head = &hb1->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, &key1)) {
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		head = &hb2->chain;

		op_ret = 0;
		plist_for_each_entry_safe(this, next, head, list) {
			if (match_futex(&this->key, &key2)) {
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

	double_unlock_hb(hb1, hb2);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}

/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}

/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}

/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Returns:
 *  0 - failed to acquire the lock atomically
 *  1 - acquired the lock
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1)
		requeue_pi_wake_futex(top_waiter, key2, hb2);

	return ret;
}

/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters queued on uaddr1 to uaddr2. In the requeue_pi case, try to
 * acquire uaddr2 atomically on behalf of the top waiter.
 *
 * Returns:
 * >=0 - on success, the number of tasks requeued or woken
 *  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head1;
	struct futex_q *this, *next;
	u32 curval2;

	if (requeue_pi) {
		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi wakes at most one task: it acquires the rt_mutex
		 * on behalf of the top waiter, so waking more than one task
		 * cannot be done reliably (second and third wake-ups would
		 * race with the first wake and with faults while looking up
		 * the pi_state). Therefore nr_wake must be 1, which is what
		 * pthread_cond_signal() and pthread_cond_broadcast() use.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	if (pi_state != NULL) {
		/*
		 * We will have to lookup the pi_state again, so free this one
		 * to keep the accounting correct.
		 */
		free_pi_state(pi_state);
		pi_state = NULL;
	}

	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it.
		 */
		if (ret == 1) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			ret = get_futex_value_locked(&curval2, uaddr2);
			if (!ret)
				ret = lookup_pi_state(curval2, hb2, &key2,
						      &pi_state);
		}

		switch (ret) {
		case 0:
			break;
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/* The owner was exiting, try again. */
			double_unlock_hb(hb1, hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	head1 = &hb1->chain;
	plist_for_each_entry_safe(this, next, head1, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			wake_futex(this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/* Prepare the waiter to take the rt_mutex. */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task, 1);
			if (ret == 1) {
				/* We got the lock. */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/* -EDEADLK */
				this->pi_state = NULL;
				free_pi_state(pi_state);
				goto out_unlock;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	if (pi_state != NULL)
		free_pi_state(pi_state);
	return ret ? ret : task_count;
}

/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);
	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}

static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
}

/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the
 * unqueue state is implicit in the state of the woken task (see
 * futex_wait_requeue_pi() for an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}

/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Returns:
 *   1 - if the futex_q was still queued (and we removed it)
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to its original value; if it
		 * could, the spin_lock() above would have taken the
		 * correct lock in the first place.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}

/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}

/*
 * Fixup the pi_state owner with the new owner.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, curval, newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}

static long futex_wait_restart(struct restart_block *restart);

/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Returns:
 *  1 - success, lock taken
 *  0 - success, lock not taken
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late. Find the new owner and fix up the
		 * pi_state accordingly.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}

/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using set_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			schedule();
	}
	__set_current_state(TASK_RUNNING);
}

/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Returns:
 *  0 - uaddr contains val and hb has been locked
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(q, *hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(q, *hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}

static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int ret;

	if (!bitset)
		return -EINVAL;
	q.bitset = bitset;

	if (abs_time) {
		to = &timeout;

		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

retry:
	/*
	 * Prepare to wait on uaddr. On success, holds hb lock and increments
	 * q.key refs.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/* queue_me and wait for wakeup, timeout, or a signal. */
	futex_wait_queue_me(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	ret = 0;
	/* unqueue_me() drops q.key ref */
	if (!unqueue_me(&q))
		goto out;
	ret = -ETIMEDOUT;
	if (to && !to->task)
		goto out;

	/*
	 * We expect signal_pending(current), but we might be the
	 * victim of a spurious wakeup as well.
	 */
	if (!signal_pending(current))
		goto retry;

	ret = -ERESTARTSYS;
	if (!abs_time)
		goto out;

	restart = &current_thread_info()->restart_block;
	restart->fn = futex_wait_restart;
	restart->futex.uaddr = uaddr;
	restart->futex.val = val;
	restart->futex.time = abs_time->tv64;
	restart->futex.bitset = bitset;
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

	ret = -ERESTART_RESTARTBLOCK;

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}

static long futex_wait_restart(struct restart_block *restart)
{
	u32 __user *uaddr = restart->futex.uaddr;
	ktime_t t, *tp = NULL;

	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
		t.tv64 = restart->futex.time;
		tp = &t;
	}
	restart->fn = do_no_restart_syscall;

	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp, restart->futex.bitset);
}

/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block, it does PI, etc. (Due to
 * races the kernel might see a 0 value of the futex too.)
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Task is exiting and we just wait for the
			 * exit to complete.
			 */
			queue_unlock(&q, hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(&q, hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(&q, hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}

/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	u32 uval, vpid = task_pid_vnr(current);
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * again. If it succeeds then we can return without waking
	 * anyone else up. We only try this if the owner died bit is
	 * not set, since that requires the waiter path below:
	 */
	if (!(uval & FUTEX_OWNER_DIED) &&
	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == vpid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex(&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

out:
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}

/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Returns
 *  0 - no early wakeup detected
 * <0 - -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb:
	 * the requeue code takes this same lock before touching q.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}

/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the pi futex uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.); both futexes
 *		must be the same type, no requeueing from private to shared
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.  This ensures the rt_mutex maintains an owner when
 * it has waiters; without one, the pi logic would not know which task to
 * boost/deboost, if there was a need to.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via the following:
 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
 * 2) wakeup on uaddr2 after a requeue
 * 3) signal
 * 4) timeout
 *
 * If 3, cleanup and return -ERESTARTNOINTR.
 *
 * If 2, we may then block on trying to take the rt_mutex and return via:
 * 5) successful lock
 * 6) signal
 * 7) timeout
 * 8) other lock acquisition failure
 *
 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
 *
 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
 *
 * Returns:
 *  0 - On success
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code. The
	 * futex_requeue dropped our key1 reference and incremented our key2
	 * reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up in futex_requeue(), and now we
		 * complete the acquisition of the rt_mutex prior to
		 * returning to userspace.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}

/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	const struct cred *cred = current_cred(), *pcred;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	if (!pid)
		head = current->robust_list;
	else {
		struct task_struct *p;

		ret = -ESRCH;
		rcu_read_lock();
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
		ret = -EPERM;
		pcred = __task_cred(p);
		/* If victim is in different user_ns, then uids are not
		   comparable, so we must have CAP_SYS_PTRACE */
		if (cred->user->user_ns != pcred->user->user_ns) {
			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
				goto err_unlock;
			goto ok;
		}
		/* If victim is in same user_ns, then uids are comparable */
		if (cred->euid != pcred->euid &&
		    cred->euid != pcred->uid &&
		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
			goto err_unlock;
ok:
		head = p->robust_list;
		rcu_read_unlock();
	}

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}

/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, nval, mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * thread-death.) The rest of the cleanup is done in
		 * userspace.
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * We are not holding a lock here, but we want to have
		 * the pagefault_disable/enable() protection because
		 * we want to handle the fault gracefully. If the
		 * access fails we try to fault in the futex with R/W
		 * verification via get_user_pages. get_user() above
		 * does not guarantee R/W access. If that fails we
		 * give up and leave the futex locked.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}

/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     unsigned int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}
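
/*
 * For reference, the user-visible layout this code walks (as declared
 * in the futex uapi header; reproduced here only for illustration):
 *
 *	struct robust_list {
 *		struct robust_list __user *next;
 *	};
 *
 *	struct robust_list_head {
 *		struct robust_list list;
 *		long futex_offset;
 *		struct robust_list __user *list_op_pending;
 *	};
 *
 * Each lock record embeds a struct robust_list; the futex word itself
 * lives futex_offset bytes from each entry, and bit 0 of an entry
 * pointer marks it as a PI futex, which is why fetch_robust_entry()
 * masks it off and returns it separately in *pi.
 */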

/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
	case FUTEX_WAIT_BITSET:
		ret = futex_wait(uaddr, flags, val, timeout, val3);
		break;
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
	case FUTEX_WAKE_BITSET:
		ret = futex_wake(uaddr, flags, val, val3);
		break;
	case FUTEX_REQUEUE:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
		break;
	case FUTEX_CMP_REQUEUE:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
		break;
	case FUTEX_WAKE_OP:
		ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
		break;
	case FUTEX_LOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
		break;
	case FUTEX_UNLOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_unlock_pi(uaddr, flags);
		break;
	case FUTEX_TRYLOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
		break;
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					    uaddr2);
		break;
	case FUTEX_CMP_REQUEUE_PI:
		ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
		break;
	default:
		ret = -ENOSYS;
	}
	return ret;
}

SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
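
/*
 * Userspace usage sketch (illustrative only, not part of this file): a
 * minimal wait/wake pair over a shared integer using the raw syscall.
 * Error handling and the race-free "check then wait" loop are the
 * caller's responsibility.
 *
 *	#include <linux/futex.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static long fwait(int *uaddr, int val)
 *	{
 *		return syscall(SYS_futex, uaddr, FUTEX_WAIT, val,
 *			       NULL, NULL, 0);
 *	}
 *
 *	static long fwake(int *uaddr, int nr)
 *	{
 *		return syscall(SYS_futex, uaddr, FUTEX_WAKE, nr,
 *			       NULL, NULL, 0);
 *	}
 *
 * fwait() blocks only while *uaddr still equals val (otherwise the
 * kernel returns -EWOULDBLOCK), which is exactly the guarantee
 * futex_wait_setup() above establishes under the hash-bucket lock.
 */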

static int __init futex_init(void)
{
	u32 curval;
	int i;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * functionality. We want to know that before we call in any
	 * of the complex code paths. Also we want to prevent
	 * registration of robust lists in that case. NULL is
	 * guaranteed to fault and we get -EFAULT on functional
	 * implementation, the non-functional ones will return
	 * -ENOSYS.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;

	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(futex_init);