1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/module.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62
63#include <asm/futex.h>
64
65#include "rtmutex_common.h"
66
/*
 * Non-zero when the architecture provides a working
 * futex_atomic_cmpxchg_inatomic(); PI futexes depend on it.
 */
int __read_mostly futex_cmpxchg_enabled;

/* Hash table size: 2^4 buckets on CONFIG_BASE_SMALL, 2^8 otherwise. */
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
70
71
72
73
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	/* Current owner task; NULL while ownership is in flux/owner dying */
	struct task_struct *owner;
	atomic_t refcount;

	/* Key of the futex this pi_state belongs to */
	union futex_key key;
};
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/*
 * Hashed waitqueue entry for one waiting task.  We use this instead of a
 * normal wait_queue_t so we can wake only the relevant waiters (hashed
 * queues may be shared between unrelated futexes).
 *
 * A futex_q is considered woken when plist_node_empty(&q->list) ||
 * q->lock_ptr == NULL.  Wakeup always makes the first condition true
 * first, then the second (see wake_futex()).
 */
struct futex_q {
	struct plist_node list;		/* priority-sorted entry in hb->chain */

	struct task_struct *task;	/* the waiting task */
	spinlock_t *lock_ptr;		/* &hb->lock; NULL once woken */
	union futex_key key;		/* key the task is waiting on */
	struct futex_pi_state *pi_state;	/* PI state, if a PI futex */
	struct rt_mutex_waiter *rt_waiter;	/* rt_mutex waiter, requeue_pi only */
	union futex_key *requeue_pi_key;	/* expected requeue target key */
	u32 bitset;			/* wake-match bitset (FUTEX_WAIT_BITSET) */
};
124
125
126
127
128
129
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for
 * each task waiting on a futex.
 */
struct futex_hash_bucket {
	spinlock_t lock;		/* protects chain and queued futex_q's */
	struct plist_head chain;	/* priority-ordered list of waiters */
};

/* The global hash table; see hash_futex() for the key -> bucket mapping. */
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
136
137
138
139
140static struct futex_hash_bucket *hash_futex(union futex_key *key)
141{
142 u32 hash = jhash2((u32*)&key->both.word,
143 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
144 key->both.offset);
145 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
146}
147
148
149
150
151static inline int match_futex(union futex_key *key1, union futex_key *key2)
152{
153 return (key1 && key2
154 && key1->both.word == key2->both.word
155 && key1->both.ptr == key2->both.ptr
156 && key1->both.offset == key2->both.offset);
157}
158
159
160
161
162
163
164static void get_futex_key_refs(union futex_key *key)
165{
166 if (!key->both.ptr)
167 return;
168
169 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
170 case FUT_OFF_INODE:
171 atomic_inc(&key->shared.inode->i_count);
172 break;
173 case FUT_OFF_MMSHARED:
174 atomic_inc(&key->private.mm->mm_count);
175 break;
176 }
177}
178
179
180
181
182
183static void drop_futex_key_refs(union futex_key *key)
184{
185 if (!key->both.ptr) {
186
187 WARN_ON_ONCE(1);
188 return;
189 }
190
191 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
192 case FUT_OFF_INODE:
193 iput(key->shared.inode);
194 break;
195 case FUT_OFF_MMSHARED:
196 mmdrop(key->private.mm);
197 break;
198 }
199}
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Returns a negative error code or 0.  The key words are stored in
 * *key on success.
 *
 * For shared mappings the key is based on the inode and page offset;
 * for private (anonymous) mappings it is (uaddr, current->mm).
 *
 * lock_page() might sleep, so the caller must not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page;
	int err;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma().
	 */
	if (!fshared) {
		if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page);
	if (err < 0)
		return err;

	page = compound_head(page);
	lock_page(page);
	if (!page->mapping) {
		/* Page was truncated/unmapped under us - retry. */
		unlock_page(page);
		put_page(page);
		goto again;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the inode, not the individual mm.
	 */
	if (PageAnon(page)) {
		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page->mapping->host;
		key->shared.pgoff = page->index;
	}

	get_futex_key_refs(key);

	unlock_page(page);
	put_page(page);
	return 0;
}
286
/*
 * Drop the reference taken by get_futex_key().  @fshared is unused;
 * it is kept for call-site symmetry with get_futex_key().
 */
static inline
void put_futex_key(int fshared, union futex_key *key)
{
	drop_futex_key_refs(key);
}
292
293
294
295
296
297
298
299
300
301
302
303
304
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to the faulting user space address
 *
 * Slow path to fix up the fault we just took in a pagefault-disabled
 * atomic access to @uaddr: fault the page in with write intent via
 * get_user_pages().
 *
 * Returns 0 on success, negative error code on failure.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
				 1, 1, 0, NULL, NULL);
	return ret < 0 ? ret : 0;
}
311
312
313
314
315
316
317
318
319static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
320 union futex_key *key)
321{
322 struct futex_q *this;
323
324 plist_for_each_entry(this, &hb->chain, list) {
325 if (match_futex(&this->key, key))
326 return this;
327 }
328 return NULL;
329}
330
/*
 * Atomic compare-and-exchange on the user futex word with pagefaults
 * disabled (safe to call with spinlocks held).
 *
 * Returns the previous value of *uaddr.  On fault, the architecture
 * helper encodes -EFAULT in the returned u32 — callers compare the
 * result against -EFAULT to detect it.
 */
static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
{
	u32 curval;

	pagefault_disable();
	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
	pagefault_enable();

	return curval;
}
341
342static int get_futex_value_locked(u32 *dest, u32 __user *from)
343{
344 int ret;
345
346 pagefault_disable();
347 ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
348 pagefault_enable();
349
350 return ret ? -EFAULT : 0;
351}
352
353
354
355
356
357static int refill_pi_state_cache(void)
358{
359 struct futex_pi_state *pi_state;
360
361 if (likely(current->pi_state_cache))
362 return 0;
363
364 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
365
366 if (!pi_state)
367 return -ENOMEM;
368
369 INIT_LIST_HEAD(&pi_state->list);
370
371 pi_state->owner = NULL;
372 atomic_set(&pi_state->refcount, 1);
373 pi_state->key = FUTEX_KEY_INIT;
374
375 current->pi_state_cache = pi_state;
376
377 return 0;
378}
379
380static struct futex_pi_state * alloc_pi_state(void)
381{
382 struct futex_pi_state *pi_state = current->pi_state_cache;
383
384 WARN_ON(!pi_state);
385 current->pi_state_cache = NULL;
386
387 return pi_state;
388}
389
/*
 * Drop a reference on a pi_state.  On the final put: unlink it from the
 * owner's pi_state_list (under owner->pi_lock), release the proxy-held
 * rt_mutex, then either recycle the object into current's one-element
 * cache or kfree() it.
 */
static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already.
	 */
	if (pi_state->owner) {
		spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * Clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
420
421
422
423
424
/*
 * Look up the task based on what TID userspace gave us.
 * We dont trust it.
 *
 * Returns the task with a reference held, or ERR_PTR(-ESRCH).
 */
static struct task_struct * futex_find_get_task(pid_t pid)
{
	struct task_struct *p;
	const struct cred *cred = current_cred(), *pcred;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (!p) {
		p = ERR_PTR(-ESRCH);
	} else {
		/*
		 * NOTE(review): this euid-based check makes the lookup fail
		 * with -ESRCH when the lock owner runs under an unrelated
		 * uid.  Later kernels removed the credentials check entirely;
		 * confirm it is intended here.
		 */
		pcred = __task_cred(p);
		if (cred->euid != pcred->euid &&
		    cred->euid != pcred->uid)
			p = ERR_PTR(-ESRCH);
		else
			get_task_struct(p);
	}

	rcu_read_unlock();

	return p;
}
447
448
449
450
451
452
/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 *
 * Lock ordering is hb->lock before pi_lock, so pi_lock is dropped and
 * re-taken around each hb->lock acquisition, with a re-check afterwards.
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		/* Drop pi_lock: hb->lock must be taken first. */
		spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		spin_lock_irq(&curr->pi_lock);
	}
	spin_unlock_irq(&curr->pi_lock);
}
502
/*
 * Look up (or create) the pi_state for the futex at @key whose user
 * space word currently holds @uval.  Must be called with hb->lock held.
 *
 * If another waiter is already queued on the futex, its pi_state is
 * reused (refcount incremented).  Otherwise the owner task is looked up
 * from the TID in @uval and a fresh pi_state from current's cache is
 * attached to it.
 *
 * Returns 0 and sets *ps on success; -EINVAL on state corruption;
 * -ESRCH when no owner task is found; -EAGAIN when the owner is
 * exiting and cleanup is still in progress.
 */
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));
			WARN_ON(pid && pi_state->owner &&
				pi_state->owner->pid != pid);

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Returns:
 *  0 - ready to wait
 *  1 - acquired the lock
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, ownerdied = 0;
	u32 uval, newval, curval;

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
	 * the locks. It will most likely not succeed.
	 */
	newval = task_pid_vnr(task);
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);

	if (unlikely(curval == -EFAULT))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * There are two cases, where a futex might have no owner (the
	 * owner TID is 0): OWNER_DIED. We take over the futex in this
	 * case. We also do an unconditional take over, when the owner
	 * of the futex died.
	 *
	 * This is safe as we are protected by the hash bucket lock !
	 */
	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
		/* Keep the OWNER_DIED bit */
		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
		ownerdied = 0;
		lock_taken = 1;
	}

	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

	if (unlikely(curval == -EFAULT))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to owner died take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * No owner found for this futex. Check if the
			 * OWNER_DIED bit is set to figure out whether
			 * this is a robust futex or not.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * We simply start over in case of a robust
			 * futex. The code above will take the futex
			 * and return happy.  (Falls through to default
			 * when OWNER_DIED is not set.)
			 */
			if (curval & FUTEX_OWNER_DIED) {
				ownerdied = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}
716
717
718
719
720
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed.
 */
static void wake_futex(struct futex_q *q)
{
	struct task_struct *p = q->task;

	/*
	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
	 * a non-futex wake up happens on another CPU then the task
	 * might exit and p would dereference a non-existing task
	 * struct. Prevent this by holding a reference on p across the
	 * wake up.
	 */
	get_task_struct(p);

	plist_del(&q->list, &q->list.plist);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;

	wake_up_state(p, TASK_NORMAL);
	put_task_struct(p);
}
747
/*
 * Wake the top waiter of a PI futex: hand the rt_mutex (and the user
 * space futex word) over to the next owner.  Called with hb->lock held.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 curval, newval;

	if (!pi_state)
		return -EINVAL;

	spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * This happens when we have stolen the lock and the original
	 * pending owner did not enqueue itself back on the rt_mutex.
	 * Thats not a tragedy. We know that way, that a lock waiter
	 * is on the fly. We make the futex_q waiter the pending owner.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. (The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.)
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

		if (curval == -EFAULT)
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	/* Move the pi_state from the old owner's list to the new one's. */
	spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	spin_unlock_irq(&pi_state->owner->pi_lock);

	spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	spin_unlock_irq(&new_owner->pi_lock);

	spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}
807
808static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
809{
810 u32 oldval;
811
812
813
814
815
816 oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
817
818 if (oldval == -EFAULT)
819 return oldval;
820 if (oldval != uval)
821 return -EAGAIN;
822
823 return 0;
824}
825
826
827
828
829static inline void
830double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
831{
832 if (hb1 <= hb2) {
833 spin_lock(&hb1->lock);
834 if (hb1 < hb2)
835 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
836 } else {
837 spin_lock(&hb2->lock);
838 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
839 }
840}
841
842static inline void
843double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
844{
845 spin_unlock(&hb1->lock);
846 if (hb1 != hb2)
847 spin_unlock(&hb2->lock);
848}
849
850
851
852
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 * Returns the number of tasks woken (up to nr_wake) or a negative
 * error code.
 */
static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key)) {
			/* PI and requeue_pi waiters must not be woken here. */
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	put_futex_key(fshared, &key);
out:
	return ret;
}
894
895
896
897
898
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address.  First performs the encoded operation @op
 * on *uaddr2, then wakes up to nr_wake waiters on uaddr1 and, if the
 * operation's comparison succeeded, up to nr_wake2 waiters on uaddr2.
 * Returns the total number woken or a negative error code.
 */
static int
futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head;
	struct futex_q *this, *next;
	int ret, op_ret;

retry:
	ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		/* Fault in the page and retry the operation. */
		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!fshared)
			goto retry_private;

		put_futex_key(fshared, &key2);
		put_futex_key(fshared, &key1);
		goto retry;
	}

	head = &hb1->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key1)) {
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	/* op_ret > 0 means the comparison succeeded: wake on uaddr2 too. */
	if (op_ret > 0) {
		head = &hb2->chain;

		op_ret = 0;
		plist_for_each_entry_safe(this, next, head, list) {
			if (match_futex (&this->key, &key2)) {
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

	double_unlock_hb(hb1, hb2);
out_put_keys:
	put_futex_key(fshared, &key2);
out_put_key1:
	put_futex_key(fshared, &key1);
out:
	return ret;
}
985
986
987
988
989
990
991
992
/**
 * requeue_futex() - Requeue a futex_q from one hash bucket to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 *
 * Must be called with both hb1->lock and hb2->lock held.
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
#ifdef CONFIG_DEBUG_PI_LIST
		q->list.plist.lock = &hb2->lock;
#endif
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	WARN_ON(plist_node_empty(&q->list));
	plist_del(&q->list, &q->list.plist);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;
#ifdef CONFIG_DEBUG_PI_LIST
	q->list.plist.lock = &hb->lock;
#endif

	wake_up_state(q->task, TASK_NORMAL);
}
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it
 * atomically.  Wake the top waiter if we succeed.  If the caller specified
 * set_waiters, then direct futex_lock_pi_atomic() to force setting the
 * FUTEX_WAITERS bit.  hb1 and hb2 must be held by the caller.
 *
 * Returns:
 *  0 - failed to acquire the lock atomically
 *  1 - acquired the lock
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unecessarily as it will force the subsequent unlock to enter
	 * the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1)
		requeue_pi_wake_futex(top_waiter, key2, hb2);

	return ret;
}
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 * 		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
 * uaddr2 atomically on behalf of the top waiter.
 *
 * Returns:
 * >=0 - on success, the number of tasks requeued or woken
 *  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
			 int nr_wake, int nr_requeue, u32 *cmpval,
			 int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head1;
	struct futex_q *this, *next;
	u32 curval2;

	if (requeue_pi) {
		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	if (pi_state != NULL) {
		/*
		 * We will have to lookup the pi_state again, so free this one
		 * to keep the accounting correct.
		 */
		free_pi_state(pi_state);
		pi_state = NULL;
	}

	ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, fshared, &key2,
			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);

			/* Fault path: read outside atomic, then retry. */
			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!fshared)
				goto retry_private;

			put_futex_key(fshared, &key2);
			put_futex_key(fshared, &key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it.
		 */
		if (ret == 1) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			ret = get_futex_value_locked(&curval2, uaddr2);
			if (!ret)
				ret = lookup_pi_state(curval2, hb2, &key2,
						      &pi_state);
		}

		switch (ret) {
		case 0:
			break;
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			put_futex_key(fshared, &key2);
			put_futex_key(fshared, &key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/* The owner was exiting, try again. */
			double_unlock_hb(hb1, hb2);
			put_futex_key(fshared, &key2);
			put_futex_key(fshared, &key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	head1 = &hb1->chain;
	plist_for_each_entry_safe(this, next, head1, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			wake_futex(this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/* Prepare the waiter to take the rt_mutex. */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task, 1);
			if (ret == 1) {
				/* We got the lock. */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/* -EDEADLK */
				this->pi_state = NULL;
				free_pi_state(pi_state);
				goto out_unlock;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(fshared, &key2);
out_put_key1:
	put_futex_key(fshared, &key1);
out:
	if (pi_state != NULL)
		free_pi_state(pi_state);
	return ret ? ret : task_count;
}
1342
1343
1344static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1345{
1346 struct futex_hash_bucket *hb;
1347
1348 get_futex_key_refs(&q->key);
1349 hb = hash_futex(&q->key);
1350 q->lock_ptr = &hb->lock;
1351
1352 spin_lock(&hb->lock);
1353 return hb;
1354}
1355
1356static inline void
1357queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1358{
1359 spin_unlock(&hb->lock);
1360 drop_futex_key_refs(&q->key);
1361}
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here.  A call
 * to queue_me() is typically paired with exactly one call to unqueue_me().
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
#ifdef CONFIG_DEBUG_PI_LIST
	q->list.plist.lock = &hb->lock;
#endif
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * Must be called with no locks held; takes and drops q->lock_ptr.
 *
 * Returns:
 *   1 - if the futex_q was still queued (and we removed unqueued it)
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		WARN_ON(plist_node_empty(&q->list));
		plist_del(&q->list, &q->list.plist);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
1449
1450
1451
1452
1453
1454
/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. *q->lock_ptr) is held on
 * entry and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
{
	WARN_ON(plist_node_empty(&q->list));
	plist_del(&q->list, &q->list.plist);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);

	drop_futex_key_refs(&q->key);
}
1468
1469
1470
1471
1472
1473
1474
/*
 * Fixup the pi_state owner to @newowner and write the new TID into the
 * user space futex word.  Called with the hash bucket lock (q->lock_ptr)
 * held; may temporarily drop and re-take it to handle faults.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner, int fshared)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, curval, newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * pending owner or we are the pending owner which failed to
	 * get the rtmutex. We have to replace the pending owner TID in
	 * the user space variable. This must be atomic as we have to
	 * preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

		if (curval == -EFAULT)
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the pending
	 * owner itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
1568
1569
1570
1571
1572
/*
 * Flags stored in restart_block->futex.flags across a restarted
 * FUTEX_WAIT (see futex_wait() / futex_wait_restart()).
 */
#define FLAGS_SHARED		0x01	/* PROCESS_SHARED futex */
#define FLAGS_CLOCKRT		0x02	/* timeout uses CLOCK_REALTIME */
#define FLAGS_HAS_TIMEOUT	0x04	/* restart->futex.time is valid */

static long futex_wait_restart(struct restart_block *restart);
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @fshared:	whether the futex is shared (1) or not (0)
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Returns:
 *  1 - success, lock taken
 *  0 - success, lock not taken
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
		       int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current, fshared);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late. We can access the rt_mutex_owner without
		 * locking, as the other task is now blocked on the hash bucket
		 * lock. Fix the state up.
		 */
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
1651
1652
1653
1654
1655
1656
1657
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using set_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			schedule();
	}
	__set_current_state(TASK_RUNNING);
}
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @fshared:	whether the futex is shared (1) or not (0)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Returns:
 *  0 - uaddr contains val and hb has been locked
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we queued after testing *uaddr, that would open
	 * a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	q->key = FUTEX_KEY_INIT;
	ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(q, *hb);

		/* Fault: read outside the atomic section and retry. */
		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!fshared)
			goto retry_private;

		put_futex_key(fshared, &q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(q, *hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(fshared, &q->key);
	return ret;
}
1768
1769static int futex_wait(u32 __user *uaddr, int fshared,
1770 u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
1771{
1772 struct hrtimer_sleeper timeout, *to = NULL;
1773 struct restart_block *restart;
1774 struct futex_hash_bucket *hb;
1775 struct futex_q q;
1776 int ret;
1777
1778 if (!bitset)
1779 return -EINVAL;
1780
1781 q.pi_state = NULL;
1782 q.bitset = bitset;
1783 q.rt_waiter = NULL;
1784 q.requeue_pi_key = NULL;
1785
1786 if (abs_time) {
1787 to = &timeout;
1788
1789 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
1790 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1791 hrtimer_init_sleeper(to, current);
1792 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1793 current->timer_slack_ns);
1794 }
1795
1796retry:
1797
1798 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
1799 if (ret)
1800 goto out;
1801
1802
1803 futex_wait_queue_me(hb, &q, to);
1804
1805
1806 ret = 0;
1807 if (!unqueue_me(&q))
1808 goto out_put_key;
1809 ret = -ETIMEDOUT;
1810 if (to && !to->task)
1811 goto out_put_key;
1812
1813
1814
1815
1816
1817 if (!signal_pending(current)) {
1818 put_futex_key(fshared, &q.key);
1819 goto retry;
1820 }
1821
1822 ret = -ERESTARTSYS;
1823 if (!abs_time)
1824 goto out_put_key;
1825
1826 restart = ¤t_thread_info()->restart_block;
1827 restart->fn = futex_wait_restart;
1828 restart->futex.uaddr = (u32 *)uaddr;
1829 restart->futex.val = val;
1830 restart->futex.time = abs_time->tv64;
1831 restart->futex.bitset = bitset;
1832 restart->futex.flags = FLAGS_HAS_TIMEOUT;
1833
1834 if (fshared)
1835 restart->futex.flags |= FLAGS_SHARED;
1836 if (clockrt)
1837 restart->futex.flags |= FLAGS_CLOCKRT;
1838
1839 ret = -ERESTART_RESTARTBLOCK;
1840
1841out_put_key:
1842 put_futex_key(fshared, &q.key);
1843out:
1844 if (to) {
1845 hrtimer_cancel(&to->timer);
1846 destroy_hrtimer_on_stack(&to->timer);
1847 }
1848 return ret;
1849}
1850
1851
1852static long futex_wait_restart(struct restart_block *restart)
1853{
1854 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
1855 int fshared = 0;
1856 ktime_t t, *tp = NULL;
1857
1858 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1859 t.tv64 = restart->futex.time;
1860 tp = &t;
1861 }
1862 restart->fn = do_no_restart_syscall;
1863 if (restart->futex.flags & FLAGS_SHARED)
1864 fshared = 1;
1865 return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
1866 restart->futex.bitset,
1867 restart->futex.flags & FLAGS_CLOCKRT);
1868}
1869
1870
1871
1872
1873
1874
1875
1876
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed.  The kernel side here does the whole locking operation:
 * if there are waiters then it will block, it does PI, etc.
 */
static int futex_lock_pi(u32 __user *uaddr, int fshared,
			 int detect, ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

	q.pi_state = NULL;
	q.rt_waiter = NULL;
	q.requeue_pi_key = NULL;
retry:
	q.key = FUTEX_KEY_INIT;
	ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Task is exiting and we just wait for the
			 * exit to complete.
			 */
			queue_unlock(&q, hb);
			put_futex_key(fshared, &q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, fshared, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * the rt_mutex and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock: */
	unqueue_me_pi(&q);

	goto out;

out_unlock_put_key:
	queue_unlock(&q, hb);

out_put_key:
	put_futex_key(fshared, &q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(&q, hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!fshared)
		goto retry_private;

	put_futex_key(fshared, &q.key);
	goto retry;
}
1996
1997
1998
1999
2000
2001
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, int fshared)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	u32 uval;
	struct plist_head *head;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
		return -EPERM;

	ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * again.  If it succeeds then we can return without waking
	 * anyone else up:
	 */
	if (!(uval & FUTEX_OWNER_DIED))
		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);

	/* The cmpxchg faulted on the user page - retry after faulting it in. */
	if (unlikely(uval == -EFAULT))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == task_pid_vnr(current)))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(fshared, &key);

out:
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(fshared, &key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket the futex_q was originally enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine whether it was a timeout or a signal that
 * caused the wakeup and return the appropriate error code to the caller.
 * Must be called with the hb lock held.
 *
 * Returns:
 *  0 - no early wakeup detected
 * <0 - -ETIMEDOUT, -ERESTARTNOINTR or -EWOULDBLOCK (spurious wakeup)
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to something else since we don't
	 * support a PI aware wakeup function.
	 */
	if (!match_futex(&q->key, key2)) {
		/* q is still on hb's chain, so its lock_ptr must be hb's lock. */
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &q->list.plist);

		/* Default to -EWOULDBLOCK to handle spurious wakeups gracefully. */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2181 u32 val, ktime_t *abs_time, u32 bitset,
2182 int clockrt, u32 __user *uaddr2)
2183{
2184 struct hrtimer_sleeper timeout, *to = NULL;
2185 struct rt_mutex_waiter rt_waiter;
2186 struct rt_mutex *pi_mutex = NULL;
2187 struct futex_hash_bucket *hb;
2188 union futex_key key2;
2189 struct futex_q q;
2190 int res, ret;
2191
2192 if (!bitset)
2193 return -EINVAL;
2194
2195 if (abs_time) {
2196 to = &timeout;
2197 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
2198 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
2199 hrtimer_init_sleeper(to, current);
2200 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2201 current->timer_slack_ns);
2202 }
2203
2204
2205
2206
2207
2208 debug_rt_mutex_init_waiter(&rt_waiter);
2209 rt_waiter.task = NULL;
2210
2211 key2 = FUTEX_KEY_INIT;
2212 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
2213 if (unlikely(ret != 0))
2214 goto out;
2215
2216 q.pi_state = NULL;
2217 q.bitset = bitset;
2218 q.rt_waiter = &rt_waiter;
2219 q.requeue_pi_key = &key2;
2220
2221
2222 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
2223 if (ret)
2224 goto out_key2;
2225
2226
2227 futex_wait_queue_me(hb, &q, to);
2228
2229 spin_lock(&hb->lock);
2230 ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
2231 spin_unlock(&hb->lock);
2232 if (ret)
2233 goto out_put_keys;
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243 if (!q.rt_waiter) {
2244
2245
2246
2247
2248 if (q.pi_state && (q.pi_state->owner != current)) {
2249 spin_lock(q.lock_ptr);
2250 ret = fixup_pi_state_owner(uaddr2, &q, current,
2251 fshared);
2252 spin_unlock(q.lock_ptr);
2253 }
2254 } else {
2255
2256
2257
2258
2259
2260 WARN_ON(!&q.pi_state);
2261 pi_mutex = &q.pi_state->pi_mutex;
2262 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2263 debug_rt_mutex_free_waiter(&rt_waiter);
2264
2265 spin_lock(q.lock_ptr);
2266
2267
2268
2269
2270 res = fixup_owner(uaddr2, fshared, &q, !ret);
2271
2272
2273
2274
2275 if (res)
2276 ret = (res < 0) ? res : 0;
2277
2278
2279 unqueue_me_pi(&q);
2280 }
2281
2282
2283
2284
2285
2286 if (ret == -EFAULT) {
2287 if (rt_mutex_owner(pi_mutex) == current)
2288 rt_mutex_unlock(pi_mutex);
2289 } else if (ret == -EINTR) {
2290
2291
2292
2293
2294
2295
2296
2297 ret = -EWOULDBLOCK;
2298 }
2299
2300out_put_keys:
2301 put_futex_key(fshared, &q.key);
2302out_key2:
2303 put_futex_key(fshared, &key2);
2304
2305out:
2306 if (to) {
2307 hrtimer_cancel(&to->timer);
2308 destroy_hrtimer_on_stack(&to->timer);
2309 }
2310 return ret;
2311}
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2334 size_t, len)
2335{
2336 if (!futex_cmpxchg_enabled)
2337 return -ENOSYS;
2338
2339
2340
2341 if (unlikely(len != sizeof(*head)))
2342 return -EINVAL;
2343
2344 current->robust_list = head;
2345
2346 return 0;
2347}
2348
2349
2350
2351
2352
2353
2354
/*
 * sys_get_robust_list() - fetch the robust-list head pointer of the
 * task identified by @pid (0 == current) back to userspace, along with
 * the size the kernel expects the head structure to be.
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	const struct cred *cred = current_cred(), *pcred;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	if (!pid)
		head = current->robust_list;
	else {
		struct task_struct *p;

		ret = -ESRCH;
		rcu_read_lock();
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
		ret = -EPERM;
		pcred = __task_cred(p);
		/*
		 * NOTE(review): this euid-based permission check is the old
		 * form; later kernels replaced it with ptrace_may_access()
		 * to close an ASLR information leak - consider backporting
		 * if this tree is security-maintained.
		 */
		if (cred->euid != pcred->euid &&
		    cred->euid != pcred->uid &&
		    !capable(CAP_SYS_PTRACE))
			goto err_unlock;
		head = p->robust_list;
		rcu_read_unlock();
	}

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
2395
2396
2397
2398
2399
/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification and cleanup if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, nval, mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest.  Set the OWNER_DIED bit atomically
		 * via cmpxchg, preserving the FUTEX_WAITERS bit so
		 * a waiter can still detect contention.
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);

		if (nval == -EFAULT)
			return -1;

		/* The value changed under us - re-read and retry. */
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here.  The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}
2437
2438
2439
2440
2441static inline int fetch_robust_entry(struct robust_list __user **entry,
2442 struct robust_list __user * __user *head,
2443 int *pi)
2444{
2445 unsigned long uentry;
2446
2447 if (get_user(uentry, (unsigned long __user *)head))
2448 return -EFAULT;
2449
2450 *entry = (void __user *)(uentry & ~1UL);
2451 *pi = uentry & 1;
2452
2453 return 0;
2454}
2455
2456
2457
2458
2459
2460
2461
/*
 * Walk curr->robust_list in userspace, marking
 * futexes that are owned by dead tasks:
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid "may be used uninitialized" warning */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death(), which may fault the current one:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
					       curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}
2523
/*
 * do_futex() - dispatch a futex operation to its implementation.
 * Shared entry point for the futex syscall and in-kernel callers.
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int clockrt, ret = -ENOSYS;
	int cmd = op & FUTEX_CMD_MASK;
	int fshared = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		fshared = 1;

	/* CLOCK_REALTIME is only valid for the bitset wait variants. */
	clockrt = op & FUTEX_CLOCK_REALTIME;
	if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
		return -ENOSYS;

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fallthrough: FUTEX_WAIT is WAIT_BITSET with an all-ones bitset */
	case FUTEX_WAIT_BITSET:
		ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
		break;
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fallthrough: FUTEX_WAKE is WAKE_BITSET with an all-ones bitset */
	case FUTEX_WAKE_BITSET:
		ret = futex_wake(uaddr, fshared, val, val3);
		break;
	case FUTEX_REQUEUE:
		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
		break;
	case FUTEX_CMP_REQUEUE:
		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
				    0);
		break;
	case FUTEX_WAKE_OP:
		ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
		break;
	case FUTEX_LOCK_PI:
		/* PI ops require a working atomic cmpxchg on the arch. */
		if (futex_cmpxchg_enabled)
			ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
		break;
	case FUTEX_UNLOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_unlock_pi(uaddr, fshared);
		break;
	case FUTEX_TRYLOCK_PI:
		if (futex_cmpxchg_enabled)
			ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
		break;
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
					    clockrt, uaddr2);
		break;
	case FUTEX_CMP_REQUEUE_PI:
		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
				    1);
		break;
	default:
		ret = -ENOSYS;
	}
	return ret;
}
2585
2586
/* Userspace entry point: marshal the timespec, then hand off to do_futex(). */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	/* Only the waiting/locking ops interpret utime as a timeout. */
	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		/* FUTEX_WAIT takes a relative timeout - convert to absolute. */
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
2619
/* One-time boot initialization of the futex hash table and feature probe. */
static int __init futex_init(void)
{
	u32 curval;
	int i;

	/*
	 * This will fail and we want it.  Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * functionality.  We want to know that before we call in any
	 * of the complex code paths.  Also we want to prevent
	 * registration of robust lists in that case.  NULL is
	 * guaranteed to fault and we get -EFAULT on functional
	 * implementation, the non-functional ones will return
	 * -ENOSYS.
	 */
	curval = cmpxchg_futex_value_locked(NULL, 0, 0);
	if (curval == -EFAULT)
		futex_cmpxchg_enabled = 1;

	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
		plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(futex_init);
2647