1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63#include <linux/sched/rt.h>
64#include <linux/freezer.h>
65#include <linux/bootmem.h>
66#include <linux/hugetlb.h>
67
68#include <asm/futex.h>
69
70#include "rtmutex_common.h"
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
/*
 * Non-zero once the architecture has shown that the atomic futex
 * cmpxchg operation works; PI paths (see exit_pi_state_list()) bail
 * out early when it is zero.
 */
int __read_mostly futex_cmpxchg_enabled;

/*
 * Futex flags used to encode options to functions and preserve them
 * across restarts.
 */
#define FLAGS_SHARED		0x01	/* futex may be shared between processes */
#define FLAGS_CLOCKRT		0x02	/* timeout measured against CLOCK_REALTIME */
#define FLAGS_HAS_TIMEOUT	0x04	/* a timeout argument was supplied */
168
169
170
171
/*
 * Priority-Inheritance state attached to a PI futex. One instance
 * exists per (owner task, futex key) pair; it links the user-space
 * futex word to the kernel rt_mutex that implements the boosting.
 */
struct futex_pi_state {
	/*
	 * List entry on the owning task's ->pi_state_list, protected by
	 * the owner's ->pi_lock (see exit_pi_state_list()).
	 */
	struct list_head list;

	/*
	 * The rt_mutex that actually queues the waiters and performs
	 * priority inheritance.
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;	/* current owner task, NULL if ownerless */
	atomic_t refcount;		/* dropped via free_pi_state() */

	union futex_key key;		/* the futex this state belongs to */
};
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
/*
 * Per-waiter queue entry, hashed onto a futex_hash_bucket chain.
 * A waiter is woken by having its ->lock_ptr set to NULL under the
 * hash bucket lock (see mark_wake_futex()/unqueue_me()).
 */
struct futex_q {
	struct plist_node list;		/* priority-ordered bucket chain entry */

	struct task_struct *task;	/* the waiting task */
	spinlock_t *lock_ptr;		/* &hb->lock; NULL once woken */
	union futex_key key;		/* key the waiter is queued on */
	struct futex_pi_state *pi_state;	/* PI state, PI futexes only */
	struct rt_mutex_waiter *rt_waiter;	/* rt_mutex waiter, requeue_pi only */
	union futex_key *requeue_pi_key;	/* expected target key for requeue_pi */
	u32 bitset;			/* wake bitset (FUTEX_WAIT_BITSET) */
};

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me()*/
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
229
230
231
232
233
234
/*
 * Hash bucket: a lock plus a priority list of all futex_q's hashed
 * here. ->waiters is an SMP-only fast-path counter letting wakers
 * skip taking ->lock when the bucket is empty (hb_waiters_pending()).
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
} ____cacheline_aligned_in_smp;

/*
 * The global hash table; sized at boot. Wrapped in one struct and
 * aligned so both fields share a cache line.
 */
static struct {
	struct futex_hash_bucket *queues;
	unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues   (__futex_data.queues)
#define futex_hashsize (__futex_data.hashsize)
252
253
/*
 * Take a reference on the mm backing a private-mapping (MMSHARED) key.
 */
static inline void futex_get_mm(union futex_key *key)
{
	atomic_inc(&key->private.mm->mm_count);
	/*
	 * Full barrier so the increment is visible before any
	 * subsequent reads on the waker side — pairs with the barrier
	 * implied by taking the hash bucket lock (get_futex_key_refs()
	 * documents the same ordering requirement).
	 */
	smp_mb__after_atomic_inc();
}
264
265
266
267
/*
 * Reflect a new waiter in the bucket's fast-path counter.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier so a concurrent waker's hb_waiters_pending()
	 * read cannot be reordered before the increment.
	 */
	smp_mb__after_atomic_inc();
#endif
}

/*
 * Reflect the removal of a waiter. No barrier needed: missing a
 * departing waiter only costs the waker an unnecessary lock/unlock.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}

/* On UP there is no race window, so always report "pending". */
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
298
299
300
301
/*
 * Hash a futex key (word, ptr, offset) onto its bucket. Relies on
 * futex_hashsize being a power of two for the mask.
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & (futex_hashsize - 1)];
}
309
310
311
312
313static inline int match_futex(union futex_key *key1, union futex_key *key2)
314{
315 return (key1 && key2
316 && key1->both.word == key2->both.word
317 && key1->both.ptr == key2->both.ptr
318 && key1->both.offset == key2->both.offset);
319}
320
321
322
323
324
325
/*
 * Take a reference on the object a resolved key points at (inode for
 * file-backed shared futexes, mm for private-mapping shared futexes).
 * A NULL ptr means a process-private key, which needs no reference.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		futex_get_mm(key);
		break;
	default:
		/*
		 * Private futexes take no reference; issue a full
		 * barrier instead so the key store is ordered against
		 * the subsequent futex word access.
		 */
		smp_mb();
	}
}
347
348
349
350
351
352
353
/*
 * Drop the reference taken by get_futex_key_refs(). The key must have
 * been fully resolved; a NULL ptr here indicates a caller bug.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where the resolved key should be stored
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Return: a negative error code or 0.
 *
 * The key words are stored in *key on success. On success a reference
 * is held on the backing object (see get_futex_key_refs()); the caller
 * must drop it with put_futex_key().
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast: the key is simply the
	 * (mm, virtual address) pair — no page pinning or mapping
	 * inspection needed.
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/*
		 * Re-lookup with IRQs off to stabilize the compound
		 * page against a concurrent THP split, then move the
		 * pin from the tail page to its head page.
		 */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * If page is still a tail page, swap the tail
			 * pin for a head pin; if the split already
			 * happened, page == page_head and nothing to do.
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);

	/*
	 * If page_head->mapping is NULL, the page is either anonymous
	 * being torn down or was a shmem page swizzled into the swap
	 * cache. For the latter, retry the lookup; otherwise fail with
	 * -EFAULT.
	 */
	if (!page_head->mapping) {
		int shmem_swizzled = PageSwapCache(page_head);
		unlock_page(page_head);
		put_page(page_head);
		if (shmem_swizzled)
			goto again;
		return -EFAULT;
	}

	/*
	 * Anonymous pages key on (mm, address); file-backed pages key
	 * on (inode, page offset).
	 */
	if (PageAnon(page_head)) {
		/*
		 * A read-only anonymous page implies COW hasn't
		 * happened yet, so a shared key cannot be formed —
		 * reject with -EFAULT.
		 */
		if (ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = basepage_index(page);
	}

	get_futex_key_refs(key);

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}
533
/* Release the reference obtained by get_futex_key(). */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
538
539
540
541
542
543
544
545
546
547
548
549
550
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr: fault the page in with FAULT_FLAG_WRITE under
 * mmap_sem so a subsequent atomic write can succeed.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
563
564
565
566
567
568
569
570
/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futexes in @hb)
 *
 * Must be called with the hb lock held. Returns NULL if no waiter on
 * @key is queued. The chain is a plist, so the first match is the
 * highest-priority waiter.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}
582
/*
 * Atomic cmpxchg on the user futex word with page faults disabled;
 * callers hold the hb lock and handle -EFAULT themselves.
 */
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}

/*
 * Read the user futex word with page faults disabled. Returns 0 on
 * success, -EFAULT if the page is not resident.
 */
static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}
605
606
607
608
609
/*
 * PI state allocation is done up front (outside the hb locks) into a
 * per-task one-entry cache, so alloc_pi_state() can't fail in atomic
 * context.
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	atomic_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}

/*
 * Consume the cached pi_state. The caller must have called
 * refill_pi_state_cache() beforehand (WARN otherwise).
 */
static struct futex_pi_state * alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

/*
 * Drop a pi_state reference; on the last reference, detach it from
 * its owner (releasing the proxy-locked rt_mutex) and either recycle
 * it into the per-task cache or free it.
 */
static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already.
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
673
674
675
676
677
/*
 * Look up the task for the TID read from a futex word and take a
 * reference on it. Returns NULL if no such task.
 */
static struct task_struct * futex_find_get_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (p)
		get_task_struct(p);

	rcu_read_unlock();

	return p;
}
691
692
693
694
695
696
/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleanup: we have to make sure any rt_mutex waiters get
 * woken up with the futex owner marked as gone.
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		/*
		 * Lock ordering is hb->lock before pi_lock, so drop
		 * pi_lock, take the bucket lock, then re-take pi_lock.
		 */
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
/*
 * Look up (or create) the futex_pi_state for @key, validating it
 * against the user-space futex value @uval. If a waiter with attached
 * pi_state exists in @hb, reuse it after sanity-checking owner/TID
 * consistency; otherwise create a fresh pi_state attached to the task
 * whose TID is stored in the futex word.
 *
 * On success *ps holds a reference to the pi_state. Returns 0, or:
 *  -EINVAL - state/TID inconsistency (corrupted user space value)
 *  -ESRCH  - no task with the given TID exists
 *  -EPERM  - TID refers to a kernel thread (no mm)
 *  -EAGAIN - owner task is exiting; caller should retry
 */
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps,
		struct task_struct *task)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and
			 * PI futexes [3]
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * Handle the owner died case:
			 */
			if (uval & FUTEX_OWNER_DIED) {
				/*
				 * exit_pi_state_list sets owner to NULL and
				 * wakes the topmost waiter. The task which
				 * acquires the pi_state->rt_mutex will fixup
				 * owner.
				 */
				if (!pi_state->owner) {
					/*
					 * No pi state owner, but the user
					 * space TID is not 0. Inconsistent
					 * state. [5]
					 */
					if (pid)
						return -EINVAL;
					/*
					 * Take a ref on the state and
					 * return. [4]
					 */
					goto out_state;
				}

				/*
				 * If TID is 0, then either the dying owner
				 * has not yet executed exit_pi_state_list()
				 * or some waiter acquired the rt_mutex in the
				 * pi state, but did not yet fixup the TID in
				 * user space.
				 *
				 * Take a ref on the state and return. [6]
				 */
				if (!pid)
					goto out_state;
			} else {
				/*
				 * If the owner died bit is not set,
				 * then the pi_state must have an
				 * owner. [7]
				 */
				if (!pi_state->owner)
					return -EINVAL;
			}

			/*
			 * Bail out if user space manipulated the
			 * futex value. If pi state exists then the
			 * owner TID must be the same as the user
			 * space TID. [9/10]
			 */
			if (pid != task_pid_vnr(pi_state->owner))
				return -EINVAL;

		out_state:
			atomic_inc(&pi_state->refcount);
			*ps = pi_state;
			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0 [1]
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	if (!p->mm) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	/*
	 * No existing pi state. First waiter. [2]
	 */
	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  0 - ready to wait;
 *  1 - acquired the lock;
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, force_take = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
	 * the locks. It will most likely not succeed.
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval)) {
		/*
		 * We verify whether there is a waiter enqueued on the
		 * futex. A non-PI waiter here is inconsistent state —
		 * return -EINVAL; otherwise report the lock acquired.
		 */
		return futex_top_waiter(hb, key) ? -EINVAL : 1;
	}

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * Should we force take the futex? See below.
	 */
	if (unlikely(force_take)) {
		/*
		 * Keep the OWNER_DIED and the WAITERS bit and set the
		 * new TID value.
		 */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		force_take = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to forced take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps, task);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * The owner task might have terminated without
			 * running exit_pi_state_list(). Re-read the
			 * user space value to see whether the TID is
			 * gone; if so, force-take the futex on the
			 * next retry iteration.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * If the TID got cleared (owner died and
			 * robust-list processing wiped it), take over
			 * the futex.
			 */
			if (!(curval & FUTEX_TID_MASK)) {
				force_take = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}
1076
1077
1078
1079
1080
1081
1082
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}
1095
1096
1097
1098
1099
1100
1101
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	/*
	 * Queue the task for later wakeup and unqueue it from the
	 * bucket while still holding the bucket lock.
	 */
	wake_q_add(wake_q, p);
	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del —
	 * pairs with the barrier() in unqueue_me().
	 */
	smp_wmb();
	q->lock_ptr = NULL;
}
1124
/*
 * Unlock a PI futex: hand the user-space futex word and the pi_state
 * over to the next rt_mutex waiter (or to @this->task when there is no
 * rt_mutex owner yet), then unlock the rt_mutex to wake it.
 *
 * Caller must hold the hash bucket lock and own the pi_state.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;
	int ret = 0;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always
	 * kept enabled while there is PI state around. We cleanup the
	 * owner died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
		ret = -EFAULT;
	else if (curval != uval)
		ret = -EINVAL;
	if (ret) {
		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
		return ret;
	}

	/* Move the pi_state from the old owner's list to the new owner's. */
	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}
1185
/*
 * Fast unlock of a PI futex with no kernel waiters: atomically clear
 * the futex word if it still equals @uval.
 */
static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 uninitialized_var(oldval);

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit has not to be preserved here. We are the owner:
	 */
	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
		return -EFAULT;
	if (oldval != uval)
		return -EAGAIN;

	return 0;
}
1201
1202
1203
1204
/*
 * Express the locking dependencies for lockdep: always acquire the
 * lower-addressed bucket lock first to avoid ABBA deadlocks.
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}

/* Release both bucket locks, avoiding a double unlock when hb1 == hb2. */
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
1225
1226
1227
1228
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 * Returns the number of waiters woken, or a negative error code.
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	WAKE_Q(wake_q);

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		goto out_put_key;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
out_put_key:
	put_futex_key(&key);
out:
	return ret;
}
1277
1278
1279
1280
1281
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		/* Private futexes keep their keys across the retry. */
		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	/* Op succeeded: also wake up to nr_wake2 waiters on uaddr2. */
	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				mark_wake_futex(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
1374
1375
1376
1377
1378
1379
1380
1381
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		plist_add(&q->list, &hb2->chain);
		hb_waiters_inc(hb2);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it
 * atomically. Wake the top waiter if we succeed.
 *
 * Return:
 *  0 - failed to acquire the lock atomically;
 * >0 - acquired the lock, return value is vpid of the top_waiter
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret, vpid;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unecessarily as it will force the subsequent unlock to enter
	 * the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;
	}
	return ret;
}
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
 * uaddr2 atomically on behalf of the top waiter.
 *
 * Return:
 * >=0 - on success, the number of tasks requeued or woken;
 *  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	WAKE_Q(wake_q);

	if (nr_wake < 0 || nr_requeue < 0)
		return -EINVAL;

	if (requeue_pi) {
		/*
		 * Requeue PI only works on two distinct uaddrs. This
		 * check is only valid for private futexes. See below.
		 */
		if (uaddr1 == uaddr2)
			return -EINVAL;

		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	if (pi_state != NULL) {
		/*
		 * We will have to lookup the pi_state again, so free this one
		 * to keep the accounting correct.
		 */
		free_pi_state(pi_state);
		pi_state = NULL;
	}

	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (requeue_pi && match_futex(&key1, &key2)) {
		ret = -EINVAL;
		goto out_put_keys;
	}

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	hb_waiters_inc(hb2);
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it. If the lock was taken, ret contains the
		 * vpid of the top waiter task.
		 */
		if (ret > 0) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			/*
			 * If we acquired the lock, then the user space value
			 * of uaddr2 should be vpid. It cannot be changed by
			 * the top waiter as it is blocked on hb2 lock if it
			 * tries to do so. If something fiddled with it behind
			 * our back the pi state lookup might unearth it. So
			 * we rather use the known value than rereading and
			 * handing potential crap to lookup_pi_state.
			 */
			ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
		}

		switch (ret) {
		case 0:
			break;
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/* The owner was exiting, try again. */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 *
		 * We should never be requeueing a futex_q with a pi_state,
		 * which is awaiting a futex_unlock_pi().
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter) ||
		    this->pi_state) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			mark_wake_futex(&wake_q, this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/* Prepare the waiter to take the rt_mutex. */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task, 1);
			if (ret == 1) {
				/* We got the lock. */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/* -EDEADLK */
				this->pi_state = NULL;
				free_pi_state(pi_state);
				goto out_unlock;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	hb_waiters_dec(hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	if (pi_state != NULL)
		free_pi_state(pi_state);
	return ret ? ret : task_count;
}
1766
1767
/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	/*
	 * Increment the counter before taking the lock so that
	 * a potential waker won't miss a to-be-slept task that is
	 * waiting for the spinlock. This is safe as all queue_lock()
	 * users end up calling queue_me(). Similarly, for housekeeping,
	 * decrement the counter at queue_unlock() when some error has
	 * occurred and we don't end up adding the task to the list.
	 */
	hb_waiters_inc(hb);

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}

/* Undo queue_lock() without having queued: unlock and drop the counter. */
static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
 * an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *   1 - if the futex_q was still queued (and we removed unqueued it);
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
1882
1883
1884
1885
1886
1887
/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. *q->lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}
1899
1900
1901
1902
1903
1904
1905
/*
 * Fixup the pi_state owner and possibly acquire the lock if we
 * haven't already: update the user-space TID to @newowner and move
 * the pi_state between the old and new owner's pi_state_list.
 *
 * Must be called with hash bucket lock held (q->lock_ptr).
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
1997
1998static long futex_wait_restart(struct restart_block *restart);
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  1 - success, lock taken;
 *  0 - success, lock not taken;
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
2072
2073
2074
2075
2076
2077
2078
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or NULL for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it: set_current_state() is a full barrier, and queue_me()
	 * drops the hb lock, serializing access to the hash list against
	 * any waker.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout)
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket. Get the futex value and
 * compare it with the expected value. Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *  0 - uaddr contains val and hb has been locked;
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		/* Fault with the hb lock dropped, then redo the read. */
		queue_unlock(*hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}
2186
2187static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2188 ktime_t *abs_time, u32 bitset)
2189{
2190 struct hrtimer_sleeper timeout, *to = NULL;
2191 struct restart_block *restart;
2192 struct futex_hash_bucket *hb;
2193 struct futex_q q = futex_q_init;
2194 int ret;
2195
2196 if (!bitset)
2197 return -EINVAL;
2198 q.bitset = bitset;
2199
2200 if (abs_time) {
2201 to = &timeout;
2202
2203 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2204 CLOCK_REALTIME : CLOCK_MONOTONIC,
2205 HRTIMER_MODE_ABS);
2206 hrtimer_init_sleeper(to, current);
2207 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2208 current->timer_slack_ns);
2209 }
2210
2211retry:
2212
2213
2214
2215
2216 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2217 if (ret)
2218 goto out;
2219
2220
2221 futex_wait_queue_me(hb, &q, to);
2222
2223
2224 ret = 0;
2225
2226 if (!unqueue_me(&q))
2227 goto out;
2228 ret = -ETIMEDOUT;
2229 if (to && !to->task)
2230 goto out;
2231
2232
2233
2234
2235
2236 if (!signal_pending(current))
2237 goto retry;
2238
2239 ret = -ERESTARTSYS;
2240 if (!abs_time)
2241 goto out;
2242
2243 restart = ¤t_thread_info()->restart_block;
2244 restart->fn = futex_wait_restart;
2245 restart->futex.uaddr = uaddr;
2246 restart->futex.val = val;
2247 restart->futex.time = abs_time->tv64;
2248 restart->futex.bitset = bitset;
2249 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
2250
2251 ret = -ERESTART_RESTARTBLOCK;
2252
2253out:
2254 if (to) {
2255 hrtimer_cancel(&to->timer);
2256 destroy_hrtimer_on_stack(&to->timer);
2257 }
2258 return ret;
2259}
2260
2261
2262static long futex_wait_restart(struct restart_block *restart)
2263{
2264 u32 __user *uaddr = restart->futex.uaddr;
2265 ktime_t t, *tp = NULL;
2266
2267 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
2268 t.tv64 = restart->futex.time;
2269 tp = &t;
2270 }
2271 restart->fn = do_no_restart_syscall;
2272
2273 return (long)futex_wait(uaddr, restart->futex.flags,
2274 restart->futex.val, tp, restart->futex.bitset);
2275}
2276
2277
2278
2279
2280
2281
2282
2283
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block as a consequence of relying
 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
 * a 0 value of the futex too.)
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Task is exiting and we just wait for the
			 * exit to complete.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, proper cleanup was done and we
	 * should return that error code. If it returned 1, we own the lock.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}
2399
2400
2401
2402
2403
2404
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	u32 uval, vpid = task_pid_vnr(current);
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * again. If it succeeds then we can return without waking
	 * anyone else up. We only try this if neither the waiters nor
	 * the owner died bit are set.
	 */
	if (!(uval & ~FUTEX_TID_MASK) &&
	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == vpid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	ret = unlock_futex_pi(uaddr, uval);
	if (ret == -EFAULT)
		goto pi_faulted;

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

out:
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout, a signal, or a spurious
 * wakeup, and dequeue the q.  Caller must hold hb->lock.
 *
 * Return:
 *  0 = no early wakeup detected;
 * <0 = -ETIMEDOUT, -ERESTARTNOINTR, or -EWOULDBLOCK (spurious wakeup)
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * If q->key still matches the original (not key2), the requeue to the
	 * target futex has not happened yet and we were woken early.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));

		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);
		hb_waiters_dec(hb);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the pi futex uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.)
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.
 *
 * Return:
 *  0 - On success;
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
	RB_CLEAR_NODE(&rt_waiter.tree_entry);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (match_futex(&q.key, &key2)) {
		ret = -EINVAL;
		goto out_put_keys;
	}

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up by futex_requeue(); the requeue code
		 * installed our rt_waiter as a proxy on the pi_mutex.  Finish
		 * the lock acquisition (or handle its failure) here.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, proper cleanup was done
		 * and we should return that error code. If it returned 1, we
		 * own the lock.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}
2761
2762
2763
2764
2765
2766
2767
/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		/* Task lookup is only valid under the RCU read lock. */
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	head = p->robust_list;
	rcu_read_unlock();

	/* put_user() may fault/sleep, so it must happen outside RCU. */
	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
2806
2807
2808
2809
2810
/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, preserving the FUTEX_WAITERS bit so
		 * that waiters can still be detected afterwards.
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * If the cmpxchg faults, try to fault in the futex
		 * with R/W verification and retry; get_user() above
		 * does not guarantee write access. If that fails we
		 * give up and leave the futex as-is.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		/* Value changed under us - re-read and re-check ownership. */
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}
2857
2858
2859
2860
2861static inline int fetch_robust_entry(struct robust_list __user **entry,
2862 struct robust_list __user * __user *head,
2863 unsigned int *pi)
2864{
2865 unsigned long uentry;
2866
2867 if (get_user(uentry, (unsigned long __user *)head))
2868 return -EFAULT;
2869
2870 *entry = (void __user *)(uentry & ~1UL);
2871 *pi = uentry & 1;
2872
2873 return 0;
2874}
2875
2876
2877
2878
2879
2880
2881
/*
 * Walk curr->robust_list (userspace list of locks held by the dying
 * task) and mark any lock it owns with FUTEX_OWNER_DIED, waking up a
 * waiter if there is one. The list is a user-space data structure and
 * must be accessed with fault-tolerant accessors.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}
2944
/*
 * do_futex() - dispatch a futex operation to its implementation.
 * Decodes the flags from @op and routes to the per-command handlers.
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		/* CLOCK_REALTIME is only valid for the two *_BITSET waits. */
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		/* All PI ops need a working futex cmpxchg. */
		if (!futex_cmpxchg_enabled)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		return futex_lock_pi(uaddr, flags, val, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, 0, timeout, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
3000
3001
/* The futex(2) syscall entry point: decode the timespec and dispatch. */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		/* FUTEX_WAIT takes a relative timeout; make it absolute. */
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
3034
/*
 * Boot-time initialization: size and allocate the futex hash table and
 * probe whether the arch provides a functional futex cmpxchg.
 */
static int __init futex_init(void)
{
	u32 curval;
	unsigned int futex_shift;
	unsigned long i;

#if CONFIG_BASE_SMALL
	futex_hashsize = 16;
#else
	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
#endif

	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
					       futex_hashsize, 0,
					       futex_hashsize < 256 ? HASH_SMALL : 0,
					       &futex_shift, NULL,
					       futex_hashsize, futex_hashsize);
	/* The allocator may round; recompute the real size from the shift. */
	futex_hashsize = 1UL << futex_shift;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * functionality. We want to know that before we call in any
	 * of the complex code paths. Also we want to prevent
	 * registration of robust lists in that case. NULL is
	 * guaranteed to fault and we get -EFAULT on functional
	 * implementation, the non-functional ones will return
	 * -ENOSYS.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;

	for (i = 0; i < futex_hashsize; i++) {
		atomic_set(&futex_queues[i].waiters, 0);
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
3074__initcall(futex_init);
3075