1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63#include <linux/sched/rt.h>
64#include <linux/hugetlb.h>
65#include <linux/freezer.h>
66#include <linux/bootmem.h>
67#include <linux/fault-inject.h>
68
69#include <asm/futex.h>
70
71#include "locking/rtmutex_common.h"
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
/*
 * Nonzero when futex_atomic_cmpxchg_inatomic() works on this machine;
 * it gates all PI-futex paths (see e.g. the early-out in
 * exit_pi_state_list()).
 * NOTE(review): presumably probed and set once at boot — the
 * initialisation is not visible in this chunk; confirm against futex_init().
 */
int __read_mostly futex_cmpxchg_enabled;
#endif
177
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SHARED 0x01
#define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04
185
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely
	 * (see exit_pi_state_list()):
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	atomic_t refcount;

	union futex_key key;
};
206
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list node within a hash bucket chain
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock (NULLed on wakeup, see
 *			mark_wake_futex())
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority-inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q is considered woken when plist_node_empty(&q->list) ||
 * q->lock_ptr == 0; the wakeup path always makes the first condition
 * true before the second (see mark_wake_futex()/unqueue_me()).
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me() */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
246
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location. Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;	/* SMP-only waiter count, see hb_waiters_*() */
	spinlock_t lock;
	struct plist_head chain;
} ____cacheline_aligned_in_smp;

/*
 * Number of hash buckets; must be a power of two since hash_futex()
 * uses (futex_hashsize - 1) as a mask.
 */
static unsigned long __read_mostly futex_hashsize;

static struct futex_hash_bucket *futex_queues;
261
/*
 * Fault injection support for futexes: lets tests force futex user-memory
 * accesses to fail on demand via the "fail_futex=" boot parameter and
 * debugfs knobs.
 */
#ifdef CONFIG_FAIL_FUTEX

static struct {
	struct fault_attr attr;

	u32 ignore_private;	/* when set, never fail private futexes */
} fail_futex = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_private = 0,
};

static int __init setup_fail_futex(char *str)
{
	return setup_fault_attr(&fail_futex.attr, str);
}
__setup("fail_futex=", setup_fail_futex);

/*
 * should_fail_futex - decide whether to inject a fault for this operation
 * @fshared: true for a shared futex
 */
static bool should_fail_futex(bool fshared)
{
	if (fail_futex.ignore_private && !fshared)
		return false;

	return should_fail(&fail_futex.attr, 1);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

/* Expose the fail_futex knobs (incl. "ignore-private") under debugfs. */
static int __init fail_futex_debugfs(void)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_futex", NULL,
					&fail_futex.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	if (!debugfs_create_bool("ignore-private", mode, dir,
				 &fail_futex.ignore_private)) {
		debugfs_remove_recursive(dir);
		return -ENOMEM;
	}

	return 0;
}

late_initcall(fail_futex_debugfs);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif /* CONFIG_FAIL_FUTEX */
321
/* Pin the mm referenced by a private futex key. */
static inline void futex_get_mm(union futex_key *key)
{
	atomic_inc(&key->private.mm->mm_count);
	/*
	 * Full barrier after the increment so that get_futex_key()
	 * implies a full barrier on the private-futex path as well
	 * (matching the smp_mb() in get_futex_key_refs()).
	 */
	smp_mb__after_atomic();
}
332
/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier after the increment: orders the waiter-count
	 * update before any subsequent futex-word access (the wake side
	 * checks hb_waiters_pending()).
	 */
	smp_mb__after_atomic();
#endif
}
346
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
357
/*
 * Return whether any waiters may be queued on this bucket. On !SMP we
 * cannot track waiters cheaply, so conservatively report 1 (callers then
 * always take the bucket lock).
 */
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
366
/*
 * We hash on the keys returned from get_futex_key (see below) to pick
 * the bucket; futex_hashsize must be a power of two for the mask below.
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & (futex_hashsize - 1)];
}
377
378
379
380
381static inline int match_futex(union futex_key *key1, union futex_key *key2)
382{
383 return (key1 && key2
384 && key1->both.word == key2->both.word
385 && key1->both.ptr == key2->both.ptr
386 && key1->both.offset == key2->both.offset);
387}
388
/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		futex_get_mm(key);
		break;
	default:
		/*
		 * Private futexes hold no explicit reference, but we
		 * still need the full barrier that ihold()/futex_get_mm()
		 * provide implicitly on the other paths, so that
		 * get_futex_key() always implies a full barrier.
		 */
		smp_mb();
	}
}
415
/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held. This is
 * a no-op for private futexes, see comment in the get
 * counterpart.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}
439
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, nonzero for PROCESS_SHARED
 * @key:	address where result is stored
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Return: a negative error code or 0.
 *
 * The key words are stored in *key on success: for a private futex
 * (current->mm, aligned uaddr, offset); for a shared anonymous page
 * the same tagged FUT_OFF_MMSHARED; for a file-backed page
 * (inode, page index, offset) tagged FUT_OFF_INODE.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
		return -EFAULT;

	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma()
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);  /* implies full memory barrier */
		return 0;
	}

again:
	/* Allow fault injection on the shared slow path too */
	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/* serialize against THP splitting under us */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * page_head is valid pointer but we must pin
			 * it before taking the PG_lock and/or
			 * PG_compound_lock. The moment we re-enable
			 * irqs the head page could be split/freed
			 * from under us, so we can't take those locks
			 * on an unpinned page.
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);

	/*
	 * If page_head->mapping is NULL, then it cannot be a PageAnon
	 * page; but it might be the ZERO_PAGE or in the gate area or
	 * in a special mapping (all cases which we are happy to fail);
	 * or it may have been a good file page when get_user_pages_fast
	 * found it, but truncated or holepunched or subjected to
	 * invalidation before we got the page lock (also cases which we
	 * are happy to fail).
	 *
	 * The case we do have to guard against is when memory pressure
	 * moved a shmem page from filecache to swapcache beneath us: an
	 * unlikely race, but we do need to retry for page_head->mapping.
	 */
	if (!page_head->mapping) {
		int shmem_swizzled = PageSwapCache(page_head);
		unlock_page(page_head);
		put_page(page_head);
		if (shmem_swizzled)
			goto again;
		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page_head)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (unlikely(should_fail_futex(fshared)) || ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = basepage_index(page);
	}

	get_futex_key_refs(key); /* implies full memory barrier */

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}
608
/* Release the reference obtained via get_futex_key(). */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
613
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling fixup_user_fault() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
638
639
640
641
642
643
644
645
646static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
647 union futex_key *key)
648{
649 struct futex_q *this;
650
651 plist_for_each_entry(this, &hb->chain, list) {
652 if (match_futex(&this->key, key))
653 return this;
654 }
655 return NULL;
656}
657
/*
 * cmpxchg the user futex word with pagefaults disabled; returns the
 * arch helper's status, with the value read back in *curval.
 */
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
669
/*
 * Read the user futex word with pagefaults disabled (for use under
 * the hash-bucket spinlock). Returns -EFAULT when the page is not
 * present; callers then drop locks and fault it in.
 */
static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}
680
681
682
683
684
685static int refill_pi_state_cache(void)
686{
687 struct futex_pi_state *pi_state;
688
689 if (likely(current->pi_state_cache))
690 return 0;
691
692 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
693
694 if (!pi_state)
695 return -ENOMEM;
696
697 INIT_LIST_HEAD(&pi_state->list);
698
699 pi_state->owner = NULL;
700 atomic_set(&pi_state->refcount, 1);
701 pi_state->key = FUTEX_KEY_INIT;
702
703 current->pi_state_cache = pi_state;
704
705 return 0;
706}
707
708static struct futex_pi_state * alloc_pi_state(void)
709{
710 struct futex_pi_state *pi_state = current->pi_state_cache;
711
712 WARN_ON(!pi_state);
713 current->pi_state_cache = NULL;
714
715 return pi_state;
716}
717
/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 */
static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
754
755
756
757
758
759static struct task_struct * futex_find_get_task(pid_t pid)
760{
761 struct task_struct *p;
762
763 rcu_read_lock();
764 p = find_task_by_vpid(pid);
765 if (p)
766 get_task_struct(p);
767
768 rcu_read_unlock();
769
770 return p;
771}
772
/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
827
/*
 * Validate the kernel pi_state found for a futex against the user space
 * value of that futex and attach to it (take a reference) on success.
 *
 * The user space TID field and the kernel pi_state must be consistent:
 * if the OWNER_DIED bit is not set, the pi_state must have an owner
 * whose TID matches the user space value. If OWNER_DIED is set, a
 * NULL owner is only valid together with TID == 0, and TID == 0 is
 * always acceptable (a waiter took over but has not fixed up the user
 * space value yet). Anything else is an inconsistency caused by user
 * space fiddling with the futex word and is rejected with -EINVAL.
 *
 * Serialization is provided by the caller holding the hash bucket lock;
 * pi_state is only ever attached to a futex_q under that lock.
 */
static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;

	/*
	 * Userspace might have messed up non-PI and PI futexes
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	WARN_ON(!atomic_read(&pi_state->refcount));

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state:
			 */
			if (pid)
				return -EINVAL;
			/*
			 * Take a ref on the state and return success.
			 */
			goto out_state;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success.
		 */
		if (!pid)
			goto out_state;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner:
		 */
		if (!pi_state->owner)
			return -EINVAL;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID:
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		return -EINVAL;
out_state:
	atomic_inc(&pi_state->refcount);
	*ps = pi_state;
	return 0;
}
949
/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 */
static int attach_to_pi_owner(u32 uval, union futex_key *key,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct futex_pi_state *pi_state;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	/*
	 * No existing pi state. First waiter.
	 */
	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
1021
/*
 * Find or create the pi_state for @key: if a waiter already exists on
 * the futex, validate and attach to its pi_state; otherwise attach to
 * the owner TID stored in @uval. Called with the hash bucket lock held.
 */
static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
			   union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_q *match = futex_top_waiter(hb, key);

	/*
	 * If there is a waiter on that futex, validate it and
	 * attach to the pi_state when the validation succeeds.
	 */
	if (match)
		return attach_to_pi_state(uval, match->pi_state, ps);

	/*
	 * We are the first waiter - try to look up the owner based on
	 * @uval and attach to it.
	 */
	return attach_to_pi_owner(uval, key, ps);
}
1040
/*
 * Atomically replace the futex value @uval by @newval; returns -EFAULT
 * on fault, -EAGAIN when the value changed under us, 0 on success.
 */
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
	u32 uninitialized_var(curval);

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;

	/* If user space value changed, let the caller retry */
	return curval != uval ? -EAGAIN : 0;
}
1054
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  0 - ready to wait;
 *  1 - acquired the lock;
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *match;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (get_futex_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	if ((unlikely(should_fail_futex(true))))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	match = futex_top_waiter(hb, key);
	if (match)
		return attach_to_pi_state(uval, match->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		/* If the take over worked, return 1 */
		return ret < 0 ? ret : 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uval, key, ps);
}
1148
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}
1167
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	/*
	 * Queue the task for later wakeup for after we've released
	 * the hb->lock. wake_q_add() grabs reference to p.
	 */
	wake_q_add(wake_q, p);
	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;
}
1196
/*
 * Wake the top waiter of a PI futex: hand the user space futex word and
 * the pi_state ownership over to the next owner, then unlock the
 * rt_mutex. Called with hb->lock held; drops it before the wakeups.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
			 struct futex_hash_bucket *hb)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;
	WAKE_Q(wake_q);
	bool deboost;
	int ret = 0;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always
	 * kept enabled while there is PI state around. We cleanup the
	 * owner died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	if (unlikely(should_fail_futex(true)))
		ret = -EFAULT;

	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
		ret = -EFAULT;
	else if (curval != uval)
		ret = -EINVAL;
	if (ret) {
		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
		return ret;
	}

	/* Move the pi_state from the old owner (current) to new_owner: */
	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);

	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);

	/*
	 * First unlock HB so the waiter does not spin on it once he got woken
	 * up. Second wake up the waiter before the priority is adjusted. If we
	 * deboost first (and lose our higher priority), then the task might get
	 * scheduled away before the wake up can take place.
	 */
	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
	if (deboost)
		rt_mutex_adjust_prio(current);

	return 0;
}
1275
1276
1277
1278
1279static inline void
1280double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1281{
1282 if (hb1 <= hb2) {
1283 spin_lock(&hb1->lock);
1284 if (hb1 < hb2)
1285 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
1286 } else {
1287 spin_lock(&hb2->lock);
1288 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
1289 }
1290}
1291
1292static inline void
1293double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1294{
1295 spin_unlock(&hb1->lock);
1296 if (hb1 != hb2)
1297 spin_unlock(&hb2->lock);
1298}
1299
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 * Returns the number of tasks woken or a negative error code.
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	WAKE_Q(wake_q);

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		goto out_put_key;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
out_put_key:
	put_futex_key(&key);
out:
	return ret;
}
1351
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 *
 * Performs the encoded @op on *uaddr2, then wakes up to @nr_wake waiters
 * on @uaddr1 and, when the op result says so, up to @nr_wake2 waiters on
 * @uaddr2. Returns the total number woken or a negative error.
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				mark_wake_futex(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
1448
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		plist_add(&q->list, &hb2->chain);
		hb_waiters_inc(hb2);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
1475
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
1506
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  0 - failed to acquire the lock atomically;
 * >0 - acquired the lock, return value is vpid of the top_waiter
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret, vpid;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unecessarily as it will force the subsequent unlock to enter
	 * the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;
	}
	return ret;
}
1575
/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
 * uaddr2 atomically on behalf of the top waiter.
 *
 * Return:
 * >=0 - on success, the number of tasks requeued or woken;
 *  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	WAKE_Q(wake_q);

	if (requeue_pi) {
		/*
		 * Requeue PI only works on two distinct uaddrs. This
		 * check is only valid for private futexes. See below.
		 */
		if (uaddr1 == uaddr2)
			return -EINVAL;

		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (requeue_pi && match_futex(&key1, &key2)) {
		ret = -EINVAL;
		goto out_put_keys;
	}

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	hb_waiters_inc(hb2);
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it. If the lock was taken, ret contains the
		 * vpid of the top waiter task.
		 */
		if (ret > 0) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			/*
			 * If we acquired the lock, then the user space value
			 * of uaddr2 should be vpid. It cannot be changed by
			 * the top waiter as it is blocked on hb2 lock if it
			 * tries to do so. If something fiddled with it behind
			 * our back the pi state lookup might unearth it. So
			 * we rather use the known value than rereading and
			 * handing potential crap to lookup_pi_state.
			 */
			ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
		}

		switch (ret) {
		case 0:
			break;
		case -EFAULT:
			free_pi_state(pi_state);
			pi_state = NULL;
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - Owner is exiting and we just wait for the
			 *   exit to complete.
			 * - The user space value changed.
			 */
			free_pi_state(pi_state);
			pi_state = NULL;
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 *
		 * We should never be requeueing a futex_q with a pi_state,
		 * which is awaiting a futex_unlock_pi().
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter) ||
		    this->pi_state) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			mark_wake_futex(&wake_q, this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/* Prepare the waiter to take the rt_mutex. */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task);
			if (ret == 1) {
				/* We got the lock. */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/* -EDEADLK */
				this->pi_state = NULL;
				free_pi_state(pi_state);
				goto out_unlock;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

out_unlock:
	free_pi_state(pi_state);
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	hb_waiters_dec(hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer. We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret ? ret : task_count;
}
1839
/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	/*
	 * Increment the counter before taking the lock so that
	 * a potential waker won't miss a to-be-slept task that is
	 * waiting for the spinlock. This is safe as all queue_lock()
	 * users end up calling queue_me(). Similarly, for housekeeping,
	 * decrement the counter at queue_unlock() when some error has
	 * occurred and we don't end up adding the task to the list.
	 */
	hb_waiters_inc(hb);

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}
1863
/* Undo queue_lock() when the task was not actually queued. */
static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}
1871
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
 * an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}
1904
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *   1 - if the futex_q was still queued (and we removed unqueued it);
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
1955
/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. *q->lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}
1972
/*
 * Fixup the pi_state owner after we have acquired (or lost) the lock:
 * update the user space TID field and move the pi_state onto
 * @newowner's pi_state_list.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
2070
2071static long futex_wait_restart(struct restart_block *restart);
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  1 - success, lock taken;
 *  0 - success, lock not taken;
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on
	 * the way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from
		 * the rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
2145
2146
2147
2148
2149
2150
2151
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or NULL for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() includes the needed memory barrier
	 * and queue_me() calls spin_unlock() upon completion, both
	 * serializing access to the hash list and forcing another memory
	 * barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout)
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *  0 - uaddr contains val and hb has been locked;
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(*hb);

		/* Fault in the page and retry with the appropriate key. */
		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}
2259
2260static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2261 ktime_t *abs_time, u32 bitset)
2262{
2263 struct hrtimer_sleeper timeout, *to = NULL;
2264 struct restart_block *restart;
2265 struct futex_hash_bucket *hb;
2266 struct futex_q q = futex_q_init;
2267 int ret;
2268
2269 if (!bitset)
2270 return -EINVAL;
2271 q.bitset = bitset;
2272
2273 if (abs_time) {
2274 to = &timeout;
2275
2276 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2277 CLOCK_REALTIME : CLOCK_MONOTONIC,
2278 HRTIMER_MODE_ABS);
2279 hrtimer_init_sleeper(to, current);
2280 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2281 current->timer_slack_ns);
2282 }
2283
2284retry:
2285
2286
2287
2288
2289 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2290 if (ret)
2291 goto out;
2292
2293
2294 futex_wait_queue_me(hb, &q, to);
2295
2296
2297 ret = 0;
2298
2299 if (!unqueue_me(&q))
2300 goto out;
2301 ret = -ETIMEDOUT;
2302 if (to && !to->task)
2303 goto out;
2304
2305
2306
2307
2308
2309 if (!signal_pending(current))
2310 goto retry;
2311
2312 ret = -ERESTARTSYS;
2313 if (!abs_time)
2314 goto out;
2315
2316 restart = ¤t->restart_block;
2317 restart->fn = futex_wait_restart;
2318 restart->futex.uaddr = uaddr;
2319 restart->futex.val = val;
2320 restart->futex.time = abs_time->tv64;
2321 restart->futex.bitset = bitset;
2322 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
2323
2324 ret = -ERESTART_RESTARTBLOCK;
2325
2326out:
2327 if (to) {
2328 hrtimer_cancel(&to->timer);
2329 destroy_hrtimer_on_stack(&to->timer);
2330 }
2331 return ret;
2332}
2333
2334
2335static long futex_wait_restart(struct restart_block *restart)
2336{
2337 u32 __user *uaddr = restart->futex.uaddr;
2338 ktime_t t, *tp = NULL;
2339
2340 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
2341 t.tv64 = restart->futex.time;
2342 tp = &t;
2343 }
2344 restart->fn = do_no_restart_syscall;
2345
2346 return (long)futex_wait(uaddr, restart->futex.flags,
2347 restart->futex.val, tp, restart->futex.bitset);
2348}
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block as a consequence of relying
 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
 * a 0 value of the futex too.)
 *
 * Also serves FUTEX_TRYLOCK_PI when @trylock is non-zero.
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,
		 * or failed. Either way, we do _not_ block.
		 */
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - Task is exiting and we just wait for the
			 *   exit to complete.
			 * - The user space value changed.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock) {
		ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
	} else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}
2481
2482
2483
2484
2485
2486
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb;
	struct futex_q *match;
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (ret)
		return ret;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * Check waiters first. We do not trust user space values at
	 * all and we at least want to know if user space fiddled
	 * with the futex value instead of blindly unlocking.
	 */
	match = futex_top_waiter(hb, &key);
	if (match) {
		ret = wake_futex_pi(uaddr, uval, match, hb);
		/*
		 * In case of success wake_futex_pi dropped the hash
		 * bucket lock.
		 */
		if (!ret)
			goto out_putkey;
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		/*
		 * wake_futex_pi has detected invalid state. Tell user
		 * space.
		 */
		goto out_unlock;
	}

	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel. Waiters which are about to queue themselves are stuck
	 * on the hash bucket lock, so they can safely modify the user
	 * space value without us seeing it. So we simply can attempt to
	 * release it here. The hash bucket lock protects us from the
	 * WAITERS bit being set.
	 */
	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
		goto pi_faulted;

	/*
	 * If uval has changed, let user space handle it.
	 */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
out_putkey:
	put_futex_key(&key);
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Return:
 *  0 = no early wakeup detected;
 * <0 = -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to uaddr as the requeue target hb
	 * is different.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);
		hb_waiters_dec(hb);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the pi futex uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.); both futexes
 *		must be of the same type (no requeueing from private to
 *		shared, etc.)
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.  This ensures the rt_mutex maintains an owner when
 * it has waiters; without one, the pi logic would not know which task to
 * boost/deboost, if there was a need to.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via the following--
 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
 * 2) wakeup on uaddr2 after a requeue
 * 3) signal
 * 4) timeout
 *
 * If 3, cleanup and return -ERESTARTNOINTR.
 *
 * If 2, we may then block on trying to take the rt_mutex and return via:
 * 5) successful lock
 * 6) signal
 * 7) timeout
 * 8) other lock acquisition failure
 *
 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
 *
 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
 *
 * Return:
 *  0 - On success;
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
	RB_CLEAR_NODE(&rt_waiter.tree_entry);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (match_futex(&q.key, &key2)) {
		queue_unlock(hb);
		ret = -EINVAL;
		goto out_put_keys;
	}

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code.
	 * The futex_requeue dropped our key1 reference and incremented our
	 * key2 reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up in futex_requeue(), and need to take
		 * the rt_mutex to acquire uaddr2. We pre-setup the waiter
		 * before calling schedule in futex_wait_queue_me().
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if
		 * we haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall,
		 * but it would detect that the user space "val" changed and
		 * return -EWOULDBLOCK.  Save the overhead of the restart and
		 * return -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the user-space list head
 * @len:	length of the list-head, as userspace expects (for future
 *		extensibility; currently only one size is accepted)
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}
2845
2846
2847
2848
2849
2850
2851
/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ))
		goto err_unlock;

	/* Read the head pointer inside the RCU section that pins @p. */
	head = p->robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
2890
2891
2892
2893
2894
/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * thread-death.) The rest of the cleanup is done in
		 * userspace.
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * If the cmpxchg faults, try to fault in the futex
		 * with R/W verification via fault_in_user_writeable()
		 * and retry -- the get_user() above only guarantees
		 * read access. If the fault-in fails too, we give up
		 * and leave the futex locked.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}
2941
2942
2943
2944
2945static inline int fetch_robust_entry(struct robust_list __user **entry,
2946 struct robust_list __user * __user *head,
2947 unsigned int *pi)
2948{
2949 unsigned long uentry;
2950
2951 if (get_user(uentry, (unsigned long __user *)head))
2952 return -EFAULT;
2953
2954 *entry = (void __user *)(uentry & ~1UL);
2955 *pi = uentry & 1;
2956
2957 return 0;
2958}
2959
2960
2961
2962
2963
2964
2965
/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}
3028
/*
 * do_futex() - Dispatch a futex operation to its implementation.
 *
 * Decodes the private/shared and clock-realtime bits out of @op and
 * routes @cmd to the matching futex_*() handler. PI and requeue-PI
 * operations additionally require a working atomic cmpxchg on the
 * futex word (futex_cmpxchg_enabled).
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		if (!futex_cmpxchg_enabled)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, NULL, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
3084
3085
/* The futex(2) syscall entry point: validate the timeout and dispatch. */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
			return -EFAULT;
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		/* FUTEX_WAIT takes a relative timeout; convert to absolute. */
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
3120
static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
	u32 curval;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * operation. If that operation is not implemented, the routine
	 * returns -ENOSYS; a working implementation faults on the NULL
	 * address and returns -EFAULT, in which case we can enable the
	 * cmpxchg-dependent futex operations.
	 *
	 * NOTE: this must never be called from interrupt context as
	 * cmpxchg_futex_value_locked() can fault.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;
#endif
}
3140
static int __init futex_init(void)
{
	unsigned int futex_shift;
	unsigned long i;

	/* Size the hash by CPU count; keep it tiny on CONFIG_BASE_SMALL. */
#if CONFIG_BASE_SMALL
	futex_hashsize = 16;
#else
	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
#endif

	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
					       futex_hashsize, 0,
					       futex_hashsize < 256 ? HASH_SMALL : 0,
					       &futex_shift, NULL,
					       futex_hashsize, futex_hashsize);
	/* The allocator may have rounded the size; recompute from the shift. */
	futex_hashsize = 1UL << futex_shift;

	futex_detect_cmpxchg();

	for (i = 0; i < futex_hashsize; i++) {
		atomic_set(&futex_queues[i].waiters, 0);
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
3169__initcall(futex_init);
3170