1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63#include <linux/sched/rt.h>
64#include <linux/hugetlb.h>
65#include <linux/freezer.h>
66#include <linux/bootmem.h>
67#include <linux/fault-inject.h>
68
69#include <asm/futex.h>
70
71#include "locking/rtmutex_common.h"
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
/*
 * Whether futex_atomic_cmpxchg_inatomic() works on this arch; when the
 * arch cannot guarantee it at build time (CONFIG_HAVE_FUTEX_CMPXCHG unset)
 * this is detected at runtime and gates the PI/robust futex operations
 * (see the !futex_cmpxchg_enabled checks, e.g. in exit_pi_state_list()).
 */
int __read_mostly futex_cmpxchg_enabled;
#endif
177
178
179
180
181
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SHARED 0x01
#define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04
185
186
187
188
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;	/* task owning the user space futex */
	atomic_t refcount;		/* see put_pi_state() */

	union futex_key key;		/* futex this state is attached to */
};
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock; set to NULL by the waker
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me()*/
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
246
247
248
249
250
251
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;	/* SMP fast-path check, see hb_waiters_*() */
	spinlock_t lock;
	struct plist_head chain;
} ____cacheline_aligned_in_smp;

/*
 * The base of the bucket array and its size are always used together
 * (after initialization only in hash_futex()), so ensure that they
 * reside in the same cacheline.
 */
static struct {
	struct futex_hash_bucket *queues;
	unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues (__futex_data.queues)
#define futex_hashsize (__futex_data.hashsize)
269
270
271
272
273
/*
 * Fault injection support: lets tests force futex operations to fail with
 * -EFAULT (boot parameter "fail_futex=", plus debugfs knobs).
 */
#ifdef CONFIG_FAIL_FUTEX

static struct {
	struct fault_attr attr;

	bool ignore_private;	/* when set, only fault shared futexes */
} fail_futex = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_private = false,
};

static int __init setup_fail_futex(char *str)
{
	return setup_fault_attr(&fail_futex.attr, str);
}
__setup("fail_futex=", setup_fail_futex);

/* Decide whether this (private or shared) futex op should be failed. */
static bool should_fail_futex(bool fshared)
{
	if (fail_futex.ignore_private && !fshared)
		return false;

	return should_fail(&fail_futex.attr, 1);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_futex_debugfs(void)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_futex", NULL,
					&fail_futex.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	if (!debugfs_create_bool("ignore-private", mode, dir,
				 &fail_futex.ignore_private)) {
		debugfs_remove_recursive(dir);
		return -ENOMEM;
	}

	return 0;
}

late_initcall(fail_futex_debugfs);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif /* CONFIG_FAIL_FUTEX */
330
/* Take an mm reference for a private futex key. */
static inline void futex_get_mm(union futex_key *key)
{
	atomic_inc(&key->private.mm->mm_count);
	/*
	 * Ensure futex_get_mm() implies a full barrier such that
	 * get_futex_key() implies a full barrier. This is relied upon
	 * as smp_mb(); (B), see the ordering comments in futex_wake()
	 * and queue_lock() paths.
	 */
	smp_mb__after_atomic();
}
341
342
343
344
/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), paired with the full barrier (B) in the
	 * wake path, so waiter increment is visible before the uaddr
	 * load there.
	 */
	smp_mb__after_atomic();
#endif
}
355
356
357
358
359
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
366
/*
 * On !SMP there is no cheap cross-CPU check, so pretend a waiter is
 * always pending and fall through to taking the bucket lock.
 */
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
375
376
377
378
379static struct futex_hash_bucket *hash_futex(union futex_key *key)
380{
381 u32 hash = jhash2((u32*)&key->both.word,
382 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
383 key->both.offset);
384 return &futex_queues[hash & (futex_hashsize - 1)];
385}
386
387
388
389
390static inline int match_futex(union futex_key *key1, union futex_key *key2)
391{
392 return (key1 && key2
393 && key1->both.word == key2->both.word
394 && key1->both.ptr == key2->both.ptr
395 && key1->both.offset == key2->both.offset);
396}
397
398
399
400
401
402
/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode); /* implies smp_mb(); (B) */
		break;
	case FUT_OFF_MMSHARED:
		futex_get_mm(key); /* implies smp_mb(); (B) */
		break;
	default:
		/*
		 * Private futexes do not hold a reference on an inode or
		 * mm, therefore the only purpose of calling
		 * get_futex_key_refs() is because of the memory barrier.
		 */
		smp_mb(); /* explicit smp_mb(); (B) */
	}
}
424
425
426
427
428
429
430
/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held. This is
 * a no-op for private futexes, see comment in the get
 * counterpart.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in *key on success.
 *
 * For shared mappings, it's (inode, page offset); for private mappings,
 * it's (uaddr, current->mm) -- see the FUT_OFF_* tagging below.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page;
	struct address_space *mapping;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
		return -EFAULT;

	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma()
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);  /* implies smp_mb(); (B) */
		return 0;
	}

again:
	/* Ignore any VERIFY_READ mapping (futex common case) */
	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

	/*
	 * The treatment of mapping from this point on is critical. For an
	 * anonymous page the mapping is NULL (barring a race); for a
	 * file-backed page, mapping must be re-verified under RCU before
	 * the inode may be used, as the page can be truncated/migrated
	 * concurrently -- see the re-check of page->mapping below.
	 *
	 * Work on the head page: tail pages of a compound page share the
	 * head's mapping.
	 */
	page = compound_head(page);
	mapping = READ_ONCE(page->mapping);

	/*
	 * If page->mapping is NULL, then it cannot be a PageAnon page;
	 * but it might be the ZERO_PAGE, or in the gate area, or in a
	 * special mapping; or it may have been migrated or a shmem page
	 * swizzled to/from swap cache -- the page lock distinguishes
	 * the transient cases (retry) from the permanent one (-EFAULT).
	 */
	if (unlikely(!mapping)) {
		int shmem_swizzled;

		/*
		 * Page lock is required to identify which special case above
		 * applies. If this is really a shmem page then the page lock
		 * will prevent unexpected transitions.
		 */
		lock_page(page);
		shmem_swizzled = PageSwapCache(page) || page->mapping;
		unlock_page(page);
		put_page(page);

		if (shmem_swizzled)
			goto again;

		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * If the futex key is stored on an anonymous page, then the associated
	 * object is the mm which is implicitly pinned by the calling process.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (unlikely(should_fail_futex(fshared)) || ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;

		get_futex_key_refs(key); /* implies smp_mb(); (B) */

	} else {
		struct inode *inode;

		/*
		 * The associated futex object in this case is the inode and
		 * the page->mapping must be traversed. Ordinarily this should
		 * be stabilised under page lock but it's not strictly
		 * necessary in this case as we just want to pin the inode, not
		 * update radix trees or anything like that.
		 *
		 * The RCU read lock is taken as the inode is finally freed
		 * under RCU. If the mapping still matches expectations then
		 * mapping->host can be safely accessed as being a valid inode.
		 */
		rcu_read_lock();

		if (READ_ONCE(page->mapping) != mapping) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		inode = READ_ONCE(mapping->host);
		if (!inode) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		/*
		 * Take a reference unless it is about to be freed. The only
		 * way for this check to fail is if the inode was truncated
		 * in parallel, so warn for now if this happens.
		 *
		 * We are not calling into get_futex_key_refs() in file-backed
		 * cases, therefore a successful atomic_inc return below will
		 * guarantee that get_futex_key() will still imply smp_mb(); (B).
		 */
		if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		/* Should be impossible but lets be paranoid for now */
		if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
			err = -EFAULT;
			rcu_read_unlock();
			iput(inode);

			goto out;
		}

		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = inode;
		key->shared.pgoff = basepage_index(page);
		rcu_read_unlock();
	}

out:
	put_page(page);
	return err;
}
665
/* Release the reference taken by get_futex_key(). */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
670
671
672
673
674
675
676
677
678
679
680
681
682
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE, NULL);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
695
696
697
698
699
700
701
702
703static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
704 union futex_key *key)
705{
706 struct futex_q *this;
707
708 plist_for_each_entry(this, &hb->chain, list) {
709 if (match_futex(&this->key, key))
710 return this;
711 }
712 return NULL;
713}
714
/*
 * Atomic compare-and-exchange on the user space futex word with page
 * faults disabled; safe to call with hb locks held. Returns the arch
 * helper's result; *curval receives the value found at @uaddr.
 */
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
726
727static int get_futex_value_locked(u32 *dest, u32 __user *from)
728{
729 int ret;
730
731 pagefault_disable();
732 ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
733 pagefault_enable();
734
735 return ret ? -EFAULT : 0;
736}
737
738
739
740
741
742static int refill_pi_state_cache(void)
743{
744 struct futex_pi_state *pi_state;
745
746 if (likely(current->pi_state_cache))
747 return 0;
748
749 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
750
751 if (!pi_state)
752 return -ENOMEM;
753
754 INIT_LIST_HEAD(&pi_state->list);
755
756 pi_state->owner = NULL;
757 atomic_set(&pi_state->refcount, 1);
758 pi_state->key = FUTEX_KEY_INIT;
759
760 current->pi_state_cache = pi_state;
761
762 return 0;
763}
764
765static struct futex_pi_state * alloc_pi_state(void)
766{
767 struct futex_pi_state *pi_state = current->pi_state_cache;
768
769 WARN_ON(!pi_state);
770 current->pi_state_cache = NULL;
771
772 return pi_state;
773}
774
775
776
777
778
779
780
/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 *
 * Must be called with the hb lock held.
 */
static void put_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
814
815
816
817
818
819static struct task_struct * futex_find_get_task(pid_t pid)
820{
821 struct task_struct *p;
822
823 rcu_read_lock();
824 p = find_task_by_vpid(pid);
825 if (p)
826 get_task_struct(p);
827
828 rcu_read_unlock();
829
830 return p;
831}
832
833
834
835
836
837
/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleanup: unlock all held PI futexes and correct
 * the pi_state owners, so waiters can take over.
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
/*
 * Validate the kernel pi_state against the user space futex value
 * @uval and, if consistent, take a reference and hand it out via @ps.
 *
 * Returns 0 on success, -EINVAL on any inconsistency between the
 * futex word (TID / FUTEX_OWNER_DIED bits) and the kernel state.
 */
static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;

	/*
	 * Userspace might have messed up non-PI and PI futexes.
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	WARN_ON(!atomic_read(&pi_state->refcount));

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state.
			 */
			if (pid)
				return -EINVAL;
			/*
			 * Take a ref on the state and return success.
			 */
			goto out_state;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success.
		 */
		if (!pid)
			goto out_state;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner.
		 */
		if (!pi_state->owner)
			return -EINVAL;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID.
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		return -EINVAL;
out_state:
	atomic_inc(&pi_state->refcount);
	*ps = pi_state;
	return 0;
}
1009
1010
1011
1012
1013
/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 */
static int attach_to_pi_owner(u32 uval, union futex_key *key,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct futex_pi_state *pi_state;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0.
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	/*
	 * No existing pi state. First waiter. [2]
	 */
	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
1081
1082static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
1083 union futex_key *key, struct futex_pi_state **ps)
1084{
1085 struct futex_q *match = futex_top_waiter(hb, key);
1086
1087
1088
1089
1090
1091 if (match)
1092 return attach_to_pi_state(uval, match->pi_state, ps);
1093
1094
1095
1096
1097
1098 return attach_to_pi_owner(uval, key, ps);
1099}
1100
1101static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
1102{
1103 u32 uninitialized_var(curval);
1104
1105 if (unlikely(should_fail_futex(true)))
1106 return -EFAULT;
1107
1108 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
1109 return -EFAULT;
1110
1111
1112 return curval != uval ? -EAGAIN : 0;
1113}
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  0 - ready to wait;
 *  1 - acquired the lock;
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *match;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (get_futex_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	if ((unlikely(should_fail_futex(true))))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	match = futex_top_waiter(hb, key);
	if (match)
		return attach_to_pi_state(uval, match->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		/* If the take over worked, return 1 */
		return ret < 0 ? ret : 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uval, key, ps);
}
1208
1209
1210
1211
1212
1213
1214
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}
1227
1228
1229
1230
1231
1232
1233
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	/*
	 * Queue the task for later wakeup for after we've released
	 * the hb->lock. wake_q_add() grabs reference to p.
	 */
	wake_q_add(wake_q, p);
	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;
}
1256
/*
 * Wake the top waiter on a PI futex: hand the rt_mutex and the user
 * space futex word ownership over to the new owner. Called with
 * hb->lock held; drops it before doing the actual wakeups.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
			 struct futex_hash_bucket *hb)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;
	WAKE_Q(wake_q);
	bool deboost;
	int ret = 0;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always
	 * kept enabled while there is PI state around. We cleanup the
	 * owner died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	if (unlikely(should_fail_futex(true)))
		ret = -EFAULT;

	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
		ret = -EFAULT;
	} else if (curval != uval) {
		/*
		 * If a unconditional UNLOCK_PI is done by an owner and
		 * the TID of the current owner and the waiter bits are
		 * in place, then a deadlock avoidance retry (-EAGAIN)
		 * is appropriate; otherwise user space messed up.
		 */
		if ((FUTEX_TID_MASK & curval) == uval)
			ret = -EAGAIN;
		else
			ret = -EINVAL;
	}
	if (ret) {
		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
		return ret;
	}

	raw_spin_lock(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock(&pi_state->owner->pi_lock);

	raw_spin_lock(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock(&new_owner->pi_lock);

	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);

	/*
	 * First unlock HB so the waiter does not spin on it once he got woken
	 * up. Second wake up the waiter before the priority is adjusted. If we
	 * deboost first (and lose our higher priority), then the task might get
	 * scheduled away before the wake up can take place.
	 */
	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
	if (deboost)
		rt_mutex_adjust_prio(current);

	return 0;
}
1345
1346
1347
1348
1349static inline void
1350double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1351{
1352 if (hb1 <= hb2) {
1353 spin_lock(&hb1->lock);
1354 if (hb1 < hb2)
1355 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
1356 } else {
1357 spin_lock(&hb2->lock);
1358 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
1359 }
1360}
1361
1362static inline void
1363double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1364{
1365 spin_unlock(&hb1->lock);
1366 if (hb1 != hb2)
1367 spin_unlock(&hb2->lock);
1368}
1369
1370
1371
1372
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 * Returns the number of woken waiters or a negative error code.
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	WAKE_Q(wake_q);

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		goto out_put_key;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
out_put_key:
	put_futex_key(&key);
out:
	return ret;
}
1421
1422
1423
1424
1425
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				mark_wake_futex(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
1518
1519
1520
1521
1522
1523
1524
1525
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		hb_waiters_inc(hb2);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  0 - failed to acquire the lock atomically;
 * >0 - acquired the lock, return value is vpid of the top_waiter
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret, vpid;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unecessarily as it will force the subsequent unlock to enter
	 * the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;
	}
	return ret;
}
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
 * uaddr2 atomically on behalf of the top waiter.
 *
 * Return:
 * >=0 - on success, the number of tasks requeued or woken;
 *  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	WAKE_Q(wake_q);

	if (requeue_pi) {
		/*
		 * Requeue PI only works on two distinct uaddrs. This
		 * check is only valid for private futexes. See below.
		 */
		if (uaddr1 == uaddr2)
			return -EINVAL;

		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (requeue_pi && match_futex(&key1, &key2)) {
		ret = -EINVAL;
		goto out_put_keys;
	}

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	hb_waiters_inc(hb2);
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it. If the lock was taken, ret contains the
		 * vpid of the top waiter task.
		 * If the lock was not taken, we have pi_state and an initial
		 * refcount on it. In case of an error we have nothing.
		 */
		if (ret > 0) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			/*
			 * If we acquired the lock, then the user space value
			 * of uaddr2 should be vpid. It cannot be changed by
			 * the top waiter as it is blocked on hb2 lock if it
			 * tries to do so. If something fiddled with it behind
			 * our back the pi state lookup might unearth it. So
			 * we rather use the known value than rereading and
			 * handing potential crap to lookup_pi_state.
			 *
			 * If that call succeeds then we have pi_state and an
			 * initial refcount on it.
			 */
			ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
		}

		switch (ret) {
		case 0:
			/* We hold a reference on the pi state. */
			break;

			/* If the above failed, then pi_state is NULL */
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - Owner is exiting and we just wait for the
			 *   exit to complete.
			 * - The user space value changed.
			 */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 *
		 * We should never be requeueing a futex_q with a pi_state,
		 * which is awaiting a futex_unlock_pi().
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter) ||
		    this->pi_state) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			mark_wake_futex(&wake_q, this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/*
			 * Prepare the waiter to take the rt_mutex. Take a
			 * refcount on the pi_state and store the pointer in
			 * the futex_q object of the waiter.
			 */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task);
			if (ret == 1) {
				/*
				 * We got the lock. We do neither drop the
				 * refcount on pi_state nor clear
				 * this->pi_state because the waiter needs the
				 * pi_state for cleaning up the user space
				 * value. It will drop the refcount after
				 * doing so.
				 */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/*
				 * rt_mutex_start_proxy_lock() detected a
				 * potential deadlock when we tried to queue
				 * that waiter. Drop the pi_state reference
				 * which we took above and remove the pointer
				 * to the state from the waiters futex_q
				 * object.
				 */
				this->pi_state = NULL;
				put_pi_state(pi_state);
				/*
				 * We stop queueing more waiters and let user
				 * space deal with the mess.
				 */
				break;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

	/*
	 * We took an extra initial reference to the pi_state either
	 * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
	 * need to drop it here again.
	 */
	put_pi_state(pi_state);

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	hb_waiters_dec(hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret ? ret : task_count;
}
1939
1940
/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	/*
	 * Increment the counter before taking the lock so that
	 * a potential waker won't miss a to-be-slept task that is
	 * waiting for the spinlock. This is safe as all queue_lock()
	 * users end up calling queue_me(). Similarly, for housekeeping,
	 * decrement the counter at queue_unlock() when some error has
	 * occurred and we don't end up adding the task to the list.
	 */
	hb_waiters_inc(hb);

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock); /* implies smp_mb(); (A) */
	return hb;
}
1963
/* Undo queue_lock() when the task was not queued after all. */
static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
 * an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *   1 - if the futex_q was still queued (and we removed unqueued it);
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	/*
	 * q->lock_ptr can change between this read and the following spin_lock.
	 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
	 * optimizing lock_ptr out of the logic below.
	 */
	lock_ptr = READ_ONCE(q->lock_ptr);
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
2059
2060
2061
2062
2063
2064
/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. *q->lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	put_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}
2076
2077
2078
2079
2080
2081
2082
/*
 * Fixup the pi_state owner with the new owner.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
2174
2175static long futex_wait_restart(struct restart_block *restart);
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *   1 - success, lock taken;
 *   0 - success, lock not taken;
 *  <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on
	 * the way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from
		 * the rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late. Hand the fixup over to whoever owns (or
		 * is about to own) the rt_mutex now.
		 */
		raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
2249
2250
2251
2252
2253
2254
2255
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or NULL for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using smp_store_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer (if there is one). */
	if (timeout)
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *   0 - uaddr contains val and hb has been locked;
 *  <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is
 *       unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking. If we
	 * tested *uaddr before taking the hash-bucket lock, a waker running
	 * in between could update the value and issue its wakeup before we
	 * are enqueued, leaving us blocked forever.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * AFTER testing *uaddr, so any pending waker will see us on the
	 * list and wake us up once it releases the bucket lock.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		/* Fault: drop the lock, fault the page in and retry. */
		queue_unlock(*hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		/* Shared mapping may have changed: redo the key lookup. */
		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}
2363
2364static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2365 ktime_t *abs_time, u32 bitset)
2366{
2367 struct hrtimer_sleeper timeout, *to = NULL;
2368 struct restart_block *restart;
2369 struct futex_hash_bucket *hb;
2370 struct futex_q q = futex_q_init;
2371 int ret;
2372
2373 if (!bitset)
2374 return -EINVAL;
2375 q.bitset = bitset;
2376
2377 if (abs_time) {
2378 to = &timeout;
2379
2380 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2381 CLOCK_REALTIME : CLOCK_MONOTONIC,
2382 HRTIMER_MODE_ABS);
2383 hrtimer_init_sleeper(to, current);
2384 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2385 current->timer_slack_ns);
2386 }
2387
2388retry:
2389
2390
2391
2392
2393 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2394 if (ret)
2395 goto out;
2396
2397
2398 futex_wait_queue_me(hb, &q, to);
2399
2400
2401 ret = 0;
2402
2403 if (!unqueue_me(&q))
2404 goto out;
2405 ret = -ETIMEDOUT;
2406 if (to && !to->task)
2407 goto out;
2408
2409
2410
2411
2412
2413 if (!signal_pending(current))
2414 goto retry;
2415
2416 ret = -ERESTARTSYS;
2417 if (!abs_time)
2418 goto out;
2419
2420 restart = ¤t->restart_block;
2421 restart->fn = futex_wait_restart;
2422 restart->futex.uaddr = uaddr;
2423 restart->futex.val = val;
2424 restart->futex.time = abs_time->tv64;
2425 restart->futex.bitset = bitset;
2426 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
2427
2428 ret = -ERESTART_RESTARTBLOCK;
2429
2430out:
2431 if (to) {
2432 hrtimer_cancel(&to->timer);
2433 destroy_hrtimer_on_stack(&to->timer);
2434 }
2435 return ret;
2436}
2437
2438
2439static long futex_wait_restart(struct restart_block *restart)
2440{
2441 u32 __user *uaddr = restart->futex.uaddr;
2442 ktime_t t, *tp = NULL;
2443
2444 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
2445 t.tv64 = restart->futex.time;
2446 tp = &t;
2447 }
2448 restart->fn = do_no_restart_syscall;
2449
2450 return (long)futex_wait(uaddr, restart->futex.flags,
2451 restart->futex.val, tp, restart->futex.bitset);
2452}
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block as a consequence of relying
 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
 * a 0 value of the futex too.)
 *
 * With @trylock set this serves FUTEX_TRYLOCK_PI and does not block.
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,
		 * or failed. Either way, we do _not_ block.
		 */
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Transient failure (e.g. the owner is exiting or
			 * the user space value changed). Drop everything,
			 * give others a chance to run and retry.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock) {
		ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
	} else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, proper cleanup was done. If it
	 * acquired the lock, clear -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}
2585
2586
2587
2588
2589
2590
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock via wake_futex_pi().
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb;
	struct futex_q *match;
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (ret)
		return ret;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * Check waiters first. We do not trust user space values at
	 * all and we at least want to know if user space fiddled
	 * with the futex value instead of blindly unlocking.
	 */
	match = futex_top_waiter(hb, &key);
	if (match) {
		ret = wake_futex_pi(uaddr, uval, match, hb);
		/*
		 * In case of success wake_futex_pi dropped the hash
		 * bucket lock.
		 */
		if (!ret)
			goto out_putkey;
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		/*
		 * A unconditional UNLOCK_PI op raced against a waiter
		 * setting the FUTEX_WAITERS bit. Try again.
		 */
		if (ret == -EAGAIN) {
			spin_unlock(&hb->lock);
			put_futex_key(&key);
			goto retry;
		}
		/*
		 * wake_futex_pi has detected invalid state. Tell user
		 * space.
		 */
		goto out_unlock;
	}

	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel. Waiters which are about to queue themselves are stuck
	 * on hb->lock. So we can safely ignore them. We do neither
	 * preserve the WAITERS bit nor the OWNER_DIED one. We are the
	 * owner.
	 */
	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
		goto pi_faulted;

	/*
	 * If uval has changed, let user space handle it.
	 */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
out_putkey:
	put_futex_key(&key);
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was originally enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Return:
 *   0 = no early wakeup detected;
 *  <0 = -ETIMEDOUT, -ERESTARTNOINTR, or -EWOULDBLOCK
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to something else since we don't
	 * support a PI aware source futex for requeue.
	 */
	if (!match_futex(&q->key, key2)) {
		/* If still on the initial futex, q must hang off this hb. */
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);
		hb_waiters_dec(hb);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the pi futex at uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.); both futexes
 *		must be of the same type
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, must be non-zero
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller waits on uaddr and is requeued by futex_requeue() to uaddr2,
 * which must be PI aware and distinct from uaddr.  On wakeup the acquisition
 * of the rt_mutex associated with uaddr2 is completed before returning to
 * userspace.
 *
 * Return:
 *   0 - On success;
 *  <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
	RB_CLEAR_NODE(&rt_waiter.tree_entry);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (match_futex(&q.key, &key2)) {
		queue_unlock(hb);
		ret = -EINVAL;
		goto out_put_keys;
	}

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code.
	 * The futex_requeue dropped our key1 reference and incremented our
	 * key2 reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			/*
			 * Drop the reference to the pi state which
			 * the requeue_pi() code acquired for us.
			 */
			put_pi_state(q.pi_state);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up by futex_unlock_pi(), a timeout, or a
		 * signal.  Finish the proxy lock acquisition started by the
		 * requeue code on our behalf.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if
		 * we haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, proper cleanup was
		 * done.  If it acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall,
		 * but it would detect that the user space "val" changed and
		 * return -EWOULDBLOCK.  Save the overhead of the restart and
		 * return -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2949 size_t, len)
2950{
2951 if (!futex_cmpxchg_enabled)
2952 return -ENOSYS;
2953
2954
2955
2956 if (unlikely(len != sizeof(*head)))
2957 return -EINVAL;
2958
2959 current->robust_list = head;
2960
2961 return 0;
2962}
2963
2964
2965
2966
2967
2968
2969
/*
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	/* Reading another task's robust list requires ptrace rights. */
	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	/* Snapshot the pointer under RCU before touching userspace. */
	head = p->robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
3008
3009
3010
3011
3012
/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do the rest of the cleanup:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * dying on the same futex.)
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * We are not holding a lock here, but we want to have
		 * the pagefault_disable/enable() protection because
		 * we want to handle the fault gracefully. If the
		 * access fails we try to fault in the futex with R/W
		 * verification via get_user_pages. get_user() above
		 * does not guarantee R/W access. If that fails we
		 * give up and leave the futex locked.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}
3059
3060
3061
3062
3063static inline int fetch_robust_entry(struct robust_list __user **entry,
3064 struct robust_list __user * __user *head,
3065 unsigned int *pi)
3066{
3067 unsigned long uentry;
3068
3069 if (get_user(uentry, (unsigned long __user *)head))
3070 return -EFAULT;
3071
3072 *entry = (void __user *)(uentry & ~1UL);
3073 *pi = uentry & 1;
3074
3075 return 0;
3076}
3077
3078
3079
3080
3081
3082
3083
/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}
3146
3147long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
3148 u32 __user *uaddr2, u32 val2, u32 val3)
3149{
3150 int cmd = op & FUTEX_CMD_MASK;
3151 unsigned int flags = 0;
3152
3153 if (!(op & FUTEX_PRIVATE_FLAG))
3154 flags |= FLAGS_SHARED;
3155
3156 if (op & FUTEX_CLOCK_REALTIME) {
3157 flags |= FLAGS_CLOCKRT;
3158 if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
3159 cmd != FUTEX_WAIT_REQUEUE_PI)
3160 return -ENOSYS;
3161 }
3162
3163 switch (cmd) {
3164 case FUTEX_LOCK_PI:
3165 case FUTEX_UNLOCK_PI:
3166 case FUTEX_TRYLOCK_PI:
3167 case FUTEX_WAIT_REQUEUE_PI:
3168 case FUTEX_CMP_REQUEUE_PI:
3169 if (!futex_cmpxchg_enabled)
3170 return -ENOSYS;
3171 }
3172
3173 switch (cmd) {
3174 case FUTEX_WAIT:
3175 val3 = FUTEX_BITSET_MATCH_ANY;
3176 case FUTEX_WAIT_BITSET:
3177 return futex_wait(uaddr, flags, val, timeout, val3);
3178 case FUTEX_WAKE:
3179 val3 = FUTEX_BITSET_MATCH_ANY;
3180 case FUTEX_WAKE_BITSET:
3181 return futex_wake(uaddr, flags, val, val3);
3182 case FUTEX_REQUEUE:
3183 return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
3184 case FUTEX_CMP_REQUEUE:
3185 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
3186 case FUTEX_WAKE_OP:
3187 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
3188 case FUTEX_LOCK_PI:
3189 return futex_lock_pi(uaddr, flags, timeout, 0);
3190 case FUTEX_UNLOCK_PI:
3191 return futex_unlock_pi(uaddr, flags);
3192 case FUTEX_TRYLOCK_PI:
3193 return futex_lock_pi(uaddr, flags, NULL, 1);
3194 case FUTEX_WAIT_REQUEUE_PI:
3195 val3 = FUTEX_BITSET_MATCH_ANY;
3196 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
3197 uaddr2);
3198 case FUTEX_CMP_REQUEUE_PI:
3199 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
3200 }
3201 return -ENOSYS;
3202}
3203
3204
/* futex(2) entry point: validate/convert the timeout, then dispatch. */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
			return -EFAULT;
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		/* FUTEX_WAIT takes a relative timeout; make it absolute. */
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
3239
/*
 * Runtime detection of a working futex_atomic_cmpxchg_inatomic() on
 * architectures that did not declare it at build time via
 * CONFIG_HAVE_FUTEX_CMPXCHG.
 */
static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
	u32 curval;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * operation: probing a NULL user pointer must return -EFAULT
	 * when the operation is implemented, while an unimplemented
	 * one reports a different error and leaves
	 * futex_cmpxchg_enabled cleared so the PI/robust commands
	 * return -ENOSYS.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;
#endif
}
3259
/* Boot-time setup: size and initialize the global futex hash table. */
static int __init futex_init(void)
{
	unsigned int futex_shift;
	unsigned long i;

#if CONFIG_BASE_SMALL
	/* Memory-constrained configs get a tiny fixed-size table. */
	futex_hashsize = 16;
#else
	/* Otherwise scale the bucket count with the number of CPUs. */
	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
#endif

	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
					       futex_hashsize, 0,
					       futex_hashsize < 256 ? HASH_SMALL : 0,
					       &futex_shift, NULL,
					       futex_hashsize, futex_hashsize);
	/* The allocator may round the size; recompute from the shift. */
	futex_hashsize = 1UL << futex_shift;

	futex_detect_cmpxchg();

	for (i = 0; i < futex_hashsize; i++) {
		atomic_set(&futex_queues[i].waiters, 0);
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(futex_init);
3289