1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/compat.h>
48#include <linux/slab.h>
49#include <linux/poll.h>
50#include <linux/fs.h>
51#include <linux/file.h>
52#include <linux/jhash.h>
53#include <linux/init.h>
54#include <linux/futex.h>
55#include <linux/mount.h>
56#include <linux/pagemap.h>
57#include <linux/syscalls.h>
58#include <linux/signal.h>
59#include <linux/export.h>
60#include <linux/magic.h>
61#include <linux/pid.h>
62#include <linux/nsproxy.h>
63#include <linux/ptrace.h>
64#include <linux/sched/rt.h>
65#include <linux/sched/wake_q.h>
66#include <linux/sched/mm.h>
67#include <linux/hugetlb.h>
68#include <linux/freezer.h>
69#include <linux/memblock.h>
70#include <linux/fault-inject.h>
71#include <linux/refcount.h>
72
73#include <asm/futex.h>
74
75#include "locking/rtmutex_common.h"
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
/*
 * futex_cmpxchg_enabled: constant-folded to 1 when the architecture
 * guarantees a working futex cmpxchg; otherwise detected at boot and
 * read-mostly thereafter.
 */
#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
#define futex_cmpxchg_enabled 1
#else
static int __read_mostly futex_cmpxchg_enabled;
#endif
182
183
184
185
186
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x01
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * shared-futex code away.
 */
# define FLAGS_SHARED		0x00
#endif
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04
198
199
200
201
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	refcount_t refcount;

	union futex_key key;
} __randomize_layout;
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
} __randomize_layout;

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me()*/
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
259
260
261
262
263
264
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
} ____cacheline_aligned_in_smp;
270
271
272
273
274
275
/*
 * The base of the bucket array and its size are always used together
 * (after initialization only in hash_futex()), so ensure that they
 * reside in the same cacheline.
 */
static struct {
	struct futex_hash_bucket *queues;
	unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues   (__futex_data.queues)
#define futex_hashsize (__futex_data.hashsize)
282
283
284
285
286
/*
 * Fault injection capability for futexes - exercises the error paths by
 * making futex user-space accesses fail artificially.  Configured via the
 * "fail_futex=" boot parameter and, with FAULT_INJECTION_DEBUG_FS, via
 * debugfs.
 */
#ifdef CONFIG_FAIL_FUTEX

static struct {
	struct fault_attr attr;

	/* When true, only shared (fshared) futexes are subject to failure. */
	bool ignore_private;
} fail_futex = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_private = false,
};

static int __init setup_fail_futex(char *str)
{
	return setup_fault_attr(&fail_futex.attr, str);
}
__setup("fail_futex=", setup_fail_futex);

/* Decide whether this futex access should be made to fail artificially. */
static bool should_fail_futex(bool fshared)
{
	if (fail_futex.ignore_private && !fshared)
		return false;

	return should_fail(&fail_futex.attr, 1);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_futex_debugfs(void)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_futex", NULL,
					&fail_futex.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	debugfs_create_bool("ignore-private", mode, dir,
			    &fail_futex.ignore_private);
	return 0;
}

late_initcall(fail_futex_debugfs);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif /* CONFIG_FAIL_FUTEX */
339
/* Forward declaration: walks the compat robust list at task exit. */
#ifdef CONFIG_COMPAT
static void compat_exit_robust_list(struct task_struct *curr);
#else
static inline void compat_exit_robust_list(struct task_struct *curr) { }
#endif
345
346
347
348
/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier; pairs with the smp_mb() in hb_waiters_pending() so
	 * a waker observes either the incremented count or the queued waiter.
	 */
	smp_mb__after_atomic();
#endif
}
359
360
361
362
363
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
370
/*
 * Return non-zero when waiters may be queued on @hb.  On !SMP there is no
 * cheap way to tell, so always report pending (conservative).
 */
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier; pairs with smp_mb__after_atomic() in hb_waiters_inc().
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
383
384
385
386
387
388
389
390
/**
 * hash_futex - Return the hash bucket in the global hash
 * @key:	Pointer to the futex key for which the hash is calculated
 *
 * We hash on the keys returned from get_futex_key (see below) and return the
 * corresponding hash bucket in the global hash.
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
			  key->both.offset);

	return &futex_queues[hash & (futex_hashsize - 1)];
}
398
399
400
401
402
403
404
405
406
407static inline int match_futex(union futex_key *key1, union futex_key *key2)
408{
409 return (key1 && key2
410 && key1->both.word == key2->both.word
411 && key1->both.ptr == key2->both.ptr
412 && key1->both.offset == key2->both.offset);
413}
414
/* Access intent passed to get_futex_key(): read-only suffices for waits. */
enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};
419
420
421
422
423
424
425
426
427
428
429
/**
 * futex_setup_timer - set up the sleeping hrtimer.
 * @time:	ptr to the given timeout value
 * @timeout:	the hrtimer_sleeper structure to be set up
 * @flags:	futex flags
 * @range_ns:	optional range in ns
 *
 * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
 *	   value given
 */
static inline struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns)
{
	if (!time)
		return NULL;

	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
	/*
	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
	 * effectively the same as calling hrtimer_set_expires().
	 */
	hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);

	return timeout;
}
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
/*
 * Generate a machine wide unique identifier for this inode.
 *
 * This relies on u64 not wrapping in the life-time of the machine; a global
 * atomic64 counter hands out sequence numbers lazily, on first use of an
 * inode as a futex key.  i_sequence == 0 means "not yet assigned"; the
 * cmpxchg ensures exactly one assignment wins.  Zero itself is skipped so
 * 0 can keep meaning "unassigned".
 */
static u64 get_inode_sequence_number(struct inode *inode)
{
	static atomic64_t i_seq;
	u64 old;

	/* Does the inode already have a sequence number? */
	old = atomic64_read(&inode->i_sequence);
	if (likely(old))
		return old;

	for (;;) {
		u64 new = atomic64_add_return(1, &i_seq);
		if (WARN_ON_ONCE(!new))
			continue;

		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
		if (old)
			return old;
		return new;
	}
}
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: FUTEX_READ,
 *              FUTEX_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in @key on success.
 *
 * For shared mappings (when @fshared), the key is:
 *   ( inode->i_sequence, page->index, offset_within_page )
 * For private mappings (or when !@fshared), the key is:
 *   ( current->mm, address, 0 )
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_access rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *tail;
	struct address_space *mapping;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(uaddr, sizeof(u32))))
		return -EFAULT;

	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma()
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		return 0;
	}

again:
	/* Ignore any VERIFY_READ mapping (futex common case) */
	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == FUTEX_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

	/*
	 * The treatment of mapping from this point on is critical. The page
	 * lock protects many things but in this context the page lock
	 * stabilizes mapping, prevents inode freeing in the shared
	 * file-backed region case and guards against movement to swap cache.
	 *
	 * Strictly speaking the page lock is not needed in all cases being
	 * considered here and page lock forces unnecessarily serialization.
	 * From this point on, mapping will be re-verified if necessary and
	 * page lock will be acquired only if it is unavoidable.
	 *
	 * Mapping checks require the head page for any compound page so the
	 * head page and mapping is looked up now.
	 */
	tail = page;
	page = compound_head(page);
	mapping = READ_ONCE(page->mapping);

	/*
	 * If page->mapping is NULL, then it cannot be a PageAnon page;
	 * but it might be the ZERO_PAGE, or in the gate area, or in a
	 * special mapping; or it may have been migrated, truncated from
	 * swap cache, or its mapping teared down - in which case we retry.
	 */
	if (unlikely(!mapping)) {
		int shmem_swizzled;

		/*
		 * Take the page lock to distinguish a transient shmem/swap
		 * transition (retry) from a genuinely unmapped page (fault).
		 */
		lock_page(page);
		shmem_swizzled = PageSwapCache(page) || page->mapping;
		unlock_page(page);
		put_page(page);

		if (shmem_swizzled)
			goto again;

		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * If the futex key is stored on an anonymous page, then the associated
	 * object is the mm which is implicitly pinned by the calling process.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (unlikely(should_fail_futex(fshared)) || ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;

	} else {
		struct inode *inode;

		/*
		 * The associated futex object in this case is the inode and
		 * the page->mapping must be traversed. Ordinarily this should
		 * be stabilised under page lock but it's not strictly
		 * necessary in this case as we just want to pin the inode, not
		 * update the radix tree or anything like that.
		 *
		 * The RCU read lock is taken as the inode is finally freed
		 * under RCU. If the mapping still matches expectations then the
		 * mapping->host can be safely accessed as being a valid inode.
		 */
		rcu_read_lock();

		if (READ_ONCE(page->mapping) != mapping) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		inode = READ_ONCE(mapping->host);
		if (!inode) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.i_seq = get_inode_sequence_number(inode);
		key->shared.pgoff = basepage_index(tail);
		rcu_read_unlock();
	}

out:
	put_page(page);
	return err;
}
689
/*
 * No-op: futex keys no longer pin any resource here; kept so call sites
 * remain paired with get_futex_key().
 */
static inline void put_futex_key(union futex_key *key)
{
}
693
694
695
696
697
698
699
700
701
702
703
704
705
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE, NULL);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
718
719
720
721
722
723
724
725
/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}
737
/*
 * cmpxchg the futex value at @uaddr with page faults disabled - callers hold
 * a hash bucket lock and must not sleep on a fault.  On success *curval
 * holds the value found at @uaddr before the exchange.
 */
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
749
750static int get_futex_value_locked(u32 *dest, u32 __user *from)
751{
752 int ret;
753
754 pagefault_disable();
755 ret = __get_user(*dest, from);
756 pagefault_enable();
757
758 return ret ? -EFAULT : 0;
759}
760
761
762
763
764
/*
 * PI code:
 *
 * Make sure the per-task pi_state cache holds a preallocated entry so the
 * later atomic sections never need to allocate.  Called in sleepable context.
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	refcount_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}
787
/*
 * Hand out the cached pi_state.  Callers must have run
 * refill_pi_state_cache() beforehand, hence the WARN_ON.
 */
static struct futex_pi_state *alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}
797
/* Take a reference; the caller must already hold one (refcount != 0). */
static void get_pi_state(struct futex_pi_state *pi_state)
{
	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
}
802
803
804
805
806
/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 */
static void put_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!refcount_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		struct task_struct *owner;

		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		owner = pi_state->owner;
		if (owner) {
			raw_spin_lock(&owner->pi_lock);
			list_del_init(&pi_state->list);
			raw_spin_unlock(&owner->pi_lock);
		}
		rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	}

	if (current->pi_state_cache) {
		kfree(pi_state);
	} else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		refcount_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
846
#ifdef CONFIG_FUTEX_PI

/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
static void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {
		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);

		/*
		 * We can race against put_pi_state() removing itself from the
		 * list (a waiter going away). put_pi_state() will first
		 * decrement the reference count and then modify the list, so
		 * its possible to see the list entry but fail this reference
		 * acquire.
		 *
		 * In that case; drop the locks to let put_pi_state() make
		 * progress and retry the loop.
		 */
		if (!refcount_inc_not_zero(&pi_state->refcount)) {
			raw_spin_unlock_irq(&curr->pi_lock);
			cpu_relax();
			raw_spin_lock_irq(&curr->pi_lock);
			continue;
		}
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		raw_spin_lock(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			/* retain curr->pi_lock for the loop invariant */
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			spin_unlock(&hb->lock);
			put_pi_state(pi_state);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;

		raw_spin_unlock(&curr->pi_lock);
		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
		spin_unlock(&hb->lock);

		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
#else
static inline void exit_pi_state_list(struct task_struct *curr) { }
#endif
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
/*
 * Validate the kernel pi_state against the user space futex value @uval and
 * attach to it on success, taking a reference.
 *
 * Called with the hb lock held, which protects the pi_state against
 * disappearing; the wait_lock is taken to serialize against concurrent
 * state changes.
 */
static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
			      struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	u32 uval2;
	int ret;

	/*
	 * Userspace might have messed up non-PI and PI futexes [3]
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	/*
	 * We get here with hb->lock held, and having found a
	 * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
	 * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
	 * which in turn means that futex_lock_pi() still has a reference on
	 * our pi_state.
	 */
	WARN_ON(!refcount_read(&pi_state->refcount));

	/*
	 * Now that we have a pi_state, we can acquire wait_lock
	 * and do the state validation.
	 */
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Since {uval, pi_state} is serialized by wait_lock, and our current
	 * uval was read without holding it, it can have changed. Verify it
	 * still is what we expect it to be, otherwise retry the entire
	 * operation.
	 */
	if (get_futex_value_locked(&uval2, uaddr))
		goto out_efault;

	if (uval != uval2)
		goto out_eagain;

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state. [5]
			 */
			if (pid)
				goto out_einval;
			/*
			 * Take a ref on the state and return success. [4]
			 */
			goto out_attach;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success. [6]
		 */
		if (!pid)
			goto out_attach;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner. [7]
		 */
		if (!pi_state->owner)
			goto out_einval;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID. [9/10]
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		goto out_einval;

out_attach:
	get_pi_state(pi_state);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	*ps = pi_state;
	return 0;

out_einval:
	ret = -EINVAL;
	goto out_error;

out_eagain:
	ret = -EAGAIN;
	goto out_error;

out_efault:
	ret = -EFAULT;
	goto out_error;

out_error:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
1133
1134
1135
1136
1137
1138
1139
1140
/**
 * wait_for_owner_exiting - Block until the owner has exited
 * @ret: owner's current futex lock status
 * @exiting:	Pointer to the exiting task
 *
 * Caller must hold a refcount on @exiting.
 */
static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
{
	if (ret != -EBUSY) {
		WARN_ON_ONCE(exiting);
		return;
	}

	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
		return;

	mutex_lock(&exiting->futex_exit_mutex);
	/*
	 * No point in doing state checking here. If the waiter got here
	 * while the task was in exec()->exec_futex_release() then it can
	 * have any FUTEX_STATE_* value when the waiter has acquired the
	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
	 * already. Highly unlikely and not a problem. Just one more round
	 * through the futex maze. Jump for it.
	 */
	mutex_unlock(&exiting->futex_exit_mutex);

	put_task_struct(exiting);
}
1164
/*
 * Handle the race between a waiter observing the futex owner's TID and that
 * owner exiting.
 *
 * Return:
 *  -EBUSY  - the owner is exiting but not yet dead; caller should wait for
 *	      the exit to complete (see wait_for_owner_exiting()) and retry
 *  -EFAULT - re-reading the futex word faulted
 *  -EAGAIN - the futex word changed under us; retry the whole operation
 *  -ESRCH  - the user space TID refers to a dead or nonexistent task while
 *	      the futex word is unchanged: either OWNER_DIED handling is in
 *	      progress or user space supplied a bogus TID
 */
static int handle_exit_race(u32 __user *uaddr, u32 uval,
			    struct task_struct *tsk)
{
	u32 uval2;

	/*
	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
	 * caller that the alleged owner is busy.
	 */
	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
		return -EBUSY;

	/*
	 * Reread the user space value to handle the following situation:
	 *
	 * CPU0				CPU1
	 *
	 * sys_exit()			sys_futex()
	 *  do_exit()			 futex_lock_pi()
	 *                                futex_lock_pi_atomic()
	 *   exit_signals(tsk)		    No waiters:
	 *    tsk->flags |= PF_EXITING;	    *uaddr == 0x00000PID
	 *  mm_release(tsk)		    Set waiter bit
	 *   exit_robust_list(tsk) {	    *uaddr = 0x80000PID;
	 *      Set owner died		    attach_to_pi_owner() {
	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
	 *   }				     if (!tsk->flags & PF_EXITING) {
	 *  ...				       attach();
	 *  tsk->futex_state =               } else {
	 *	FUTEX_STATE_DEAD;              if (tsk->futex_state !=
	 *					  FUTEX_STATE_DEAD)
	 *				         return -EAGAIN;
	 *				       return -ESRCH; <--- FAIL
	 *				     }
	 *
	 * Returning ESRCH unconditionally is wrong here because the
	 * user space value has been changed by the exiting task.
	 */
	if (get_futex_value_locked(&uval2, uaddr))
		return -EFAULT;

	/* If the user space value has changed, try again. */
	if (uval2 != uval)
		return -EAGAIN;

	/*
	 * The exiting task did not have a robust list, the robust list was
	 * corrupted or the user space value in *uaddr is simply bogus.
	 * Give up and tell user space.
	 */
	return -ESRCH;
}
1220
1221
1222
1223
1224
/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 */
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
			      struct futex_pi_state **ps,
			      struct task_struct **exiting)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct futex_pi_state *pi_state;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0 [1]
	 *
	 * The !pid check is paranoid. None of the call sites should end up
	 * with pid == 0, but better safe than sorry. Let the caller retry
	 */
	if (!pid)
		return -EAGAIN;
	p = find_get_task_by_vpid(pid);
	if (!p)
		return handle_exit_race(uaddr, uval, NULL);

	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state to figure out, whether the
	 * task is exiting. To protect against the change of the task state
	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
		 * The task is on the way out. When the futex state is
		 * FUTEX_STATE_DEAD, we know that the task has finished
		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);

		raw_spin_unlock_irq(&p->pi_lock);
		/*
		 * If the owner task is between FUTEX_STATE_EXITING and
		 * FUTEX_STATE_DEAD then store the task pointer and keep
		 * the reference on the task struct. The calling code will
		 * drop all locks, wait for the task to reach
		 * FUTEX_STATE_DEAD and then drop the refcount. This is
		 * required to prevent a live lock when the current task
		 * preempted the exiting task between the two states.
		 */
		if (ret == -EBUSY)
			*exiting = p;
		else
			put_task_struct(p);
		return ret;
	}

	/*
	 * No existing pi state. First waiter. [2]
	 *
	 * This creates pi_state, we have hb->lock held, this means nothing can
	 * observe this state, wait_lock is irrelevant.
	 */
	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	/*
	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
	 * because there is no concurrency as the object is not published yet.
	 */
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
1314
/*
 * Find or create the pi_state for the futex at @uaddr: attach to the state
 * of an existing top waiter if there is one, otherwise attach to the owner
 * encoded in @uval.  Called with hb->lock held.
 */
static int lookup_pi_state(u32 __user *uaddr, u32 uval,
			   struct futex_hash_bucket *hb,
			   union futex_key *key, struct futex_pi_state **ps,
			   struct task_struct **exiting)
{
	struct futex_q *top_waiter = futex_top_waiter(hb, key);

	/*
	 * If there is a waiter on that futex, validate it and
	 * attach to the pi_state when the validation succeeds.
	 */
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * We are the first waiter - try to look up the owner based on
	 * @uval and attach to it.
	 */
	return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
}
1335
/*
 * Atomically transition the futex word from @uval to @newval, failing with
 * -EAGAIN when the word changed under us.  Called with hb->lock held.
 */
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
	int err;
	u32 uninitialized_var(curval);

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
	if (unlikely(err))
		return err;

	/* If user space value changed, let the caller retry */
	return curval != uval ? -EAGAIN : 0;
}
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @exiting:		Pointer to store the task pointer of the owner task
 *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  -  0 - ready to wait;
 *  -  1 - acquired the lock;
 *  - <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *top_waiter;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (get_futex_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	if ((unlikely(should_fail_futex(true))))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	top_waiter = futex_top_waiter(hb, key);
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		/* If the take over worked, return 1 */
		return ret < 0 ? ret : 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}
1453
1454
1455
1456
1457
1458
1459
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
		return;
	lockdep_assert_held(q->lock_ptr);

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}
1472
1473
1474
1475
1476
1477
1478
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	get_task_struct(p);
	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
	 * is written, without taking any locks. This is possible in the event
	 * of a spurious wakeup, for example. A memory barrier is required here
	 * to prevent the following store to lock_ptr from getting ahead of the
	 * plist_del in __unqueue_futex().
	 */
	smp_store_release(&q->lock_ptr, NULL);

	/*
	 * Queue the task for later wakeup for after we've released
	 * the hb->lock.
	 */
	wake_q_add_safe(wake_q, p);
}
1503
1504
1505
1506
1507static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
1508{
1509 u32 uninitialized_var(curval), newval;
1510 struct task_struct *new_owner;
1511 bool postunlock = false;
1512 DEFINE_WAKE_Q(wake_q);
1513 int ret = 0;
1514
1515 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
1516 if (WARN_ON_ONCE(!new_owner)) {
1517
1518
1519
1520
1521
1522
1523
1524
1525 ret = -EAGAIN;
1526 goto out_unlock;
1527 }
1528
1529
1530
1531
1532
1533
1534 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
1535
1536 if (unlikely(should_fail_futex(true)))
1537 ret = -EFAULT;
1538
1539 ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
1540 if (!ret && (curval != uval)) {
1541
1542
1543
1544
1545
1546
1547 if ((FUTEX_TID_MASK & curval) == uval)
1548 ret = -EAGAIN;
1549 else
1550 ret = -EINVAL;
1551 }
1552
1553 if (ret)
1554 goto out_unlock;
1555
1556
1557
1558
1559
1560
1561 raw_spin_lock(&pi_state->owner->pi_lock);
1562 WARN_ON(list_empty(&pi_state->list));
1563 list_del_init(&pi_state->list);
1564 raw_spin_unlock(&pi_state->owner->pi_lock);
1565
1566 raw_spin_lock(&new_owner->pi_lock);
1567 WARN_ON(!list_empty(&pi_state->list));
1568 list_add(&pi_state->list, &new_owner->pi_state_list);
1569 pi_state->owner = new_owner;
1570 raw_spin_unlock(&new_owner->pi_lock);
1571
1572 postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
1573
1574out_unlock:
1575 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
1576
1577 if (postunlock)
1578 rt_mutex_postunlock(&wake_q);
1579
1580 return ret;
1581}
1582
1583
1584
1585
1586static inline void
1587double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1588{
1589 if (hb1 <= hb2) {
1590 spin_lock(&hb1->lock);
1591 if (hb1 < hb2)
1592 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
1593 } else {
1594 spin_lock(&hb2->lock);
1595 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
1596 }
1597}
1598
1599static inline void
1600double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1601{
1602 spin_unlock(&hb1->lock);
1603 if (hb1 != hb2)
1604 spin_unlock(&hb2->lock);
1605}
1606
1607
1608
1609
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	DEFINE_WAKE_Q(wake_q);

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		goto out_put_key;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
out_put_key:
	put_futex_key(&key);
out:
	return ret;
}
1658
/*
 * Decode the FUTEX_WAKE_OP encoded operation, apply it atomically to the
 * user space word at @uaddr and evaluate the encoded comparison against the
 * old value.
 *
 * Return: >0 when the comparison is true, 0 when false, <0 on error.
 */
static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
	unsigned int op =	  (encoded_op & 0x70000000) >> 28;
	unsigned int cmp =	  (encoded_op & 0x0f000000) >> 24;
	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
	int oldval, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
		if (oparg < 0 || oparg > 31) {
			char comm[sizeof(current->comm)];
			/*
			 * kill this print and return -EINVAL when userspace
			 * is sane again
			 */
			pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
					get_task_comm(comm, current), oparg);
			oparg &= 31;
		}
		oparg = 1 << oparg;
	}

	if (!access_ok(uaddr, sizeof(u32)))
		return -EFAULT;

	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	if (ret)
		return ret;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ:
		return oldval == cmparg;
	case FUTEX_OP_CMP_NE:
		return oldval != cmparg;
	case FUTEX_OP_CMP_LT:
		return oldval < cmparg;
	case FUTEX_OP_CMP_GE:
		return oldval >= cmparg;
	case FUTEX_OP_CMP_LE:
		return oldval <= cmparg;
	case FUTEX_OP_CMP_GT:
		return oldval > cmparg;
	default:
		return -ENOSYS;
	}
}
1705
1706
1707
1708
1709
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	DEFINE_WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {
		double_unlock_hb(hb1, hb2);

		if (!IS_ENABLED(CONFIG_MMU) ||
		    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
			/*
			 * we don't get EFAULT from MMU faults if we don't have
			 * an MMU, but we might get them from range checking
			 */
			ret = op_ret;
			goto out_put_keys;
		}

		if (op_ret == -EFAULT) {
			ret = fault_in_user_writeable(uaddr2);
			if (ret)
				goto out_put_keys;
		}

		if (!(flags & FLAGS_SHARED)) {
			cond_resched();
			goto retry_private;
		}

		put_futex_key(&key2);
		put_futex_key(&key1);
		cond_resched();
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				mark_wake_futex(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
1802
1803
1804
1805
1806
1807
1808
1809
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		hb_waiters_inc(hb2);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	q->key = *key2;
}
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @exiting:		Pointer to store the task pointer of the owner task
 *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  -  0 - failed to acquire the lock atomically;
 *  - >0 - acquired the lock, return value is vpid of the top_waiter
 *  - <0 - error
 */
static int
futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
			   struct futex_hash_bucket *hb2, union futex_key *key1,
			   union futex_key *key2, struct futex_pi_state **ps,
			   struct task_struct **exiting, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret, vpid;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unecessarily as it will force the subsequent unlock to enter
	 * the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   exiting, set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;
	}
	return ret;
}
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1953 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1954 u32 *cmpval, int requeue_pi)
1955{
1956 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1957 int task_count = 0, ret;
1958 struct futex_pi_state *pi_state = NULL;
1959 struct futex_hash_bucket *hb1, *hb2;
1960 struct futex_q *this, *next;
1961 DEFINE_WAKE_Q(wake_q);
1962
1963 if (nr_wake < 0 || nr_requeue < 0)
1964 return -EINVAL;
1965
1966
1967
1968
1969
1970
1971
1972 if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
1973 return -ENOSYS;
1974
1975 if (requeue_pi) {
1976
1977
1978
1979
1980 if (uaddr1 == uaddr2)
1981 return -EINVAL;
1982
1983
1984
1985
1986
1987 if (refill_pi_state_cache())
1988 return -ENOMEM;
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999 if (nr_wake != 1)
2000 return -EINVAL;
2001 }
2002
2003retry:
2004 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
2005 if (unlikely(ret != 0))
2006 goto out;
2007 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
2008 requeue_pi ? FUTEX_WRITE : FUTEX_READ);
2009 if (unlikely(ret != 0))
2010 goto out_put_key1;
2011
2012
2013
2014
2015
2016 if (requeue_pi && match_futex(&key1, &key2)) {
2017 ret = -EINVAL;
2018 goto out_put_keys;
2019 }
2020
2021 hb1 = hash_futex(&key1);
2022 hb2 = hash_futex(&key2);
2023
2024retry_private:
2025 hb_waiters_inc(hb2);
2026 double_lock_hb(hb1, hb2);
2027
2028 if (likely(cmpval != NULL)) {
2029 u32 curval;
2030
2031 ret = get_futex_value_locked(&curval, uaddr1);
2032
2033 if (unlikely(ret)) {
2034 double_unlock_hb(hb1, hb2);
2035 hb_waiters_dec(hb2);
2036
2037 ret = get_user(curval, uaddr1);
2038 if (ret)
2039 goto out_put_keys;
2040
2041 if (!(flags & FLAGS_SHARED))
2042 goto retry_private;
2043
2044 put_futex_key(&key2);
2045 put_futex_key(&key1);
2046 goto retry;
2047 }
2048 if (curval != *cmpval) {
2049 ret = -EAGAIN;
2050 goto out_unlock;
2051 }
2052 }
2053
2054 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
2055 struct task_struct *exiting = NULL;
2056
2057
2058
2059
2060
2061
2062
2063 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
2064 &key2, &pi_state,
2065 &exiting, nr_requeue);
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076 if (ret > 0) {
2077 WARN_ON(pi_state);
2078 task_count++;
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091 ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
2092 &pi_state, &exiting);
2093 }
2094
2095 switch (ret) {
2096 case 0:
2097
2098 break;
2099
2100
2101 case -EFAULT:
2102 double_unlock_hb(hb1, hb2);
2103 hb_waiters_dec(hb2);
2104 put_futex_key(&key2);
2105 put_futex_key(&key1);
2106 ret = fault_in_user_writeable(uaddr2);
2107 if (!ret)
2108 goto retry;
2109 goto out;
2110 case -EBUSY:
2111 case -EAGAIN:
2112
2113
2114
2115
2116
2117
2118 double_unlock_hb(hb1, hb2);
2119 hb_waiters_dec(hb2);
2120 put_futex_key(&key2);
2121 put_futex_key(&key1);
2122
2123
2124
2125
2126
2127 wait_for_owner_exiting(ret, exiting);
2128 cond_resched();
2129 goto retry;
2130 default:
2131 goto out_unlock;
2132 }
2133 }
2134
2135 plist_for_each_entry_safe(this, next, &hb1->chain, list) {
2136 if (task_count - nr_wake >= nr_requeue)
2137 break;
2138
2139 if (!match_futex(&this->key, &key1))
2140 continue;
2141
2142
2143
2144
2145
2146
2147
2148
2149 if ((requeue_pi && !this->rt_waiter) ||
2150 (!requeue_pi && this->rt_waiter) ||
2151 this->pi_state) {
2152 ret = -EINVAL;
2153 break;
2154 }
2155
2156
2157
2158
2159
2160
2161 if (++task_count <= nr_wake && !requeue_pi) {
2162 mark_wake_futex(&wake_q, this);
2163 continue;
2164 }
2165
2166
2167 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
2168 ret = -EINVAL;
2169 break;
2170 }
2171
2172
2173
2174
2175
2176 if (requeue_pi) {
2177
2178
2179
2180
2181
2182 get_pi_state(pi_state);
2183 this->pi_state = pi_state;
2184 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
2185 this->rt_waiter,
2186 this->task);
2187 if (ret == 1) {
2188
2189
2190
2191
2192
2193
2194
2195
2196 requeue_pi_wake_futex(this, &key2, hb2);
2197 continue;
2198 } else if (ret) {
2199
2200
2201
2202
2203
2204
2205
2206
2207 this->pi_state = NULL;
2208 put_pi_state(pi_state);
2209
2210
2211
2212
2213 break;
2214 }
2215 }
2216 requeue_futex(this, hb1, hb2, &key2);
2217 }
2218
2219
2220
2221
2222
2223
2224 put_pi_state(pi_state);
2225
2226out_unlock:
2227 double_unlock_hb(hb1, hb2);
2228 wake_up_q(&wake_q);
2229 hb_waiters_dec(hb2);
2230
2231out_put_keys:
2232 put_futex_key(&key2);
2233out_put_key1:
2234 put_futex_key(&key1);
2235out:
2236 return ret ? ret : task_count;
2237}
2238
2239
/*
 * queue_lock() - hash the futex key and take the hash-bucket lock.
 *
 * Returns with hb->lock held; the caller releases it via queue_unlock()
 * on error or via queue_me() when enqueueing.
 */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	/*
	 * Increment the waiter counter *before* taking the lock, so that
	 * a concurrent waker that checks the counter won't miss a task
	 * which is about to sleep but is still spinning on hb->lock.
	 * This is safe as all queue_lock() users end up calling
	 * queue_me().  On the error paths queue_unlock() decrements the
	 * counter again.
	 */
	hb_waiters_inc(hb);

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}
2262
/*
 * queue_unlock() - undo queue_lock() without enqueueing: drop the
 * bucket lock and release the waiter-count reference taken in
 * queue_lock().
 */
static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}
2270
/*
 * __queue_me() - enqueue @q on the bucket's priority list.
 *
 * Caller must hold hb->lock (taken via queue_lock()).
 */
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 *   (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
}
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
/*
 * queue_me() - enqueue @q on its hash bucket and drop the bucket lock
 * taken by queue_lock().  After this the task can be found by a wakeup
 * and must be removed with unqueue_me() (or by the waker).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	__queue_me(q, hb);
	spin_unlock(&hb->lock);
}
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
/*
 * unqueue_me() - remove @q from its hash bucket, if still queued.
 *
 * Returns 1 if the task was still queued (we removed it ourselves),
 * 0 if a waker already removed it (q->lock_ptr was NULLed).
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	/*
	 * q->lock_ptr can change between this read and the following
	 * spin_lock(): a waker may requeue or wake us concurrently, so
	 * re-validate under the lock below.
	 */
	lock_ptr = READ_ONCE(q->lock_ptr);
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		/* PI futexes are never unqueued through this path. */
		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	return ret;
}
2362
2363
2364
2365
2366
2367
/*
 * PI futexes can not be requeued and must remove themselves from the
 * hash bucket.  The hash bucket lock (i.e. *q->lock_ptr) is held on
 * entry and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	/* Drop the reference on the pi_state attached to this waiter. */
	put_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}
2379
/*
 * fixup_pi_state_owner() - make the user-space futex value and the
 * kernel pi_state agree on the owner of the futex.
 *
 * @uaddr:    user address of the futex word
 * @q:        the futex_q, q->pi_state is the state to fix up
 * @argowner: the new owner (current) when we acquired the lock, or
 *            NULL when we were de-boosted/unlocked and must hand the
 *            ownership accounting to the actual rt_mutex owner
 *
 * Must be called with q->lock_ptr (the hash bucket lock) held.
 * Takes and releases pi_state->pi_mutex.wait_lock internally; on the
 * fault/retry path (handle_err) both locks are temporarily dropped,
 * so the state is re-validated afterwards.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	u32 uval, uninitialized_var(curval), newval;
	struct task_struct *oldowner, *newowner;
	u32 newtid;
	int ret, err = 0;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	oldowner = pi_state->owner;

	/*
	 * We are here because either:
	 *
	 *  - we stole the lock and pi_state->owner needs updating to
	 *    reflect that (@argowner == current), or
	 *
	 *  - someone stole our lock and we need to fix things to point
	 *    to the new owner (@argowner == NULL).
	 *
	 * Either way we have to replace the TID in the user space
	 * variable.  This must be atomic as we have to preserve the
	 * owner died bit here.
	 */
retry:
	if (!argowner) {
		if (oldowner != current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up.  Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
		}

		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
			/* We got the lock after all, nothing to fix. */
			ret = 0;
			goto out_unlock;
		}

		/*
		 * Since we just failed the trylock; there must be an
		 * owner.
		 */
		newowner = rt_mutex_owner(&pi_state->pi_mutex);
		BUG_ON(!newowner);
	} else {
		WARN_ON_ONCE(argowner != current);
		if (oldowner == current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up.  Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
		}
		newowner = argowner;
	}

	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	err = get_futex_value_locked(&uval, uaddr);
	if (err)
		goto handle_err;

	/* cmpxchg loop: preserve FUTEX_OWNER_DIED, install the new TID. */
	for (;;) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
		if (err)
			goto handle_err;

		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space.  Now we need to fix the pi_state
	 * itself: move it from the old owner's pi_state_list to the
	 * new owner's.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock(&newowner->pi_lock);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	return 0;

	/*
	 * In order to reschedule or handle a page fault, we need to drop
	 * the locks here.  In the case of a fault, this gives the other
	 * task (either the highest priority waiter itself or the task
	 * which stole the rtmutex) the chance to try the fixup of the
	 * pi_state.  So once we are back from handling the fault we need
	 * to check the pi_state after reacquiring the locks and before
	 * trying to do another fixup.  When the fixup has been done
	 * already by the other task, we are done.
	 */
handle_err:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	spin_unlock(q->lock_ptr);

	switch (err) {
	case -EFAULT:
		ret = fault_in_user_writeable(uaddr);
		break;

	case -EAGAIN:
		cond_resched();
		ret = 0;
		break;

	default:
		WARN_ON_ONCE(1);
		ret = err;
		break;
	}

	spin_lock(q->lock_ptr);
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Check if someone else fixed it for us while the locks were
	 * dropped:
	 */
	if (pi_state->owner != oldowner) {
		ret = 0;
		goto out_unlock;
	}

	if (ret)
		goto out_unlock;

	goto retry;

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
2548
2549static long futex_wait_restart(struct restart_block *restart);
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
/*
 * fixup_owner() - post-lock pi_state owner fixup.
 *
 * @uaddr:  user address of the futex word
 * @q:      futex_q (must point to the requeue target futex in the
 *          requeue_pi case)
 * @locked: whether we actually acquired the rt_mutex (1) or not (0)
 *
 * Called after attempting to lock the pi futex.  Makes the pi_state
 * owner bookkeeping consistent with the actual rt_mutex state.
 *
 * Return:
 *  -  1 - success, lock taken;
 *  -  0 - success, lock not taken;
 *  - <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock.  We might not be the anticipated owner
		 * if we did a lock-steal - fix up the PI-state in that
		 * case:
		 *
		 * Speculative pi_state->owner read (we don't hold
		 * wait_lock); since we own the lock pi_state->owner ==
		 * current is the stable state, anything else needs the
		 * fixup.
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * If we didn't get the lock; check if anybody stole it from us.
	 * In that case, we need to fix up the uval to point to them
	 * instead of us, otherwise bad things happen.
	 *
	 * Another speculative read; pi_state->owner == current is
	 * unstable but needs the fixup.
	 */
	if (q->pi_state->owner == current) {
		ret = fixup_pi_state_owner(uaddr, q, NULL);
		goto out;
	}

	/*
	 * Paranoia check.  If we did not take the lock, then we should
	 * not be the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);
	}

out:
	return ret ? ret : locked;
}
2611
2612
2613
2614
2615
2616
2617
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or NULL for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it.  set_current_state() is implemented using smp_store_mb()
	 * and queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout)
		hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling.  Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
/**
 * futex_wait_setup() - prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value
 * and compare it with the expected value.  Handle atomic faults
 * internally.  Return with the hb lock held and a q.key reference on
 * success, and unlocked with no q.key reference on failure.
 *
 * Return:
 *  -  0 - uaddr contains val and hb has been locked;
 *  - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb
 *         is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr,
	 * that would open a race condition where we could block
	 * indefinitely if cond(var) changed between the test and
	 * queueing ourselves.  The hb lock serializes us against
	 * futex_wake(), which holds the same lock.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		/* Faulted with the hb lock held: drop it, fault in the
		 * page with a sleeping get_user() and retry. */
		queue_unlock(*hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}
2725
2726static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2727 ktime_t *abs_time, u32 bitset)
2728{
2729 struct hrtimer_sleeper timeout, *to;
2730 struct restart_block *restart;
2731 struct futex_hash_bucket *hb;
2732 struct futex_q q = futex_q_init;
2733 int ret;
2734
2735 if (!bitset)
2736 return -EINVAL;
2737 q.bitset = bitset;
2738
2739 to = futex_setup_timer(abs_time, &timeout, flags,
2740 current->timer_slack_ns);
2741retry:
2742
2743
2744
2745
2746 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2747 if (ret)
2748 goto out;
2749
2750
2751 futex_wait_queue_me(hb, &q, to);
2752
2753
2754 ret = 0;
2755
2756 if (!unqueue_me(&q))
2757 goto out;
2758 ret = -ETIMEDOUT;
2759 if (to && !to->task)
2760 goto out;
2761
2762
2763
2764
2765
2766 if (!signal_pending(current))
2767 goto retry;
2768
2769 ret = -ERESTARTSYS;
2770 if (!abs_time)
2771 goto out;
2772
2773 restart = ¤t->restart_block;
2774 restart->fn = futex_wait_restart;
2775 restart->futex.uaddr = uaddr;
2776 restart->futex.val = val;
2777 restart->futex.time = *abs_time;
2778 restart->futex.bitset = bitset;
2779 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
2780
2781 ret = -ERESTART_RESTARTBLOCK;
2782
2783out:
2784 if (to) {
2785 hrtimer_cancel(&to->timer);
2786 destroy_hrtimer_on_stack(&to->timer);
2787 }
2788 return ret;
2789}
2790
2791
2792static long futex_wait_restart(struct restart_block *restart)
2793{
2794 u32 __user *uaddr = restart->futex.uaddr;
2795 ktime_t t, *tp = NULL;
2796
2797 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
2798 t = restart->futex.time;
2799 tp = &t;
2800 }
2801 restart->fn = do_no_restart_syscall;
2802
2803 return (long)futex_wait(uaddr, restart->futex.flags,
2804 restart->futex.val, tp, restart->futex.bitset);
2805}
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
/*
 * futex_lock_pi() - userspace-visible FUTEX_LOCK_PI / FUTEX_TRYLOCK_PI.
 *
 * @uaddr:   the PI futex userspace address
 * @flags:   futex flags (FLAGS_SHARED, ...)
 * @time:    optional absolute CLOCK_REALTIME timeout, or NULL
 * @trylock: non-zero for FUTEX_TRYLOCK_PI (don't block)
 *
 * Userspace tried a 0 -> TID atomic transition of the futex value and
 * failed; the kernel side acquires the associated rt_mutex, keeping the
 * user value and the pi_state consistent via fixup_owner().
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	if (refill_pi_state_cache())
		return -ENOMEM;

	to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
				   &exiting, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,
		 * or failed. Either way, we do _not_ block.
		 */
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - EBUSY: Task is exiting and we just wait for
			 *   the exit to complete.
			 * - EAGAIN: The user space value changed.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
			/*
			 * Handle the case where the owner is in the middle
			 * of exiting.  Wait for the exit to complete
			 * otherwise this task might loop forever, aka. live
			 * lock.
			 */
			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	WARN_ON(!q.pi_state);

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	__queue_me(&q, hb);

	if (trylock) {
		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
		goto no_block;
	}

	rt_mutex_init_waiter(&rt_waiter);

	/*
	 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must
	 * not hold it while doing rt_mutex_start_proxy(), because then
	 * it will include hb->lock in the blocking chain, even through
	 * we'll not in fact hold it while blocking.  This creates lock
	 * inversion issues.
	 *
	 * Therefore acquire wait_lock while holding hb->lock, but drop
	 * the latter before calling __rt_mutex_start_proxy_lock().  This
	 * interleaves with futex_unlock_pi() -- which does a similar
	 * lock handoff -- such that the latter can observe the futex_q::
	 * pi_state before doing the handoff and wakeup.
	 */
	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
	spin_unlock(q.lock_ptr);
	/*
	 * __rt_mutex_start_proxy_lock() unconditionally enqueues the
	 * waiter and returns 1 when the lock was acquired, 0 otherwise.
	 */
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);

	if (ret) {
		if (ret == 1)
			ret = 0;
		goto cleanup;
	}

	if (unlikely(to))
		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);

	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

cleanup:
	spin_lock(q.lock_ptr);
	/*
	 * If we failed to acquire the lock (deadlock/signal/timeout), we
	 * must unwind the above; however we canNOT unwind it if we got
	 * the lock after all (a racing unlock granted it to us), in
	 * which case cleanup_proxy_lock() returns false and we take the
	 * lock-acquired path after all.
	 */
	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
		ret = 0;

no_block:
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it
	 * acquired the lock, clear -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault,
	 * unlock it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
		pi_state = q.pi_state;
		get_pi_state(pi_state);
	}

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}
3002
3003
3004
3005
3006
3007
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb;
	struct futex_q *top_waiter;
	int ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
	if (ret)
		return ret;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * Check waiters first.  We do not trust user space values at
	 * all and we at least want to know if user space fiddled
	 * with the futex value instead of blindly unlocking.
	 */
	top_waiter = futex_top_waiter(hb, &key);
	if (top_waiter) {
		struct futex_pi_state *pi_state = top_waiter->pi_state;

		ret = -EINVAL;
		if (!pi_state)
			goto out_unlock;

		/*
		 * If current does not own the pi_state then the futex is
		 * inconsistent and user space fiddled with the futex value.
		 */
		if (pi_state->owner != current)
			goto out_unlock;

		get_pi_state(pi_state);
		/*
		 * By taking wait_lock while still holding hb->lock, we
		 * ensure there is no point where we hold neither; and
		 * therefore wake_futex_pi() must observe a state
		 * consistent with what we observed.
		 *
		 * In particular; this forces __rt_mutex_start_proxy()
		 * to complete such that we're guaranteed to observe the
		 * rt_waiter.  Also see the WARN in wake_futex_pi().
		 */
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		spin_unlock(&hb->lock);

		/* drops pi_state->pi_mutex.wait_lock */
		ret = wake_futex_pi(uaddr, uval, pi_state);

		put_pi_state(pi_state);

		/*
		 * Success, we're done! No tricky corner cases.
		 */
		if (!ret)
			goto out_putkey;
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		/*
		 * A unconditional UNLOCK_PI op raced against a waiter
		 * setting the FUTEX_WAITERS bit.  Try again.
		 */
		if (ret == -EAGAIN)
			goto pi_retry;
		/*
		 * wake_futex_pi has detected invalid state.  Tell user
		 * space.
		 */
		goto out_putkey;
	}

	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel.  Waiters which are about to queue themselves are stuck
	 * on hb->lock.  So we can safely ignore them.  We do neither
	 * preserve the WAITERS bit not the OWNER_DIED one.  We are the
	 * owner.
	 */
	if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
		spin_unlock(&hb->lock);
		switch (ret) {
		case -EFAULT:
			goto pi_faulted;

		case -EAGAIN:
			goto pi_retry;

		default:
			WARN_ON_ONCE(1);
			goto out_putkey;
		}
	}

	/*
	 * If uval has changed, let user space handle it.
	 */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
out_putkey:
	put_futex_key(&key);
	return ret;

pi_retry:
	put_futex_key(&key);
	cond_resched();
	goto retry;

pi_faulted:
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
/**
 * handle_early_requeue_pi_wakeup() - handle a wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the
 * requeue target futex.  If so, determine the cause of the wakeup and
 * return the appropriate error code to the caller.  Must be called with
 * the hb lock held.
 *
 * Return:
 *  -  0 = no early wakeup detected;
 *  - <0 = -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the
	 * wakeup.  We only need to hold hb (and not hb2) to ensure
	 * atomicity as the wakeup code can't change q.key from uaddr to
	 * uaddr2 if we hold hb.  We can't unqueue sleepers that are
	 * woken on uaddr2 here as they have already been requeued.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a
		 * signal.  Unqueue the futex_q and determine which it
		 * was.
		 */
		plist_del(&q->list, &hb->chain);
		hb_waiters_dec(hb);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
/**
 * futex_wait_requeue_pi() - wait on uaddr and take the pi futex uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they
 *		must be the same type, no requeueing from private to
 *		shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by
 * futex_requeue() to uaddr2 which must be PI aware and unique from
 * uaddr.  Normal wakeup will wake on uaddr2 and complete the
 * acquisition of the rt_mutex prior to returning to userspace.
 *
 * Return:
 *  -  0 - On success;
 *  - <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);

	/*
	 * The waiter is allocated on our stack, manipulated by the
	 * requeue code while we sleep on uaddr.
	 */
	rt_mutex_init_waiter(&rt_waiter);

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr.  On success, increments q.key (key1)
	 * ref count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes.  We need to compare the keys:
	 */
	if (match_futex(&q.key, &key2)) {
		queue_unlock(hb);
		ret = -EINVAL;
		goto out_put_keys;
	}

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and
	 * since we took the hb->lock above, we also know that
	 * futex_requeue() has completed and we no longer have to concern
	 * ourselves with rt mutex waiter races.
	 *
	 * The requeue code sets rt_waiter to NULL if it acquired uaddr2
	 * for us on its own (atomic lock acquisition or owner-died
	 * takeover); otherwise it left us blocked on the rt_mutex.
	 */
	if (!q.rt_waiter) {
		/*
		 * Got the lock.  We might not be the anticipated owner
		 * if we did a lock-steal - fix up the PI-state in that
		 * case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
				pi_state = q.pi_state;
				get_pi_state(pi_state);
			}
			/*
			 * Drop the reference to the pi state which the
			 * requeue_pi() code acquired for us.
			 */
			put_pi_state(q.pi_state);
			spin_unlock(q.lock_ptr);
		}
	} else {
		struct rt_mutex *pi_mutex;

		/*
		 * We have been woken up in rt_mutex_wait_proxy_lock() by
		 * the requeue code.  Acquire the rt_mutex (possibly
		 * waiting out the timeout) and fix up the pi_state owner
		 * as above.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);

		spin_lock(q.lock_ptr);
		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
			ret = 0;

		debug_rt_mutex_free_waiter(&rt_waiter);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock
		 * if we haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If
		 * it acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/*
		 * If fixup_pi_state_owner() faulted and was unable to
		 * handle the fault, unlock the rt_mutex and return the
		 * fault to userspace.
		 */
		if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
			pi_state = q.pi_state;
			get_pi_state(pi_state);
		}

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by
		 * calling futex_lock_pi() directly.  We could restart
		 * this syscall, but it would detect that the user space
		 * "val" changed and return -EWOULDBLOCK.  Save the
		 * overhead of the restart and return -EWOULDBLOCK
		 * directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
3422 size_t, len)
3423{
3424 if (!futex_cmpxchg_enabled)
3425 return -ENOSYS;
3426
3427
3428
3429 if (unlikely(len != sizeof(*head)))
3430 return -EINVAL;
3431
3432 current->robust_list = head;
3433
3434 return 0;
3435}
3436
3437
3438
3439
3440
3441
3442
/*
 * sys_get_robust_list() - get the robust-futexes list head of a task
 * @pid:      pid of the process, or 0 for the calling task
 * @head_ptr: where to store the list head user pointer
 * @len_ptr:  where to store sizeof(*head)
 *
 * Requires PTRACE_MODE_READ permission on the target task.
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	/* RCU protects the task lookup and the p->robust_list read. */
	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	head = p->robust_list;
	rcu_read_unlock();

	/* put_user() may sleep, so it happens outside the RCU section. */
	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
3481
3482
3483#define HANDLE_DEATH_PENDING true
3484#define HANDLE_DEATH_LIST false
3485
3486
3487
3488
3489
/*
 * handle_futex_death() - process a robust-list entry of a dying task.
 *
 * @uaddr:      user address of the futex word
 * @curr:       the exiting task
 * @pi:         whether this is a PI futex
 * @pending_op: whether this entry came from list_op_pending (an
 *              operation that may not have completed)
 *
 * Marks the futex with FUTEX_OWNER_DIED if the dying task owns it, and
 * wakes one waiter so robust mutexes don't deadlock on owner death.
 * Returns 0 on success, -1 on unrecoverable fault/misalignment.
 */
static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
			      bool pi, bool pending_op)
{
	u32 uval, uninitialized_var(nval), mval;
	int err;

	/* Futex address must be 32bit aligned */
	if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
		return -1;

retry:
	if (get_user(uval, uaddr))
		return -1;

	/*
	 * Special case for regular (non PI) futexes.  The unlock path in
	 * user space has two race scenarios:
	 *
	 * 1. The unlock path releases the user space futex value and
	 *    before it can execute the futex() syscall to wake up
	 *    waiters it is killed.
	 *
	 * 2. A woken up waiter is killed before it can acquire the
	 *    futex in user space.
	 *
	 * In both cases the TID validation below prevents a wakeup of
	 * potential waiters which can cause these waiters to block
	 * forever.
	 *
	 * In both cases the following conditions are met:
	 *
	 *	1) task->robust_list->list_op_pending != NULL
	 *	   @pending_op == true
	 *	2) User space futex value == 0
	 *	3) Regular futex: @pi == false
	 *
	 * If these conditions are met, it is safe to attempt waking up a
	 * potential waiter without touching the user space futex value
	 * and trying to set the OWNER_DIED bit.
	 */
	if (pending_op && !pi && !uval) {
		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
		return 0;
	}

	/* Only act on futexes this dying task actually owns. */
	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
		return 0;

	/*
	 * Ok, this dying thread is truly holding a futex of interest.
	 * Set the OWNER_DIED bit atomically via cmpxchg, and if the
	 * value changed in the meantime, repeat.  (We don't want to
	 * handle the case where this thread owned the futex but was
	 * interrupted between get_user() and the cmpxchg by another
	 * thread taking and releasing the futex.)
	 */
	mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;

	/*
	 * We are not holding a lock here, but we want to have the
	 * pagefault_disable/enable() protection because we want to
	 * handle the fault gracefully.  If the access fails we try to
	 * fault in the futex with R/W verification before rechecking:
	 * if that fails we might have no resolution other than to die.
	 */
	if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
		switch (err) {
		case -EFAULT:
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;

		case -EAGAIN:
			cond_resched();
			goto retry;

		default:
			WARN_ON_ONCE(1);
			return err;
		}
	}

	if (nval != uval)
		goto retry;

	/*
	 * Wake robust non-PI futexes here.  The wakeup of PI futexes
	 * happens in exit_pi_state():
	 */
	if (!pi && (uval & FUTEX_WAITERS))
		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);

	return 0;
}
3593
3594
3595
3596
3597static inline int fetch_robust_entry(struct robust_list __user **entry,
3598 struct robust_list __user * __user *head,
3599 unsigned int *pi)
3600{
3601 unsigned long uentry;
3602
3603 if (get_user(uentry, (unsigned long __user *)head))
3604 return -EFAULT;
3605
3606 *entry = (void __user *)(uentry & ~1UL);
3607 *pi = uentry & 1;
3608
3609 return 0;
3610}
3611
3612
3613
3614
3615
3616
3617
/*
 * Walk curr->robust_list, handling futex death for each entry.
 *
 * The list is entirely in user space and may be concurrently modified
 * or maliciously corrupted, so every access is fault-checked and the
 * walk is bounded by ROBUST_LIST_LIMIT.
 */
static void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending) {
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi, HANDLE_DEATH_LIST))
				return;
		}
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	/* Finally, handle the pending operation, if any. */
	if (pending) {
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip, HANDLE_DEATH_PENDING);
	}
}
3682
/*
 * futex_cleanup() - release all futex state held by an exiting or
 * exec()ing task: the native and compat robust lists, and any PI
 * state the task still owns.
 */
static void futex_cleanup(struct task_struct *tsk)
{
	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
		tsk->robust_list = NULL;
	}

#ifdef CONFIG_COMPAT
	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
		tsk->compat_robust_list = NULL;
	}
#endif

	/* Release any PI futexes the task still owns, waking waiters. */
	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
}
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
/*
 * futex_exit_recursive() - called when the exit path is entered
 * recursively (e.g. the futex cleanup itself faults and kills the
 * task).  If the task was in FUTEX_STATE_EXITING it still holds
 * futex_exit_mutex (taken in futex_cleanup_begin()), so release it to
 * avoid deadlocking waiters, then mark the state DEAD so no further
 * futex cleanup is attempted.
 */
void futex_exit_recursive(struct task_struct *tsk)
{
	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
	if (tsk->futex_state == FUTEX_STATE_EXITING)
		mutex_unlock(&tsk->futex_exit_mutex);
	tsk->futex_state = FUTEX_STATE_DEAD;
}
3725
/*
 * futex_cleanup_begin() - transition the task into FUTEX_STATE_EXITING
 * before the futex state is torn down.
 */
static void futex_cleanup_begin(struct task_struct *tsk)
{
	/*
	 * Prevent various race issues against a concurrent incoming
	 * waiter including live locks by forcing the waiter to block on
	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
	 * attach_to_pi_owner().
	 */
	mutex_lock(&tsk->futex_exit_mutex);

	/*
	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
	 *
	 * This ensures that all subsequent checks of tsk->futex_state in
	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
	 * tsk->pi_lock held.
	 *
	 * It guarantees also that a pi_state which was queued right
	 * before the state change under tsk->pi_lock by a concurrent
	 * waiter must be observed in exit_pi_state_list().
	 */
	raw_spin_lock_irq(&tsk->pi_lock);
	tsk->futex_state = FUTEX_STATE_EXITING;
	raw_spin_unlock_irq(&tsk->pi_lock);
}
3751
/*
 * futex_cleanup_end() - leave the EXITING state and drop the exit
 * mutex.  @state is FUTEX_STATE_OK (exec, futexes usable again) or
 * FUTEX_STATE_DEAD (exit).
 */
static void futex_cleanup_end(struct task_struct *tsk, int state)
{
	/*
	 * Lockless readers (attach_to_pi_owner()) observe either the
	 * fully cleaned up state or FUTEX_STATE_EXITING; both are
	 * correct once the cleanup above has completed.
	 */
	tsk->futex_state = state;
	/*
	 * Drop the exit protection.  This unblocks waiters which
	 * observed FUTEX_STATE_EXITING to reevaluate the state.
	 */
	mutex_unlock(&tsk->futex_exit_mutex);
}
3765
/*
 * futex_exec_release() - release futex state over exec().
 */
void futex_exec_release(struct task_struct *tsk)
{
	/*
	 * The state handling is done for consistency, but in the case of
	 * exec() there is no way to prevent further damage as the PID
	 * stays the same.  But for the unlikely and arguably buggy case
	 * that a futex is held on exec(), this provides at least as much
	 * state consistency protection which is possible.
	 */
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	/*
	 * Reset the state to FUTEX_STATE_OK.  The task is alive and
	 * about to exec().  From this point on the task can use futexes
	 * again.
	 */
	futex_cleanup_end(tsk, FUTEX_STATE_OK);
}
3783
/*
 * futex_exit_release() - release futex state on task exit.  The task
 * ends up in FUTEX_STATE_DEAD; any later attach attempt by a waiter
 * fails.
 */
void futex_exit_release(struct task_struct *tsk)
{
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
}
3790
3791long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
3792 u32 __user *uaddr2, u32 val2, u32 val3)
3793{
3794 int cmd = op & FUTEX_CMD_MASK;
3795 unsigned int flags = 0;
3796
3797 if (!(op & FUTEX_PRIVATE_FLAG))
3798 flags |= FLAGS_SHARED;
3799
3800 if (op & FUTEX_CLOCK_REALTIME) {
3801 flags |= FLAGS_CLOCKRT;
3802 if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
3803 cmd != FUTEX_WAIT_REQUEUE_PI)
3804 return -ENOSYS;
3805 }
3806
3807 switch (cmd) {
3808 case FUTEX_LOCK_PI:
3809 case FUTEX_UNLOCK_PI:
3810 case FUTEX_TRYLOCK_PI:
3811 case FUTEX_WAIT_REQUEUE_PI:
3812 case FUTEX_CMP_REQUEUE_PI:
3813 if (!futex_cmpxchg_enabled)
3814 return -ENOSYS;
3815 }
3816
3817 switch (cmd) {
3818 case FUTEX_WAIT:
3819 val3 = FUTEX_BITSET_MATCH_ANY;
3820
3821 case FUTEX_WAIT_BITSET:
3822 return futex_wait(uaddr, flags, val, timeout, val3);
3823 case FUTEX_WAKE:
3824 val3 = FUTEX_BITSET_MATCH_ANY;
3825
3826 case FUTEX_WAKE_BITSET:
3827 return futex_wake(uaddr, flags, val, val3);
3828 case FUTEX_REQUEUE:
3829 return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
3830 case FUTEX_CMP_REQUEUE:
3831 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
3832 case FUTEX_WAKE_OP:
3833 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
3834 case FUTEX_LOCK_PI:
3835 return futex_lock_pi(uaddr, flags, timeout, 0);
3836 case FUTEX_UNLOCK_PI:
3837 return futex_unlock_pi(uaddr, flags);
3838 case FUTEX_TRYLOCK_PI:
3839 return futex_lock_pi(uaddr, flags, NULL, 1);
3840 case FUTEX_WAIT_REQUEUE_PI:
3841 val3 = FUTEX_BITSET_MATCH_ANY;
3842 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
3843 uaddr2);
3844 case FUTEX_CMP_REQUEUE_PI:
3845 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
3846 }
3847 return -ENOSYS;
3848}
3849
3850
/*
 * sys_futex() - the futex syscall entry point.
 *
 * Converts the user-supplied timespec to ktime_t (FUTEX_WAIT uses a
 * relative timeout, so it is converted to absolute here) and reuses
 * the @utime slot as the integer @val2 argument for the requeue and
 * wake-op commands.
 */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
			return -EFAULT;
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		/* FUTEX_WAIT takes a relative timeout; make it absolute. */
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
3885
3886#ifdef CONFIG_COMPAT
3887
3888
3889
/*
 * Fetch a robust-list pointer (compat, 32-bit).  Bit 0 signals a
 * PI futex; the remaining bits are the entry's compat address.
 */
static inline int
compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
		   compat_uptr_t __user *head, unsigned int *pi)
{
	if (get_user(*uentry, head))
		return -EFAULT;

	*entry = compat_ptr((*uentry) & ~1);
	*pi = (unsigned int)(*uentry) & 1;

	return 0;
}
3902
3903static void __user *futex_uaddr(struct robust_list __user *entry,
3904 compat_long_t futex_offset)
3905{
3906 compat_uptr_t base = ptr_to_compat(entry);
3907 void __user *uaddr = compat_ptr(base + futex_offset);
3908
3909 return uaddr;
3910}
3911
3912
3913
3914
3915
3916
3917
/*
 * Walk curr->compat_robust_list, handling futex death for each entry.
 * Compat (32-bit pointer) variant of exit_robust_list(); the walk is
 * fault-checked and bounded by ROBUST_LIST_LIMIT for the same reasons.
 */
static void compat_exit_robust_list(struct task_struct *curr)
{
	struct compat_robust_list_head __user *head = curr->compat_robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	compat_uptr_t uentry, next_uentry, upending;
	compat_long_t futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (compat_fetch_robust_entry(&upending, &pending,
			       &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != (struct robust_list __user *) &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
			(compat_uptr_t __user *)&entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * dont process it twice:
		 */
		if (entry != pending) {
			void __user *uaddr = futex_uaddr(entry, futex_offset);

			if (handle_futex_death(uaddr, curr, pi,
					       HANDLE_DEATH_LIST))
				return;
		}
		if (rc)
			return;
		uentry = next_uentry;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}
	/* Finally, handle the pending operation, if any. */
	if (pending) {
		void __user *uaddr = futex_uaddr(pending, futex_offset);

		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
	}
}
3988
/*
 * Compat variant of sys_set_robust_list(): register the 32-bit robust
 * list head of the calling task.  Only the pointer is stored; the list
 * is walked at exit time by compat_exit_robust_list().
 */
COMPAT_SYSCALL_DEFINE2(set_robust_list,
		struct compat_robust_list_head __user *, head,
		compat_size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	/* Reject a caller built against a different list-head layout. */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->compat_robust_list = head;

	return 0;
}
4003
/*
 * compat_sys_get_robust_list(): fetch the 32-bit robust-futex list head
 * of the task identified by @pid (0 means the calling task), subject to
 * a ptrace access check.
 *
 * Returns 0 on success; -ENOSYS without futex cmpxchg support, -ESRCH if
 * no such task, -EPERM if the caller may not inspect it, -EFAULT if the
 * user pointers cannot be written.
 */
COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
			compat_uptr_t __user *, head_ptr,
			compat_size_t __user *, len_ptr)
{
	struct compat_robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	/* RCU protects the task lookup and the read of p->compat_robust_list. */
	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	/* Snapshot the head before dropping RCU; p may go away after that. */
	head = p->compat_robust_list;
	rcu_read_unlock();

	/* put_user() may fault/sleep, so it must happen outside the RCU section. */
	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(ptr_to_compat(head), head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
4042
4043COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
4044 struct compat_timespec __user *, utime, u32 __user *, uaddr2,
4045 u32, val3)
4046{
4047 struct timespec ts;
4048 ktime_t t, *tp = NULL;
4049 int val2 = 0;
4050 int cmd = op & FUTEX_CMD_MASK;
4051
4052 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
4053 cmd == FUTEX_WAIT_BITSET ||
4054 cmd == FUTEX_WAIT_REQUEUE_PI)) {
4055 if (compat_get_timespec(&ts, utime))
4056 return -EFAULT;
4057 if (!timespec_valid(&ts))
4058 return -EINVAL;
4059
4060 t = timespec_to_ktime(ts);
4061 if (cmd == FUTEX_WAIT)
4062 t = ktime_add_safe(ktime_get(), t);
4063 tp = &t;
4064 }
4065 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
4066 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
4067 val2 = (int) (unsigned long) utime;
4068
4069 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
4070}
4071#endif
4072
/*
 * Boot-time probe for a working futex_atomic_cmpxchg_inatomic().
 *
 * Architectures that declare CONFIG_HAVE_FUTEX_CMPXCHG compile
 * futex_cmpxchg_enabled to the constant 1 and this probe is empty.
 * Otherwise we determine support at runtime, below.
 */
static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
	u32 curval;

	/*
	 * This will fail and we want it. A cmpxchg on the NULL user
	 * address must return -EFAULT if the operation is actually
	 * implemented; any other result means the architecture has no
	 * working futex cmpxchg, and futex_cmpxchg_enabled stays 0 so
	 * callers that depend on it (e.g. the robust-list syscalls
	 * above) report -ENOSYS.
	 *
	 * NOTE(review): presumably this must run where a fault on the
	 * NULL user address is recoverable (boot, no user mm active) —
	 * confirm against the arch implementations.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;
#endif
}
4092
4093static int __init futex_init(void)
4094{
4095 unsigned int futex_shift;
4096 unsigned long i;
4097
4098#if CONFIG_BASE_SMALL
4099 futex_hashsize = 16;
4100#else
4101 futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
4102#endif
4103
4104 futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
4105 futex_hashsize, 0,
4106 futex_hashsize < 256 ? HASH_SMALL : 0,
4107 &futex_shift, NULL,
4108 futex_hashsize, futex_hashsize);
4109 futex_hashsize = 1UL << futex_shift;
4110
4111 futex_detect_cmpxchg();
4112
4113 for (i = 0; i < futex_hashsize; i++) {
4114 atomic_set(&futex_queues[i].waiters, 0);
4115 plist_head_init(&futex_queues[i].chain);
4116 spin_lock_init(&futex_queues[i].lock);
4117 }
4118
4119 return 0;
4120}
4121core_initcall(futex_init);
4122