1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34#include <linux/compat.h>
35#include <linux/jhash.h>
36#include <linux/pagemap.h>
37#include <linux/syscalls.h>
38#include <linux/freezer.h>
39#include <linux/memblock.h>
40#include <linux/fault-inject.h>
41#include <linux/time_namespace.h>
42
43#include <asm/futex.h>
44
45#include "locking/rtmutex_common.h"
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
/*
 * Whether the architecture provides a functional futex cmpxchg. With
 * CONFIG_HAVE_FUTEX_CMPXCHG this is known at compile time; otherwise it
 * is a runtime flag (presumably probed at boot — set outside this chunk).
 */
#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
#define futex_cmpxchg_enabled 1
#else
static int __read_mostly futex_cmpxchg_enabled;
#endif
152
153
154
155
156
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#ifdef CONFIG_MMU
# define FLAGS_SHARED 0x01
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * shared-futex code away.
 */
# define FLAGS_SHARED 0x00
#endif
#define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04
168
169
170
171
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex_base pi_mutex;

	struct task_struct *owner;
	refcount_t refcount;

	union futex_key key;
} __randomize_layout;
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list node linking this entry into its
 *			hash bucket chain
 * @task:		the task waiting on the futex
 * @lock_ptr:		points to the hash bucket lock protecting this entry
 * @key:		the key the futex is hashed on
 * @pi_state:		PI state, attached when this is a PI/requeue-PI waiter
 * @rt_waiter:		rt_mutex waiter, set for requeue-PI waiters
 * @requeue_pi_key:	the target futex key of a requeue-PI operation
 * @bitset:		bitset used for the optional bitmasked wakeup match
 * @requeue_state:	state machine for the requeue-PI handshake
 *			(see the Q_REQUEUE_PI_* enum below)
 * @requeue_wait:	rcuwait used to block in the requeue-PI handshake on
 *			PREEMPT_RT
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
	atomic_t requeue_state;
#ifdef CONFIG_PREEMPT_RT
	struct rcuwait requeue_wait;
#endif
} __randomize_layout;
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
/*
 * States of the futex_q::requeue_state machine used to serialize the
 * requeue-PI handshake between the waiter and the requeue side. The
 * transitions are driven by futex_requeue_pi_prepare(), _complete() and
 * _wakeup_sync() below; DONE/LOCKED encode whether the requeue side
 * already acquired the target lock on the waiter's behalf.
 */
enum {
	Q_REQUEUE_PI_NONE = 0,
	Q_REQUEUE_PI_IGNORE,
	Q_REQUEUE_PI_IN_PROGRESS,
	Q_REQUEUE_PI_WAIT,
	Q_REQUEUE_PI_DONE,
	Q_REQUEUE_PI_LOCKED,
};
280
static const struct futex_q futex_q_init = {
	/* list gets initialized when the futex_q is actually queued */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY,
	.requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
};
287
288
289
290
291
292
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location. Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
} ____cacheline_aligned_in_smp;
298
299
300
301
302
303
/*
 * The base of the bucket array and its size are always used together
 * (after initialization only in hash_futex()), so ensure that they
 * reside in the same cacheline.
 */
static struct {
	struct futex_hash_bucket *queues;
	unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues (__futex_data.queues)
#define futex_hashsize (__futex_data.hashsize)
310
311
312
313
314
/*
 * Fault injection support: when CONFIG_FAIL_FUTEX is enabled, user-space
 * futex accesses can be made to fail artificially (boot parameter
 * "fail_futex=" and debugfs knobs) to exercise the fault paths.
 */
#ifdef CONFIG_FAIL_FUTEX

static struct {
	struct fault_attr attr;

	bool ignore_private;
} fail_futex = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_private = false,
};

static int __init setup_fail_futex(char *str)
{
	return setup_fault_attr(&fail_futex.attr, str);
}
__setup("fail_futex=", setup_fail_futex);

/* Decide whether this (shared or private) access should be forced to fail. */
static bool should_fail_futex(bool fshared)
{
	/* Optionally exempt process-private futexes from injection. */
	if (fail_futex.ignore_private && !fshared)
		return false;

	return should_fail(&fail_futex.attr, 1);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

/* Expose the fault attributes and the ignore-private toggle in debugfs. */
static int __init fail_futex_debugfs(void)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_futex", NULL,
					&fail_futex.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	debugfs_create_bool("ignore-private", mode, dir,
			    &fail_futex.ignore_private);
	return 0;
}

late_initcall(fail_futex_debugfs);

#endif

#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif /* CONFIG_FAIL_FUTEX */
367
#ifdef CONFIG_COMPAT
/* Forward declaration; the compat robust-list walker is defined later. */
static void compat_exit_robust_list(struct task_struct *curr);
#endif
371
372
373
374
/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier: pairs with the smp_mb() in hb_waiters_pending() so
	 * that a concurrent waker cannot miss this waiter.
	 */
	smp_mb__after_atomic();
#endif
}
385
386
387
388
389
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
396
/*
 * Return the number of waiters on the bucket, used by wakers to avoid
 * taking the bucket lock when nobody is queued. On !SMP we cannot tell,
 * so always report "pending".
 */
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier: pairs with smp_mb__after_atomic() in
	 * hb_waiters_inc() so the waiter count is observed after the
	 * waker's futex-word store.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
409
410
411
412
413
414
415
416
/**
 * hash_futex - Return the hash bucket in the global hash
 * @key:	Pointer to the futex key for which the hash is calculated
 *
 * We hash on the keys returned from get_futex_key (see below) and return the
 * corresponding hash bucket in the global hash. futex_hashsize is a power of
 * two, so masking with (futex_hashsize - 1) selects the bucket.
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
			  key->both.offset);

	return &futex_queues[hash & (futex_hashsize - 1)];
}
424
425
426
427
428
429
430
431
432
433static inline int match_futex(union futex_key *key1, union futex_key *key2)
434{
435 return (key1 && key2
436 && key1->both.word == key2->both.word
437 && key1->both.ptr == key2->both.ptr
438 && key1->both.offset == key2->both.offset);
439}
440
/*
 * How the futex word will be accessed; determines whether a read-only
 * mapping suffices in get_futex_key().
 */
enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};
445
446
447
448
449
450
451
452
453
454
455
/**
 * futex_setup_timer - set up the sleeping hrtimer.
 * @time:	ptr to the given timeout value
 * @timeout:	the hrtimer_sleeper structure to be set up
 * @flags:	futex flags
 * @range_ns:	optional range in ns
 *
 * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
 *	   value given
 */
static inline struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns)
{
	if (!time)
		return NULL;

	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
	/*
	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
	 * effectively the same as calling hrtimer_set_expires().
	 */
	hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);

	return timeout;
}
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
/*
 * Generate a machine wide unique identifier for this inode.
 *
 * This relies on u64 not wrapping in the life-time of the machine; which with
 * 1ns resolution means almost 585 years.
 *
 * Value 0 means "not yet assigned"; the global counter i_seq skips 0 so the
 * lazy-initialization check below stays unambiguous. If the cmpxchg loses a
 * race, the winner's value is returned so all CPUs agree on one identifier.
 */
static u64 get_inode_sequence_number(struct inode *inode)
{
	static atomic64_t i_seq;
	u64 old;

	/* Does the inode already have a sequence number? */
	old = atomic64_read(&inode->i_sequence);
	if (likely(old))
		return old;

	for (;;) {
		u64 new = atomic64_add_return(1, &i_seq);
		if (WARN_ON_ONCE(!new))
			continue;

		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
		if (old)
			return old;
		return new;
	}
}
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: FUTEX_READ,
 *		FUTEX_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in @key on success.
 *
 * For shared mappings, it's (inode->i_sequence, page->index, offset_within_page).
 * For private mappings (or when !@fshared), it's (uaddr, current->mm).
 * We can usually work out the index without swapping in the page.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
			 enum futex_access rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *tail;
	struct address_space *mapping;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(uaddr, sizeof(u32))))
		return -EFAULT;

	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma()
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		return 0;
	}

again:
	/* Ignore any VERIFY_READ mapping (futex common case) */
	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == FUTEX_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

	/*
	 * The treatment of mapping from this point on is critical. The page
	 * lock protects many things but in this context the page lock
	 * stabilizes mapping, prevents inode freeing in the shared
	 * file-backed region case and guards against movement to swap cache.
	 *
	 * Strictly speaking the page lock is not needed in all cases being
	 * considered here and page lock forces unnecessarily serialization.
	 * From this point on, mapping will be re-verified if necessary and
	 * page lock will be acquired only if it is unavoidable.
	 *
	 * Mapping checks require the head page for any compound page so the
	 * head page and mapping is looked up now.
	 */
	tail = page;
	page = compound_head(page);
	mapping = READ_ONCE(page->mapping);

	/*
	 * If page->mapping is NULL, then it cannot be a PageAnon page;
	 * but it might be the ZERO_PAGE, or in the gate area, or in a
	 * special mapping; or it may have been migrated, truncated from
	 * swapcache, or split by THP. Take the page lock to stabilize and
	 * decide between retry and -EFAULT.
	 */
	if (unlikely(!mapping)) {
		int shmem_swizzled;

		/*
		 * Page lock is required to identify which special case above
		 * applies. If the page was truncated or swizzled by shmem,
		 * retry; otherwise fail the access.
		 */
		lock_page(page);
		shmem_swizzled = PageSwapCache(page) || page->mapping;
		unlock_page(page);
		put_page(page);

		if (shmem_swizzled)
			goto again;

		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * If the futex key is stored on an anonymous page, then the associated
	 * object is the mm which is implicitly pinned by the calling process.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (unlikely(should_fail_futex(true)) || ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;

	} else {
		struct inode *inode;

		/*
		 * The associated futex object in this case is the inode and
		 * the page->mapping must be traversed. Ordinarily this should
		 * be stabilised under page lock but it's not strictly
		 * necessary in this case as we just want to pin the inode, not
		 * update the radix tree or anything like that.
		 *
		 * The RCU read lock is taken as the inode is finally freed
		 * under RCU. If the mapping still matches expectations then the
		 * mapping->host can be safely accessed as being a valid inode.
		 */
		rcu_read_lock();

		if (READ_ONCE(page->mapping) != mapping) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		inode = READ_ONCE(mapping->host);
		if (!inode) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.i_seq = get_inode_sequence_number(inode);
		key->shared.pgoff = page_to_pgoff(tail);
		rcu_read_unlock();
	}

out:
	put_page(page);
	return err;
}
718
719
720
721
722
723
724
725
726
727
728
729
730
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling fixup_user_fault() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	mmap_read_lock(mm);
	ret = fixup_user_fault(mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE, NULL);
	mmap_read_unlock(mm);

	return ret < 0 ? ret : 0;
}
743
744
745
746
747
748
749
750
/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}
762
/*
 * cmpxchg the user futex word with pagefaults disabled; any fault is
 * reported to the caller instead of being handled, which is required
 * when holding hb->lock.
 */
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
774
/*
 * Read the user futex word with pagefaults disabled. Returns -EFAULT when
 * the page is not resident; the caller must drop its locks and fault the
 * page in before retrying.
 */
static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __get_user(*dest, from);
	pagefault_enable();

	return ret ? -EFAULT : 0;
}
785
786
787
788
789
790static int refill_pi_state_cache(void)
791{
792 struct futex_pi_state *pi_state;
793
794 if (likely(current->pi_state_cache))
795 return 0;
796
797 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
798
799 if (!pi_state)
800 return -ENOMEM;
801
802 INIT_LIST_HEAD(&pi_state->list);
803
804 pi_state->owner = NULL;
805 refcount_set(&pi_state->refcount, 1);
806 pi_state->key = FUTEX_KEY_INIT;
807
808 current->pi_state_cache = pi_state;
809
810 return 0;
811}
812
813static struct futex_pi_state *alloc_pi_state(void)
814{
815 struct futex_pi_state *pi_state = current->pi_state_cache;
816
817 WARN_ON(!pi_state);
818 current->pi_state_cache = NULL;
819
820 return pi_state;
821}
822
/*
 * Move @pi_state from its old owner's pi_state_list to @new_owner's and
 * update pi_state->owner. Either owner may be NULL (detach only / attach
 * only). Caller must hold pi_state->pi_mutex.wait_lock; each task's list
 * is protected by its own pi_lock.
 */
static void pi_state_update_owner(struct futex_pi_state *pi_state,
				  struct task_struct *new_owner)
{
	struct task_struct *old_owner = pi_state->owner;

	lockdep_assert_held(&pi_state->pi_mutex.wait_lock);

	if (old_owner) {
		raw_spin_lock(&old_owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&old_owner->pi_lock);
	}

	if (new_owner) {
		raw_spin_lock(&new_owner->pi_lock);
		WARN_ON(!list_empty(&pi_state->list));
		list_add(&pi_state->list, &new_owner->pi_state_list);
		pi_state->owner = new_owner;
		raw_spin_unlock(&new_owner->pi_lock);
	}
}
845
/* Take an additional reference; the refcount must already be non-zero. */
static void get_pi_state(struct futex_pi_state *pi_state)
{
	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
}
850
851
852
853
854
/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 */
static void put_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!refcount_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		unsigned long flags;

		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
		pi_state_update_owner(pi_state, NULL);
		rt_mutex_proxy_unlock(&pi_state->pi_mutex);
		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
	}

	if (current->pi_state_cache) {
		kfree(pi_state);
	} else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		refcount_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
889
#ifdef CONFIG_FUTEX_PI

/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
static void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {
		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);

		/*
		 * We can race against put_pi_state() removing itself from the
		 * list (a waiter going away). put_pi_state() will first
		 * decrement the reference count and then modify the list, so
		 * its possible to see the list entry but fail this reference
		 * acquire.
		 *
		 * In that case; drop the locks to let put_pi_state() make
		 * progress and retry the loop.
		 */
		if (!refcount_inc_not_zero(&pi_state->refcount)) {
			raw_spin_unlock_irq(&curr->pi_lock);
			cpu_relax();
			raw_spin_lock_irq(&curr->pi_lock);
			continue;
		}
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		raw_spin_lock(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			/* retain curr->pi_lock for the loop invariant */
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			spin_unlock(&hb->lock);
			put_pi_state(pi_state);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;

		raw_spin_unlock(&curr->pi_lock);
		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
		spin_unlock(&hb->lock);

		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
#else
static inline void exit_pi_state_list(struct task_struct *curr) { }
#endif
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
/*
 * Validate the kernel pi_state found for a pi futex against the user-space
 * value @uval and, when consistent, take a reference and return it in @ps.
 *
 * {uval, pi_state} is serialized by pi_state->pi_mutex.wait_lock; our @uval
 * was read before taking it, so it is re-read and re-validated below.
 *
 * Returns 0 on success, -EFAULT/-EAGAIN/-EINVAL on inconsistency.
 */
static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
			      struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	u32 uval2;
	int ret;

	/*
	 * Userspace might have messed up non-PI and PI futexes.
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	/*
	 * We get here with hb->lock held, and having found a
	 * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
	 * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
	 * which in turn means that futex_lock_pi() still has a reference on
	 * our pi_state.
	 *
	 * The waiter holding a reference on @pi_state also protects against
	 * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
	 * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
	 * free pi_state before we can take a reference ourselves.
	 */
	WARN_ON(!refcount_read(&pi_state->refcount));

	/*
	 * Now that we have a pi_state, we can acquire wait_lock
	 * and do the state validation.
	 */
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Since {uval, pi_state} is serialized by wait_lock, and our current
	 * uval was read without holding it, it can have changed. Verify it
	 * still is what we expect it to be, otherwise retry the entire
	 * operation.
	 */
	if (get_futex_value_locked(&uval2, uaddr))
		goto out_efault;

	if (uval != uval2)
		goto out_eagain;

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state.
			 */
			if (pid)
				goto out_einval;
			/*
			 * Take a ref on the state and return success.
			 */
			goto out_attach;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success.
		 */
		if (!pid)
			goto out_attach;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner.
		 */
		if (!pi_state->owner)
			goto out_einval;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID.
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		goto out_einval;

out_attach:
	get_pi_state(pi_state);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	*ps = pi_state;
	return 0;

out_einval:
	ret = -EINVAL;
	goto out_error;

out_eagain:
	ret = -EAGAIN;
	goto out_error;

out_efault:
	ret = -EFAULT;
	goto out_error;

out_error:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
1178
1179
1180
1181
1182
1183
1184
1185
/**
 * wait_for_owner_exiting - Block until the owner has exited
 * @ret:	owner's current futex lock status
 * @exiting:	Pointer to the exiting task
 *
 * Caller must hold a refcount on @exiting (only meaningful for ret == -EBUSY).
 */
static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
{
	if (ret != -EBUSY) {
		WARN_ON_ONCE(exiting);
		return;
	}

	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
		return;

	mutex_lock(&exiting->futex_exit_mutex);
	/*
	 * No point in doing state checking here. If the waiter got here
	 * while the task was in exec()->exec_futex_release() then it can
	 * have any FUTEX_STATE_* value when the waiter has acquired the
	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
	 * already. Highly unlikely and not a problem. Just one more round
	 * through the futex maze. Taking and releasing the mutex was
	 * sufficient to block here until the exit path released it.
	 */
	mutex_unlock(&exiting->futex_exit_mutex);

	put_task_struct(exiting);
}
1209
/*
 * Handle the race between a waiter attaching to a pi futex owner and that
 * owner exiting concurrently. @tsk may be NULL if the TID no longer maps to
 * a task at all.
 *
 * Return:
 *  -EBUSY  - owner is live but on its way out; caller must wait for it
 *  -EFAULT - user page went away while re-reading the futex word
 *  -EAGAIN - futex word changed; caller must retry the whole operation
 *  -ESRCH  - owner is truly dead and OWNER_DIED handling applies
 */
static int handle_exit_race(u32 __user *uaddr, u32 uval,
			    struct task_struct *tsk)
{
	u32 uval2;

	/*
	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
	 * caller that the alleged owner is busy.
	 */
	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
		return -EBUSY;

	/*
	 * Reread the user space value to handle the following situation:
	 *
	 * CPU0				CPU1
	 *
	 * sys_exit()			sys_futex()
	 *  do_exit()			 futex_lock_pi()
	 *                                futex_lock_pi_atomic()
	 *   exit_signals(tsk)		    No waiters:
	 *    tsk->flags |= PF_EXITING;	    *uaddr == 0x00000PID
	 *  mm_release(tsk)		    Set waiter bit
	 *   exit_robust_list(tsk) {	    *uaddr = 0x80000PID;
	 *      Set owner died		    attach_to_pi_owner() {
	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
	 *   }				     if (!tsk->flags & PF_EXITING) {
	 *  ...				       attach();
	 *  tsk->futex_state = FUTEX_STATE_DEAD;
	 *				     } else {
	 *				       if (tsk->futex_state !=
	 *					  FUTEX_STATE_DEAD)
	 *				         return -EAGAIN;
	 *				       return -ESRCH; <--- FAIL
	 *				     }
	 *
	 * Returning ESRCH unconditionally is wrong here because the
	 * user space value has been changed by the exiting task.
	 */
	if (get_futex_value_locked(&uval2, uaddr))
		return -EFAULT;

	/* If the user space value has changed, try again. */
	if (uval2 != uval)
		return -EAGAIN;

	/*
	 * The exiting task did not have a robust list, the robust list was
	 * corrupted or the user space value in *uaddr is simply bogus.
	 * Give up and tell user space.
	 */
	return -ESRCH;
}
1265
/*
 * Allocate a fresh pi_state from the per-task cache, make task @p the
 * rtmutex proxy owner and link the state onto @p's pi_state_list.
 *
 * Caller must hold p->pi_lock and have verified that @p is alive and not
 * a kernel thread (see attach_to_pi_owner()).
 */
static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
				 struct futex_pi_state **ps)
{
	/*
	 * No existing pi state. First waiter. [2]
	 *
	 * This creates pi_state, we have hb->lock held, this means nothing can
	 * observe this state, wait_lock is irrelevant.
	 */
	struct futex_pi_state *pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	/*
	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
	 * because there is no concurrency as the object is not published yet.
	 */
	pi_state->owner = p;

	*ps = pi_state;
}
1296
1297
1298
1299
/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 */
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
			      struct futex_pi_state **ps,
			      struct task_struct **exiting)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0 [1]
	 *
	 * The !pid check is paranoid. None of the call sites should end up
	 * with pid == 0, but better safe than sorry. Let the caller retry.
	 */
	if (!pid)
		return -EAGAIN;
	p = find_get_task_by_vpid(pid);
	if (!p)
		return handle_exit_race(uaddr, uval, NULL);

	/* A pi futex cannot be owned by a kernel thread. */
	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state to figure out, whether the
	 * task is exiting. To protect against the change of the task state
	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
		 * The task is on the way out. When the futex state is
		 * FUTEX_STATE_DEAD, we know that the task has finished
		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);

		raw_spin_unlock_irq(&p->pi_lock);
		/*
		 * If the owner task is between FUTEX_STATE_EXITING and
		 * FUTEX_STATE_DEAD then store the task pointer and keep
		 * the reference on the task struct. The calling code will
		 * drop all locks, wait for the task to reach
		 * FUTEX_STATE_DEAD and then drop the refcount. This is
		 * required to prevent a live lock when the current task
		 * preempted the exiting task between the two states.
		 */
		if (ret == -EBUSY)
			*exiting = p;
		else
			put_task_struct(p);
		return ret;
	}

	__attach_to_pi_owner(p, key, ps);
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	return 0;
}
1363
1364static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
1365{
1366 int err;
1367 u32 curval;
1368
1369 if (unlikely(should_fail_futex(true)))
1370 return -EFAULT;
1371
1372 err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
1373 if (unlikely(err))
1374 return err;
1375
1376
1377 return curval != uval ? -EAGAIN : 0;
1378}
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @exiting:		Pointer to store the task pointer of the exiting task
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  -  0 - ready to wait;
 *  -  1 - acquired the lock;
 *  - <0 - error
 *
 * The hb->lock must be held by the caller.
 *
 * @exiting is only set when the return value is -EBUSY. If so, this holds
 * a refcount on the exiting task on return and the caller needs to drop it
 * after waiting for the exit to complete.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *top_waiter;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (get_futex_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	if ((unlikely(should_fail_futex(true))))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	top_waiter = futex_top_waiter(hb, key);
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		if (ret)
			return ret;

		/*
		 * If the waiter bit was requested the caller also needs PI
		 * state attached to the new owner of the user space futex.
		 *
		 * @task is guaranteed to be alive and it cannot be exiting
		 * because it is either sleeping or waiting in
		 * futex_requeue_pi_wakeup_sync().
		 *
		 * No need to do the full attach_to_pi_owner() exercise
		 * because @task is known and valid.
		 */
		if (set_waiters) {
			raw_spin_lock_irq(&task->pi_lock);
			__attach_to_pi_owner(task, key, ps);
			raw_spin_unlock_irq(&task->pi_lock);
		}
		return 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}
1499
1500
1501
1502
1503
1504
1505
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
		return;
	lockdep_assert_held(q->lock_ptr);

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}
1518
1519
1520
1521
1522
1523
1524
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	get_task_struct(p);
	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
	 * is written, without taking any locks. This is possible in the event
	 * of a spurious wakeup, for example. A memory barrier is required here
	 * to prevent the following store to lock_ptr from getting ahead of the
	 * plist_del in __unqueue_futex().
	 */
	smp_store_release(&q->lock_ptr, NULL);

	/*
	 * Queue the task for later wakeup for after we've released
	 * the hb->lock.
	 */
	wake_q_add_safe(wake_q, p);
}
1549
1550
1551
1552
/*
 * Caller must hold a reference on @pi_state and its pi_mutex.wait_lock
 * (released on return). Hands the futex and the rtmutex over to the top
 * waiter.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
{
	struct rt_mutex_waiter *top_waiter;
	struct task_struct *new_owner;
	bool postunlock = false;
	DEFINE_RT_WAKE_Q(wqh);
	u32 curval, newval;
	int ret = 0;

	top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
	if (WARN_ON_ONCE(!top_waiter)) {
		/*
		 * As per the comment in futex_unlock_pi() this should not
		 * happen and when it does we'd want to know about it.
		 *
		 * When this happens, give up our locks and try again, giving
		 * the futex_lock_pi() instance time to complete, either by
		 * waiting on the rtmutex or removing itself from the futex
		 * queue.
		 */
		ret = -EAGAIN;
		goto out_unlock;
	}

	new_owner = top_waiter->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always kept
	 * enabled while there is PI state around. We cleanup the owner
	 * died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	if (unlikely(should_fail_futex(true))) {
		ret = -EFAULT;
		goto out_unlock;
	}

	ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
	if (!ret && (curval != uval)) {
		/*
		 * If a unconditional UNLOCK_PI operation (user space did not
		 * try the TID->0 transition) raced with a waiter setting the
		 * FUTEX_WAITERS flag between get_user() and locking the hash
		 * bucket lock, retry the operation.
		 */
		if ((FUTEX_TID_MASK & curval) == uval)
			ret = -EAGAIN;
		else
			ret = -EINVAL;
	}

	if (!ret) {
		/*
		 * This is a point of no return; once we modified the uval
		 * there is no going back and subsequent operations must
		 * not fail.
		 */
		pi_state_update_owner(pi_state, new_owner);
		postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
	}

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	if (postunlock)
		rt_mutex_postunlock(&wqh);

	return ret;
}
1622
1623
1624
1625
1626static inline void
1627double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1628{
1629 if (hb1 <= hb2) {
1630 spin_lock(&hb1->lock);
1631 if (hb1 < hb2)
1632 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
1633 } else {
1634 spin_lock(&hb2->lock);
1635 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
1636 }
1637}
1638
1639static inline void
1640double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1641{
1642 spin_unlock(&hb1->lock);
1643 if (hb1 != hb2)
1644 spin_unlock(&hb2->lock);
1645}
1646
1647
1648
1649
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	DEFINE_WAKE_Q(wake_q);

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		return ret;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
	return ret;
}
1695
/*
 * Decode and execute a FUTEX_WAKE_OP operation on *uaddr, then evaluate
 * the encoded comparison against the old value.
 *
 * Encoding of @encoded_op: op in bits 28-31 (with FUTEX_OP_OPARG_SHIFT
 * flag), cmp in bits 24-27, oparg in bits 12-23 (sign extended), cmparg
 * in bits 0-11 (sign extended).
 *
 * Returns the boolean result of cmp(oldval, cmparg) or a negative error.
 */
static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
	unsigned int op = (encoded_op & 0x70000000) >> 28;
	unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
	int oldval, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
		if (oparg < 0 || oparg > 31) {
			char comm[sizeof(current->comm)];
			/*
			 * kill this print and return -EINVAL when userspace
			 * is sane again
			 */
			pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
					get_task_comm(comm, current), oparg);
			oparg &= 31;
		}
		oparg = 1 << oparg;
	}

	pagefault_disable();
	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	pagefault_enable();
	if (ret)
		return ret;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ:
		return oldval == cmparg;
	case FUTEX_OP_CMP_NE:
		return oldval != cmparg;
	case FUTEX_OP_CMP_LT:
		return oldval < cmparg;
	case FUTEX_OP_CMP_GE:
		return oldval >= cmparg;
	case FUTEX_OP_CMP_LE:
		return oldval <= cmparg;
	case FUTEX_OP_CMP_GT:
		return oldval > cmparg;
	default:
		return -ENOSYS;
	}
}
1741
1742
1743
1744
1745
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	DEFINE_WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		return ret;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {
		double_unlock_hb(hb1, hb2);

		if (!IS_ENABLED(CONFIG_MMU) ||
		    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
			/*
			 * we don't get EFAULT from MMU faults if we don't have
			 * an MMU, but we might get them from range checking
			 */
			ret = op_ret;
			return ret;
		}

		if (op_ret == -EFAULT) {
			ret = fault_in_user_writeable(uaddr2);
			if (ret)
				return ret;
		}

		cond_resched();
		if (!(flags & FLAGS_SHARED))
			goto retry_private;
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				mark_wake_futex(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	return ret;
}
1828
1829
1830
1831
1832
1833
1834
1835
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		hb_waiters_inc(hb2);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	q->key = *key2;
}
1854
/*
 * Requeue side: try to move q->requeue_state from NONE to IN_PROGRESS and
 * attach @pi_state. Returns false when the waiter already gave up
 * (IGNORE); in all other states the transition (or its skip) succeeds.
 */
static inline bool futex_requeue_pi_prepare(struct futex_q *q,
					    struct futex_pi_state *pi_state)
{
	int old, new;

	/*
	 * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has
	 * already set Q_REQUEUE_PI_IGNORE to signal that requeue should
	 * ignore the waiter.
	 */
	old = atomic_read_acquire(&q->requeue_state);
	do {
		if (old == Q_REQUEUE_PI_IGNORE)
			return false;

		/*
		 * futex_proxy_trylock_atomic() might have set it to
		 * IN_PROGRESS and a interleaved early wake to WAIT.
		 *
		 * It was considered to have an extra state for that
		 * trylock, but that would just add more conditionals
		 * all over the place for a dubious value.
		 */
		if (old != Q_REQUEUE_PI_NONE)
			break;

		new = Q_REQUEUE_PI_IN_PROGRESS;
	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));

	q->pi_state = pi_state;
	return true;
}
1887
/*
 * Requeue side: finalize the requeue-PI state. @locked > 0 means the
 * target lock was acquired on the waiter's behalf (-> LOCKED), 0 means
 * requeued but not locked (-> DONE), < 0 means the requeue failed. Wakes
 * a PREEMPT_RT waiter blocked in futex_requeue_pi_wakeup_sync().
 */
static inline void futex_requeue_pi_complete(struct futex_q *q, int locked)
{
	int old, new;

	old = atomic_read_acquire(&q->requeue_state);
	do {
		if (old == Q_REQUEUE_PI_IGNORE)
			return;

		if (locked >= 0) {
			/* Requeue succeeded. Set DONE or LOCKED */
			WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS &&
				     old != Q_REQUEUE_PI_WAIT);
			new = Q_REQUEUE_PI_DONE + locked;
		} else if (old == Q_REQUEUE_PI_IN_PROGRESS) {
			/* Deadlock, no early wakeup interleave */
			new = Q_REQUEUE_PI_NONE;
		} else {
			/* Deadlock, early wakeup interleave. */
			WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT);
			new = Q_REQUEUE_PI_IGNORE;
		}
	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));

#ifdef CONFIG_PREEMPT_RT
	/* If the waiter interleaved with the requeue let it know */
	if (unlikely(old == Q_REQUEUE_PI_WAIT))
		rcuwait_wake_up(&q->requeue_wait);
#endif
}
1918
/*
 * Waiter side: synchronize an early wakeup against an in-flight requeue.
 * Returns the final requeue state (>= Q_REQUEUE_PI_DONE when a requeue
 * completed, Q_REQUEUE_PI_IGNORE/WAIT results otherwise).
 */
static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
{
	int old, new;

	old = atomic_read_acquire(&q->requeue_state);
	do {
		/* Is requeue done already? */
		if (old >= Q_REQUEUE_PI_DONE)
			return old;

		/*
		 * If not done, then tell the requeue code to either ignore
		 * the waiter or to wake it up once the requeue is done.
		 */
		new = Q_REQUEUE_PI_WAIT;
		if (old == Q_REQUEUE_PI_NONE)
			new = Q_REQUEUE_PI_IGNORE;
	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));

	/* If the requeue was in progress, wait for it to complete */
	if (old == Q_REQUEUE_PI_IN_PROGRESS) {
#ifdef CONFIG_PREEMPT_RT
		rcuwait_wait_event(&q->requeue_wait,
				   atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT,
				   TASK_UNINTERRUPTIBLE);
#else
		(void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT);
#endif
	}

	/*
	 * Requeue is now either prohibited or complete. Reread state
	 * because during the wait above it might have changed. Nothing
	 * will modify q->requeue_state after this point.
	 */
	return atomic_read(&q->requeue_state);
}
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.
 *
 * Marks the requeue state as Q_REQUEUE_PI_LOCKED (via the '1' argument to
 * futex_requeue_pi_complete()) and wakes the waiter.
 *
 * Must be called with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	/* Signal locked state to the waiter */
	futex_requeue_pi_complete(q, 1);
	wake_up_state(q->task, TASK_NORMAL);
}
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @exiting:		Pointer to store the task pointer of the exiting task
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it
 * atomically. Wake the top waiter if we succeed. If the caller specified
 * set_waiters, then direct futex_lock_pi_atomic() to force setting the
 * FUTEX_WAITERS bit. hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  -  0 - failed to acquire the lock atomically;
 *  -  1 - acquired the lock;
 *  - <0 - error
 */
static int
futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
			   struct futex_hash_bucket *hb2, union futex_key *key1,
			   union futex_key *key2, struct futex_pi_state **ps,
			   struct task_struct **exiting, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault. If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/*
	 * Ensure that this is a waiter sitting in futex_wait_requeue_pi()
	 * and waiting on the 'waitqueue' futex which is always !PI.
	 */
	if (!top_waiter->rt_waiter || top_waiter->pi_state)
		return -EINVAL;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/* Ensure that this does not race against an early wakeup */
	if (!futex_requeue_pi_prepare(top_waiter, NULL))
		return -EAGAIN;

	/*
	 * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
	 * in the contended case or if @set_waiters is true.
	 *
	 * In the contended case PI state is attached to the lock owner. If
	 * the user space lock can be acquired then PI state is attached to
	 * the new owner (@top_waiter->task) when @set_waiters is true.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   exiting, set_waiters);
	if (ret == 1) {
		/*
		 * Lock was acquired in user space and PI state was
		 * attached to @top_waiter->task. That means state is fully
		 * consistent and the waiter can return to user space
		 * immediately after the wakeup.
		 */
		requeue_pi_wake_futex(top_waiter, key2, hb2);
	} else if (ret < 0) {
		/* Rewind top_waiter::requeue_state */
		futex_requeue_pi_complete(top_waiter, ret);
	} else {
		/*
		 * futex_lock_pi_atomic() did not acquire the user space
		 * futex, but managed to establish the proxy lock and pi
		 * state. top_waiter::requeue_state cannot be fixed up here
		 * because the waiter is not enqueued on the rtmutex
		 * yet. This is handled at the callsite depending on the
		 * result of rt_mutex_start_proxy_lock() which is
		 * guaranteed to be reached with this function returning 0.
		 */
	}
	return ret;
}
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
/*
 * futex_requeue() - wake and/or requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, ...)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (requeue_pi requires exactly 1)
 * @nr_requeue:	number of waiters to requeue
 * @cmpval:	expected value at @uaddr1, or NULL to skip the check
 * @requeue_pi:	if != 0, requeue onto a PI futex (FUTEX_CMP_REQUEUE_PI)
 *
 * Return: >=0 number of tasks woken or requeued, <0 on error.
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	DEFINE_WAKE_Q(wake_q);

	if (nr_wake < 0 || nr_requeue < 0)
		return -EINVAL;

	/* Requeue-PI is only available when PI futexes are configured. */
	if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
		return -ENOSYS;

	if (requeue_pi) {
		/*
		 * Requeueing a futex onto itself makes no sense; this is
		 * also re-checked on the hashed keys below (aliasing).
		 */
		if (uaddr1 == uaddr2)
			return -EINVAL;

		/*
		 * requeue_pi must wake as many tasks as it can acquire the
		 * lock for: one at most, since only the top waiter can win
		 * the PI lock atomically.
		 */
		if (nr_wake != 1)
			return -EINVAL;

		/* Preallocate a pi_state to avoid allocating under the locks. */
		if (refill_pi_state_cache())
			return -ENOMEM;
	}

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? FUTEX_WRITE : FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

	/* Catch uaddr1 != uaddr2 aliases that hash to the same key. */
	if (requeue_pi && match_futex(&key1, &key2))
		return -EINVAL;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	hb_waiters_inc(hb2);
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			/*
			 * Page fault with the bucket locks held: drop them,
			 * fault the page in and retry.
			 */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				return ret;

			/* Private mapping cannot change; skip key re-lookup. */
			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			goto retry;
		}
		/* Userspace value changed under us: let the caller re-evaluate. */
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi) {
		struct task_struct *exiting = NULL;

		/*
		 * Try to acquire uaddr2's PI lock on behalf of the top
		 * waiter.  Pass nr_requeue as set_waiters so FUTEX_WAITERS
		 * is set when further waiters will be requeued.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state,
						 &exiting, nr_requeue);

		switch (ret) {
		case 0:
			/* Lock not taken atomically; requeue loop handles it. */
			break;

		case 1:
			/* Top waiter got the lock and was woken already. */
			task_count++;
			ret = 0;
			break;

		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			return ret;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Owner is exiting (-EBUSY) or the value changed
			 * (-EAGAIN): drop locks, possibly wait for the
			 * exiting owner, then retry from scratch.
			 */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);

			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * Waiter type must match the requeue mode: WAIT_REQUEUE_PI
		 * waiters have an rt_waiter; plain waiters must not, and
		 * nobody may already hold a pi_state.
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter) ||
		    this->pi_state) {
			ret = -EINVAL;
			break;
		}

		/* Plain requeue: wake the first nr_wake, requeue the rest. */
		if (!requeue_pi) {
			if (++task_count <= nr_wake)
				mark_wake_futex(&wake_q, this);
			else
				requeue_futex(this, hb1, hb2, &key2);
			continue;
		}

		/* The waiter must expect to be requeued to uaddr2. */
		if (!match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Each requeued waiter takes a reference on pi_state; the
		 * base reference is dropped after the loop.
		 */
		get_pi_state(pi_state);

		/* Waiter may be racing out; just drop the ref and skip it. */
		if (!futex_requeue_pi_prepare(this, pi_state)) {
			put_pi_state(pi_state);
			continue;
		}

		ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
						this->rt_waiter,
						this->task);

		if (ret == 1) {
			/*
			 * Lock became available for the waiter while
			 * enqueueing on the rt-mutex: wake it with the
			 * lock held, like the atomic proxy path.
			 */
			requeue_pi_wake_futex(this, &key2, hb2);
			task_count++;
		} else if (!ret) {
			/* Waiter is now blocked on the rt-mutex; requeue it. */
			requeue_futex(this, hb1, hb2, &key2);
			futex_requeue_pi_complete(this, 0);
			task_count++;
		} else {
			/*
			 * rt_mutex_start_proxy_lock() failed (e.g. deadlock
			 * detection).  Unwind this waiter's pi_state ref and
			 * report the error; remaining waiters stay queued on
			 * uaddr1.
			 */
			this->pi_state = NULL;
			put_pi_state(pi_state);
			futex_requeue_pi_complete(this, ret);
			break;
		}
	}

	/* Drop the base reference taken by futex_proxy_trylock_atomic(). */
	put_pi_state(pi_state);

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	hb_waiters_dec(hb2);
	return ret ? ret : task_count;
}
2442
2443
/*
 * queue_lock() - look up and lock the hash bucket for @q->key
 *
 * Increments the bucket waiter count *before* taking the lock so that
 * wakers observing the count see a waiter that is about to queue (the
 * waker/waiter ordering is presumably enforced inside hb_waiters_inc()
 * and the subsequent spin_lock -- see their implementations).
 *
 * Return: the locked hash bucket; must be paired with queue_unlock()
 * or queue_me().
 */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	hb_waiters_inc(hb);

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}
2466
/*
 * queue_unlock() - unlock the hash bucket and drop the waiter count
 *
 * Counterpart of queue_lock() for the case where the futex_q was not
 * queued (error paths, value mismatch).
 */
static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}
2474
/*
 * __queue_me() - enqueue @q on the hash bucket's priority list
 *
 * Caller must hold hb->lock (taken via queue_lock()).
 *
 * The plist priority is derived from the task's normal_prio, clamped to
 * MAX_RT_PRIO so that all SCHED_OTHER tasks share one priority band and
 * are thus served FIFO within it, while RT tasks are ordered by priority.
 */
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{
	int prio;

	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
}
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
/*
 * queue_me() - enqueue @q on @hb and release the bucket lock
 *
 * Completes the queue_lock()/queue_me() pair: after this the task is
 * findable by wakers and the caller may sleep.  The matching dequeue is
 * unqueue_me() (or a wakeup path that unqueues on our behalf).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	__queue_me(q, hb);
	spin_unlock(&hb->lock);
}
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
/*
 * unqueue_me() - remove the futex_q from its hash bucket, racing wakers
 *
 * Return: 1 if we were still queued (and removed ourselves),
 *	   0 if a waker already unqueued us (lock_ptr was NULLed).
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

retry:
	/*
	 * q->lock_ptr can change concurrently: a waker (or a requeue) may
	 * move or clear it.  Snapshot it, lock, then re-check under the
	 * lock that it is still the bucket we think it is.
	 */
	lock_ptr = READ_ONCE(q->lock_ptr);
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * Raced with a requeue that changed lock_ptr between the
		 * read and the lock: drop the stale lock and start over.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		/* Non-PI path: there must never be a pi_state attached. */
		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	return ret;
}
2566
2567
2568
2569
2570
/*
 * unqueue_me_pi() - dequeue a PI waiter and drop its pi_state reference
 *
 * Caller must hold q->lock_ptr; unlike unqueue_me() a pi_state must be
 * attached here.
 */
static void unqueue_me_pi(struct futex_q *q)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	put_pi_state(q->pi_state);
	q->pi_state = NULL;
}
2579
/*
 * __fixup_pi_state_owner() - bring the user-space futex value and the
 * kernel pi_state owner back in sync after a lock/unlock race
 * @uaddr:	the PI futex user address
 * @q:		the futex_q of the calling waiter
 * @argowner:	new owner: current when we acquired the lock, NULL when we
 *		lost it and must hand ownership to the actual rt-mutex owner
 *
 * Called with q->lock_ptr and pi_state->pi_mutex.wait_lock held; both are
 * dropped and re-taken on the fault-handling path (handle_err), after
 * which all previously read state must be revalidated.
 *
 * Return: 1 if current ended up owning the lock, 0 if not, <0 on error.
 */
static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				  struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner, *newowner;
	u32 uval, curval, newval, newtid;
	int err = 0;

	oldowner = pi_state->owner;

retry:
	if (!argowner) {
		if (oldowner != current) {
			/* Somebody else already fixed the state up. */
			return 0;
		}

		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
			/* We raced and got the lock after all; keep ownership. */
			return 1;
		}

		/*
		 * We lost the lock: transfer ownership to the task that
		 * actually owns the rt-mutex now.
		 */
		newowner = rt_mutex_owner(&pi_state->pi_mutex);

		/*
		 * No owner yet (top waiter not yet taken over ownership);
		 * retry via the -EAGAIN path which drops the locks and
		 * reschedules.
		 */
		if (unlikely(!newowner)) {
			err = -EAGAIN;
			goto handle_err;
		}
	} else {
		WARN_ON_ONCE(argowner != current);
		if (oldowner == current) {
			/* Already consistent: we own lock and pi_state. */
			return 1;
		}
		newowner = argowner;
	}

	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;

	/* Previous owner died without clearing the value: flag it. */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	err = get_futex_value_locked(&uval, uaddr);
	if (err)
		goto handle_err;

	/* cmpxchg loop to install the new TID in the user-space value. */
	for (;;) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
		if (err)
			goto handle_err;

		if (curval == uval)
			break;
		uval = curval;
	}

	/* User-space value updated; now fix the kernel-side owner. */
	pi_state_update_owner(pi_state, newowner);

	return argowner == current;

	/*
	 * Fault/retry handling: we cannot fault or resched with the locks
	 * held, so drop both, handle the condition, re-take them and
	 * revalidate the owner before retrying.
	 */
handle_err:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	spin_unlock(q->lock_ptr);

	switch (err) {
	case -EFAULT:
		err = fault_in_user_writeable(uaddr);
		break;

	case -EAGAIN:
		cond_resched();
		err = 0;
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	spin_lock(q->lock_ptr);
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/* Someone else fixed it up while the locks were dropped. */
	if (pi_state->owner != oldowner)
		return argowner == current;

	/* Fault handled (or resched done) successfully: retry the fixup. */
	if (!err)
		goto retry;

	/*
	 * Unrecoverable fault on @uaddr.  Keep the kernel state consistent
	 * by pointing pi_state at the real rt-mutex owner even though the
	 * user-space value could not be updated.
	 */
	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));

	return err;
}
2751
/*
 * fixup_pi_state_owner() - locked wrapper around __fixup_pi_state_owner()
 *
 * Takes pi_mutex.wait_lock around the fixup; caller must already hold
 * q->lock_ptr (asserted below).
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	int ret;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	ret = __fixup_pi_state_owner(uaddr, q, argowner);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
2765
2766static long futex_wait_restart(struct restart_block *restart);
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
/*
 * fixup_owner() - post-rt-mutex fixup of the futex and pi_state owner
 * @uaddr:	the PI futex user address
 * @q:		the futex_q of the calling waiter
 * @locked:	whether we acquired the rt-mutex
 *
 * Called after returning from rt_mutex_wait_proxy_lock(), with
 * q->lock_ptr held.
 *
 * Return: 1 if current owns the lock, 0 if not, <0 on error.
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	if (locked) {
		/*
		 * We got the lock.  If the steal/wakeup race left the
		 * pi_state owner pointing elsewhere (or at NULL because the
		 * previous owner died), fix it up to current.
		 */
		if (q->pi_state->owner != current)
			return fixup_pi_state_owner(uaddr, q, current);
		return 1;
	}

	/*
	 * We did not get the lock, but the pi_state says we own it: we were
	 * made owner and then the lock was stolen/handed off; transfer
	 * ownership to the real owner (argowner == NULL).
	 */
	if (q->pi_state->owner == current)
		return fixup_pi_state_owner(uaddr, q, NULL);

	/*
	 * Paranoia check: rt-mutex says we own it although !locked and the
	 * pi_state disagrees.  Repair towards current and warn.
	 */
	if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
		return fixup_pi_state_owner(uaddr, q, current);

	return 0;
}
2819
2820
2821
2822
2823
2824
2825
/*
 * futex_wait_queue_me() - queue_me() and sleep until woken or timed out
 * @hb:		the locked hash bucket (from futex_wait_setup())
 * @q:		the futex_q to enqueue
 * @timeout:	optional hrtimer sleeper, already set up; NULL for none
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * Set the task state *before* queueing and dropping hb->lock so a
	 * waker that finds us on the list observes a sleeping task and
	 * wake_up_state() cannot be lost.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timeout after queueing; expiry clears timeout->task. */
	if (timeout)
		hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);

	/*
	 * If we were already unqueued (woken) there is no point sleeping.
	 * plist_node_empty() is a racy check, but a spurious schedule is
	 * harmless: the wakeup has set us TASK_RUNNING.
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/* Don't sleep if the timeout already fired (!timeout->task). */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
/*
 * futex_wait_setup() - prepare to wait: key lookup, bucket lock, value check
 * @uaddr:	the futex user address
 * @val:	the value the caller expects at @uaddr
 * @flags:	futex flags (FLAGS_SHARED, ...)
 * @q:		the futex_q to initialize (key is filled in)
 * @hb:		out: the locked hash bucket on success
 *
 * The value is re-read under the bucket lock so that a concurrent
 * FUTEX_WAKE after the value change cannot be missed.
 *
 * Return: 0 with *hb locked on success, -EWOULDBLOCK if the value does
 * not match, or another negative error.
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	/* Must use the atomic/non-faulting read while holding hb->lock. */
	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		/* Fault: drop the lock, fault the page in, retry. */
		queue_unlock(*hb);

		ret = get_user(uval, uaddr);
		if (ret)
			return ret;

		/* Private mapping cannot change; skip key re-lookup. */
		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

	return ret;
}
2928
2929static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2930 ktime_t *abs_time, u32 bitset)
2931{
2932 struct hrtimer_sleeper timeout, *to;
2933 struct restart_block *restart;
2934 struct futex_hash_bucket *hb;
2935 struct futex_q q = futex_q_init;
2936 int ret;
2937
2938 if (!bitset)
2939 return -EINVAL;
2940 q.bitset = bitset;
2941
2942 to = futex_setup_timer(abs_time, &timeout, flags,
2943 current->timer_slack_ns);
2944retry:
2945
2946
2947
2948
2949 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2950 if (ret)
2951 goto out;
2952
2953
2954 futex_wait_queue_me(hb, &q, to);
2955
2956
2957 ret = 0;
2958 if (!unqueue_me(&q))
2959 goto out;
2960 ret = -ETIMEDOUT;
2961 if (to && !to->task)
2962 goto out;
2963
2964
2965
2966
2967
2968 if (!signal_pending(current))
2969 goto retry;
2970
2971 ret = -ERESTARTSYS;
2972 if (!abs_time)
2973 goto out;
2974
2975 restart = ¤t->restart_block;
2976 restart->futex.uaddr = uaddr;
2977 restart->futex.val = val;
2978 restart->futex.time = *abs_time;
2979 restart->futex.bitset = bitset;
2980 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
2981
2982 ret = set_restart_fn(restart, futex_wait_restart);
2983
2984out:
2985 if (to) {
2986 hrtimer_cancel(&to->timer);
2987 destroy_hrtimer_on_stack(&to->timer);
2988 }
2989 return ret;
2990}
2991
2992
2993static long futex_wait_restart(struct restart_block *restart)
2994{
2995 u32 __user *uaddr = restart->futex.uaddr;
2996 ktime_t t, *tp = NULL;
2997
2998 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
2999 t = restart->futex.time;
3000 tp = &t;
3001 }
3002 restart->fn = do_no_restart_syscall;
3003
3004 return (long)futex_wait(uaddr, restart->futex.flags,
3005 restart->futex.val, tp, restart->futex.bitset);
3006}
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
/*
 * futex_lock_pi() - FUTEX_LOCK_PI/FUTEX_LOCK_PI2/FUTEX_TRYLOCK_PI
 * @uaddr:	the PI futex user address
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, ...)
 * @time:	absolute timeout, or NULL
 * @trylock:	if != 0, only try to acquire, never block on the rt-mutex
 *
 * Return: 0 on acquisition, -ETIMEDOUT, -ERESTARTNOINTR on signal, or a
 * negative error from the atomic acquisition path.
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to;
	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	/* Preallocate a pi_state so we never allocate under hb->lock. */
	if (refill_pi_state_cache())
		return -ENOMEM;

	to = futex_setup_timer(time, &timeout, flags, 0);

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	/* Fast path: try to take the futex atomically in user space value. */
	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
				   &exiting, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock atomically. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Owner exiting (-EBUSY) or value changed (-EAGAIN):
			 * drop the lock, possibly wait for the exiting owner,
			 * then retry from the top.
			 */
			queue_unlock(hb);

			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	WARN_ON(!q.pi_state);

	/* Queue before blocking so unlockers can find and boost us. */
	__queue_me(&q, hb);

	if (trylock) {
		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
		/* Trylock reports 1 on success; map to 0/-EWOULDBLOCK. */
		ret = ret ? 0 : -EWOULDBLOCK;
		goto no_block;
	}

	rt_mutex_init_waiter(&rt_waiter);

	/*
	 * Lock ordering: pi_mutex.wait_lock is taken *before* dropping
	 * hb->lock (q.lock_ptr) so the enqueue on the rt-mutex cannot race
	 * with a wakeup/steal that observes us neither on the hash bucket
	 * nor on the rt-mutex.
	 */
	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
	spin_unlock(q.lock_ptr);

	/* Enqueue current as an rt-mutex waiter (may return 1 = acquired). */
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);

	if (ret) {
		if (ret == 1)
			ret = 0;
		goto cleanup;
	}

	if (unlikely(to))
		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);

	/* Block until the rt-mutex is acquired, times out or is interrupted. */
	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

cleanup:
	spin_lock(q.lock_ptr);
	/*
	 * On failure, check whether we in fact acquired the lock during the
	 * cleanup race window; if so treat it as success.
	 */
	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
		ret = 0;

no_block:
	/* Sync user-space value and pi_state owner with the outcome. */
	res = fixup_owner(uaddr, &q, !ret);

	/* fixup_owner(): 1 = we own it (success), <0 = error, 0 = keep ret. */
	if (res)
		ret = (res < 0) ? res : 0;

	unqueue_me_pi(&q);
	spin_unlock(q.lock_ptr);
	goto out;

out_unlock_put_key:
	queue_unlock(hb);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	/* Signals restart the whole syscall transparently. */
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	goto retry;
}
3183
3184
3185
3186
3187
3188
/*
 * futex_unlock_pi() - FUTEX_UNLOCK_PI implementation
 * @uaddr:	the PI futex user address
 * @flags:	futex flags (FLAGS_SHARED, ...)
 *
 * Called by userspace when the fast unlock path failed (waiters present
 * or value mismatch).  Only the owner (TID in the futex value) may
 * unlock.
 *
 * Return: 0 on success, -EPERM if not owner, -EAGAIN on value races,
 * other negative errors on faults or inconsistent state.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 curval, uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb;
	struct futex_q *top_waiter;
	int ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;

	/* Only the registered owner may release the futex. */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
	if (ret)
		return ret;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * If there is a waiter, hand the lock to the top waiter via the
	 * rt-mutex; otherwise just clear the value ourselves below.
	 */
	top_waiter = futex_top_waiter(hb, &key);
	if (top_waiter) {
		struct futex_pi_state *pi_state = top_waiter->pi_state;

		/* A PI futex waiter without pi_state is corrupted state. */
		ret = -EINVAL;
		if (!pi_state)
			goto out_unlock;

		/*
		 * The kernel-side owner must match; if not, userspace holds
		 * inconsistent state and we refuse.
		 */
		if (pi_state->owner != current)
			goto out_unlock;

		get_pi_state(pi_state);
		/*
		 * Take pi_mutex.wait_lock before dropping hb->lock so the
		 * pi_state (held via the ref above) and its rt-mutex stay
		 * consistent across the handoff in wake_futex_pi().
		 */
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		spin_unlock(&hb->lock);

		/* Drops pi_mutex.wait_lock internally. */
		ret = wake_futex_pi(uaddr, uval, pi_state);

		put_pi_state(pi_state);

		/* Success: the lock and the futex value were handed over. */
		if (!ret)
			return ret;

		/* Fault writing the user-space value: fix up and retry. */
		if (ret == -EFAULT)
			goto pi_faulted;

		/* Value changed under us (e.g. robust handling): retry. */
		if (ret == -EAGAIN)
			goto pi_retry;

		/* Any other failure is reported to the caller. */
		return ret;
	}

	/*
	 * No kernel waiter: clear the futex value (TID -> 0) ourselves.
	 * cmpxchg guards against a new waiter having set FUTEX_WAITERS.
	 */
	if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
		spin_unlock(&hb->lock);
		switch (ret) {
		case -EFAULT:
			goto pi_faulted;

		case -EAGAIN:
			goto pi_retry;

		default:
			WARN_ON_ONCE(1);
			return ret;
		}
	}

	/* Value changed between get_user() and cmpxchg: let caller retry. */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
	return ret;

pi_retry:
	cond_resched();
	goto retry;

pi_faulted:
	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
/*
 * handle_early_requeue_pi_wakeup() - handle a wakeup that happened before
 * the waiter was requeued to uaddr2
 * @hb:		the hash bucket the waiter is still queued on (uaddr1)
 * @q:		the waiter's futex_q
 * @timeout:	optional timeout sleeper
 *
 * Called with hb->lock held when futex_requeue_pi_wakeup_sync() reported
 * Q_REQUEUE_PI_IGNORE: the wakeup was caused by timeout/signal, not by
 * the requeue path.
 *
 * Return: -ETIMEDOUT, -ERESTARTNOINTR (signal) or -EWOULDBLOCK.
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q,
				   struct hrtimer_sleeper *timeout)
{
	int ret;

	/* Not requeued yet, so we must still be on uaddr1's bucket. */
	WARN_ON_ONCE(&hb->lock != q->lock_ptr);

	/* Dequeue by hand; __unqueue_futex() equivalent for this path. */
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);

	/* Decide the return value from why we woke. */
	ret = -EWOULDBLOCK;
	if (timeout && !timeout->task)
		ret = -ETIMEDOUT;
	else if (signal_pending(current))
		ret = -ERESTARTNOINTR;
	return ret;
}
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
/*
 * futex_wait_requeue_pi() - FUTEX_WAIT_REQUEUE_PI implementation
 * @uaddr:	the non-PI futex to wait on initially
 * @flags:	futex flags
 * @val:	expected value at @uaddr
 * @abs_time:	absolute timeout, or NULL
 * @bitset:	wakeup bitset; must be non-zero
 * @uaddr2:	the PI futex we expect to be requeued to
 *
 * Waits on @uaddr; a FUTEX_CMP_REQUEUE_PI from another task then either
 * wakes us with the PI lock already held (Q_REQUEUE_PI_LOCKED) or blocks
 * us on the rt-mutex of @uaddr2 (Q_REQUEUE_PI_DONE).
 *
 * Return: 0 when the PI lock on @uaddr2 is held, negative error otherwise.
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	struct rt_mutex_base *pi_mutex;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	/* Requeueing to the same futex makes no sense. */
	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);

	/*
	 * The rt_waiter is handed to the requeue side via q.rt_waiter; it
	 * must be fully initialized before we can be found by a requeuer.
	 */
	rt_mutex_init_waiter(&rt_waiter);

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/* Prepare to wait on uaddr: key lookup, bucket lock, value check. */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/* Catch uaddr != uaddr2 aliases that map to the same futex. */
	if (match_futex(&q.key, &key2)) {
		queue_unlock(hb);
		ret = -EINVAL;
		goto out;
	}

	/* Enqueue on uaddr and sleep until woken, requeued or timed out. */
	futex_wait_queue_me(hb, &q, to);

	switch (futex_requeue_pi_wakeup_sync(&q)) {
	case Q_REQUEUE_PI_IGNORE:
		/* Woken by timeout/signal before any requeue happened. */
		spin_lock(&hb->lock);
		ret = handle_early_requeue_pi_wakeup(hb, &q, to);
		spin_unlock(&hb->lock);
		break;

	case Q_REQUEUE_PI_LOCKED:
		/* Requeuer acquired the PI lock for us; fix ownership up. */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_owner(uaddr2, &q, true);
			/*
			 * Drop the waiter's pi_state reference taken by the
			 * requeue path.
			 */
			put_pi_state(q.pi_state);
			spin_unlock(q.lock_ptr);
			/* We hold the lock: any fixup "ret == 1" is success. */
			ret = ret < 0 ? ret : 0;
		}
		break;

	case Q_REQUEUE_PI_DONE:
		/* Requeued onto the rt-mutex; finish acquiring it. */
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);

		/* See the matching pattern in futex_lock_pi(). */
		spin_lock(q.lock_ptr);
		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
			ret = 0;

		debug_rt_mutex_free_waiter(&rt_waiter);

		/* Sync user-space value and pi_state owner with the outcome. */
		res = fixup_owner(uaddr2, &q, !ret);

		if (res)
			ret = (res < 0) ? res : 0;

		unqueue_me_pi(&q);
		spin_unlock(q.lock_ptr);

		if (ret == -EINTR) {
			/*
			 * A signal after the requeue cannot restart the
			 * syscall (we no longer wait on uaddr); report
			 * -EWOULDBLOCK so userspace retries the PI lock.
			 */
			ret = -EWOULDBLOCK;
		}
		break;
	default:
		BUG();
	}

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
/*
 * sys_set_robust_list() - register the calling task's robust futex list
 * @head: userspace pointer to the list head (kernel never dereferences it
 *        here; it is only walked at exit time)
 * @len:  must equal sizeof(*head), reserved for future extensions
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	/* Only one list-head size is currently supported. */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}
3577
3578
3579
3580
3581
3582
3583
/*
 * sys_get_robust_list() - fetch a task's robust futex list head
 * @pid:	target task, or 0 for the calling task
 * @head_ptr:	out: the registered list head pointer
 * @len_ptr:	out: size of the list-head structure
 *
 * Requires ptrace read access to the target task.
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	head = p->robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
3622
3623
3624#define HANDLE_DEATH_PENDING true
3625#define HANDLE_DEATH_LIST false
3626
3627
3628
3629
3630
/*
 * handle_futex_death() - robust-list cleanup for one futex of a dying task
 * @uaddr:	user address of the futex
 * @curr:	the exiting task
 * @pi:		whether this entry is a PI futex
 * @pending_op:	true when called for list_op_pending (an operation that was
 *		in flight when the task died)
 *
 * Marks the futex FUTEX_OWNER_DIED if @curr owns it and wakes a waiter so
 * robust-mutex users can recover.
 *
 * Return: 0 on success/nothing-to-do, -1 on unrecoverable fault or bad
 * alignment, other negative error on unexpected cmpxchg failure.
 */
static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
			      bool pi, bool pending_op)
{
	u32 uval, nval, mval;
	int err;

	/* Futexes are 32-bit and must be naturally aligned. */
	if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
		return -1;

retry:
	if (get_user(uval, uaddr))
		return -1;

	/*
	 * Pending non-PI op on a zero-valued futex: the task may have died
	 * between releasing the futex and clearing list_op_pending, leaving
	 * a waiter that would otherwise never be woken.  Wake one waiter
	 * unconditionally; a spurious wakeup is harmless for non-PI waiters.
	 */
	if (pending_op && !pi && !uval) {
		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
		return 0;
	}

	/* Not owned by the dying task: nothing to clean up. */
	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
		return 0;

	/*
	 * New value: keep FUTEX_WAITERS, clear the TID, set OWNER_DIED so
	 * the next locker knows recovery is needed.
	 */
	mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;

	if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
		switch (err) {
		case -EFAULT:
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;

		case -EAGAIN:
			cond_resched();
			goto retry;

		default:
			WARN_ON_ONCE(1);
			return err;
		}
	}

	/* Value changed concurrently: re-evaluate from scratch. */
	if (nval != uval)
		goto retry;

	/*
	 * Wake one non-PI waiter; PI futex waiters are woken via the
	 * pi_state exit handling instead.
	 */
	if (!pi && (uval & FUTEX_WAITERS))
		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);

	return 0;
}
3734
3735
3736
3737
3738static inline int fetch_robust_entry(struct robust_list __user **entry,
3739 struct robust_list __user * __user *head,
3740 unsigned int *pi)
3741{
3742 unsigned long uentry;
3743
3744 if (get_user(uentry, (unsigned long __user *)head))
3745 return -EFAULT;
3746
3747 *entry = (void __user *)(uentry & ~1UL);
3748 *pi = uentry & 1;
3749
3750 return 0;
3751}
3752
3753
3754
3755
3756
3757
3758
/*
 * exit_robust_list() - walk the exiting task's robust futex list
 * @curr: the exiting task
 *
 * Best-effort: the whole list lives in (possibly corrupted) user memory,
 * so every fetch failure simply aborts the walk.  Each entry owned by
 * @curr is marked OWNER_DIED and a waiter is woken via
 * handle_futex_death().
 */
static void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int next_pi;
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/* Fetch the list head entry. */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;

	/* Offset from a list entry to the futex word it describes. */
	if (get_user(futex_offset, &head->futex_offset))
		return;

	/* An operation that was in flight when the task died, if any. */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;
	while (entry != &head->list) {
		/*
		 * Fetch the next entry *before* handling this one, since
		 * handle_futex_death() wakes waiters which may unlink it.
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);

		/* The pending entry is handled separately below. */
		if (entry != pending) {
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi, HANDLE_DEATH_LIST))
				return;
		}
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;

		/* Bound the walk so a circular user list can't wedge exit. */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending) {
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip, HANDLE_DEATH_PENDING);
	}
}
3823
/*
 * futex_cleanup() - release all futex state held by @tsk
 *
 * Walks the native and compat robust lists and releases any PI state
 * still attached to the task.  Called from exec/exit via the
 * futex_cleanup_begin()/futex_cleanup_end() bracket.
 */
static void futex_cleanup(struct task_struct *tsk)
{
	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
		tsk->robust_list = NULL;
	}

#ifdef CONFIG_COMPAT
	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
		tsk->compat_robust_list = NULL;
	}
#endif

	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
}
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
/*
 * futex_exit_recursive() - handle a recursive entry into the exit path
 * @tsk: the exiting task
 *
 * If the task dies again while already inside the futex exit cleanup
 * (state == EXITING), it still holds futex_exit_mutex; release it and
 * mark the state DEAD so waiters in wait_for_owner_exiting() make
 * progress.
 */
void futex_exit_recursive(struct task_struct *tsk)
{
	if (tsk->futex_state == FUTEX_STATE_EXITING)
		mutex_unlock(&tsk->futex_exit_mutex);
	tsk->futex_state = FUTEX_STATE_DEAD;
}
3866
/*
 * futex_cleanup_begin() - enter the futex exit/exec cleanup section
 * @tsk: the task being cleaned up
 *
 * Serializes against attach_to_pi_owner() style lookups: the exit mutex
 * blocks concurrent state observers, and the EXITING state transition is
 * done under pi_lock so readers holding pi_lock see a consistent state.
 * Paired with futex_cleanup_end().
 */
static void futex_cleanup_begin(struct task_struct *tsk)
{
	mutex_lock(&tsk->futex_exit_mutex);

	raw_spin_lock_irq(&tsk->pi_lock);
	tsk->futex_state = FUTEX_STATE_EXITING;
	raw_spin_unlock_irq(&tsk->pi_lock);
}
3892
/*
 * futex_cleanup_end() - leave the futex cleanup section
 * @tsk:   the task being cleaned up
 * @state: final futex_state (FUTEX_STATE_OK on exec, _DEAD on exit)
 *
 * The state is published before dropping the exit mutex so tasks blocked
 * in wait_for_owner_exiting() observe the final state when they resume.
 */
static void futex_cleanup_end(struct task_struct *tsk, int state)
{
	tsk->futex_state = state;

	mutex_unlock(&tsk->futex_exit_mutex);
}
3906
/*
 * futex_exec_release() - futex cleanup on exec
 * @tsk: the exec'ing task
 *
 * Same cleanup as exit, but the task lives on, so the state is reset to
 * FUTEX_STATE_OK afterwards (new futex use is allowed again).
 */
void futex_exec_release(struct task_struct *tsk)
{
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);

	futex_cleanup_end(tsk, FUTEX_STATE_OK);
}
3924
/*
 * futex_exit_release() - futex cleanup on task exit
 * @tsk: the exiting task
 *
 * Final cleanup; the state ends as FUTEX_STATE_DEAD so lookups refuse to
 * attach pi_state to this task from here on.
 */
void futex_exit_release(struct task_struct *tsk)
{
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
}
3931
/*
 * do_futex() - dispatch a futex operation
 * @uaddr:	first futex address
 * @op:		opcode plus flag bits (FUTEX_PRIVATE_FLAG, FUTEX_CLOCK_REALTIME)
 * @val:	opcode-specific value (expected value, nr_wake, ...)
 * @timeout:	absolute timeout, or NULL
 * @uaddr2:	second futex address for requeue/wake_op
 * @val2:	opcode-specific second value (nr_requeue, ...)
 * @val3:	opcode-specific third value (bitset, cmp value, ...)
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	/* CLOCK_REALTIME timeouts are only valid for these commands. */
	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
		    cmd != FUTEX_LOCK_PI2)
			return -ENOSYS;
	}

	/* PI commands need a working atomic cmpxchg on user memory. */
	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_LOCK_PI2:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		if (!futex_cmpxchg_enabled)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		/* Plain WAIT is WAIT_BITSET with an all-ones bitset. */
		val3 = FUTEX_BITSET_MATCH_ANY;
		fallthrough;
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		fallthrough;
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		/* Legacy LOCK_PI always uses CLOCK_REALTIME timeouts. */
		flags |= FLAGS_CLOCKRT;
		fallthrough;
	case FUTEX_LOCK_PI2:
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, NULL, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
3994
3995static __always_inline bool futex_cmd_has_timeout(u32 cmd)
3996{
3997 switch (cmd) {
3998 case FUTEX_WAIT:
3999 case FUTEX_LOCK_PI:
4000 case FUTEX_LOCK_PI2:
4001 case FUTEX_WAIT_BITSET:
4002 case FUTEX_WAIT_REQUEUE_PI:
4003 return true;
4004 }
4005 return false;
4006}
4007
/*
 * futex_init_timeout() - validate and convert a user timespec to ktime
 * @cmd: futex command (decides relative-vs-absolute and clock handling)
 * @op:  full opcode including FUTEX_CLOCK_REALTIME
 * @ts:  the user-supplied timespec (already copied in)
 * @t:   out: the resulting ktime
 *
 * FUTEX_WAIT takes a *relative* timeout, converted here to absolute
 * CLOCK_MONOTONIC.  Absolute monotonic timeouts are translated through
 * the time namespace; CLOCK_REALTIME ones are not.
 */
static __always_inline int
futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
{
	if (!timespec64_valid(ts))
		return -EINVAL;

	*t = timespec64_to_ktime(*ts);
	if (cmd == FUTEX_WAIT)
		*t = ktime_add_safe(ktime_get(), *t);
	else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
		*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
	return 0;
}
4021
/*
 * sys_futex() - the futex syscall entry point
 *
 * Copies in the timeout when the command uses one, then dispatches to
 * do_futex().  For non-timeout commands utime carries an integer value
 * (val2), hence the cast in the do_futex() call.
 */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		const struct __kernel_timespec __user *, utime,
		u32 __user *, uaddr2, u32, val3)
{
	int ret, cmd = op & FUTEX_CMD_MASK;
	ktime_t t, *tp = NULL;
	struct timespec64 ts;

	if (utime && futex_cmd_has_timeout(cmd)) {
		/* Fault injection hook for testing -EFAULT handling. */
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
			return -EFAULT;
		if (get_timespec64(&ts, utime))
			return -EFAULT;
		ret = futex_init_timeout(cmd, op, &ts, &t);
		if (ret)
			return ret;
		tp = &t;
	}

	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
4043
4044#ifdef CONFIG_COMPAT
4045
4046
4047
4048static inline int
4049compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
4050 compat_uptr_t __user *head, unsigned int *pi)
4051{
4052 if (get_user(*uentry, head))
4053 return -EFAULT;
4054
4055 *entry = compat_ptr((*uentry) & ~1);
4056 *pi = (unsigned int)(*uentry) & 1;
4057
4058 return 0;
4059}
4060
4061static void __user *futex_uaddr(struct robust_list __user *entry,
4062 compat_long_t futex_offset)
4063{
4064 compat_uptr_t base = ptr_to_compat(entry);
4065 void __user *uaddr = compat_ptr(base + futex_offset);
4066
4067 return uaddr;
4068}
4069
4070
4071
4072
4073
4074
4075
/*
 * compat_exit_robust_list() - walk the exiting task's 32-bit robust list
 * @curr: the exiting task
 *
 * Compat counterpart of exit_robust_list(): same best-effort walk, same
 * ROBUST_LIST_LIMIT bound, but pointers in the user list are 32-bit
 * (compat_uptr_t) and decoded via compat_ptr().
 */
static void compat_exit_robust_list(struct task_struct *curr)
{
	struct compat_robust_list_head __user *head = curr->compat_robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int next_pi;
	compat_uptr_t uentry, next_uentry, upending;
	compat_long_t futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/* Fetch the list head entry. */
	if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
		return;

	/* Offset from a list entry to the futex word it describes. */
	if (get_user(futex_offset, &head->futex_offset))
		return;

	/* An operation that was in flight when the task died, if any. */
	if (compat_fetch_robust_entry(&upending, &pending,
			       &head->list_op_pending, &pip))
		return;

	next_entry = NULL;
	while (entry != (struct robust_list __user *) &head->list) {
		/*
		 * Fetch the next entry before handling this one, since the
		 * wakeup in handle_futex_death() may unlink it.
		 */
		rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
			(compat_uptr_t __user *)&entry->next, &next_pi);

		/* The pending entry is handled separately below. */
		if (entry != pending) {
			void __user *uaddr = futex_uaddr(entry, futex_offset);

			if (handle_futex_death(uaddr, curr, pi,
					       HANDLE_DEATH_LIST))
				return;
		}
		if (rc)
			return;
		uentry = next_uentry;
		entry = next_entry;
		pi = next_pi;

		/* Bound the walk so a circular user list can't wedge exit. */
		if (!--limit)
			break;

		cond_resched();
	}
	if (pending) {
		void __user *uaddr = futex_uaddr(pending, futex_offset);

		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
	}
}
4146
4147COMPAT_SYSCALL_DEFINE2(set_robust_list,
4148 struct compat_robust_list_head __user *, head,
4149 compat_size_t, len)
4150{
4151 if (!futex_cmpxchg_enabled)
4152 return -ENOSYS;
4153
4154 if (unlikely(len != sizeof(*head)))
4155 return -EINVAL;
4156
4157 current->compat_robust_list = head;
4158
4159 return 0;
4160}
4161
/*
 * Return the compat robust-list head of task @pid (or of the current
 * task when @pid is 0) to user space via @head_ptr, and the structure
 * size via @len_ptr.
 *
 * Reading another task's list head requires ptrace read permission.
 */
COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
			compat_uptr_t __user *, head_ptr,
			compat_size_t __user *, len_ptr)
{
	struct compat_robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	/* RCU protects the task lookup and the task_struct access below. */
	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	/* Snapshot the head pointer while still under RCU. */
	head = p->compat_robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(ptr_to_compat(head), head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
4200#endif
4201
4202#ifdef CONFIG_COMPAT_32BIT_TIME
4203SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
4204 const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
4205 u32, val3)
4206{
4207 int ret, cmd = op & FUTEX_CMD_MASK;
4208 ktime_t t, *tp = NULL;
4209 struct timespec64 ts;
4210
4211 if (utime && futex_cmd_has_timeout(cmd)) {
4212 if (get_old_timespec32(&ts, utime))
4213 return -EFAULT;
4214 ret = futex_init_timeout(cmd, op, &ts, &t);
4215 if (ret)
4216 return ret;
4217 tp = &t;
4218 }
4219
4220 return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
4221}
4222#endif
4223
/*
 * Runtime detection of a working futex_atomic_cmpxchg_inatomic() on
 * architectures that don't declare CONFIG_HAVE_FUTEX_CMPXCHG.
 */
static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
	u32 curval;

	/*
	 * This probe is *expected* to fault: it performs the cmpxchg on
	 * a NULL user address. A functional implementation takes the
	 * fault, gets fixed up, and reports -EFAULT — which is how we
	 * know the operation works and can set futex_cmpxchg_enabled.
	 *
	 * NOTE(review): presumably a stub implementation returns
	 * something other than -EFAULT (e.g. -ENOSYS) — that contract
	 * lives in the per-arch asm/futex.h, not visible here.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;
#endif
}
4243
4244static int __init futex_init(void)
4245{
4246 unsigned int futex_shift;
4247 unsigned long i;
4248
4249#if CONFIG_BASE_SMALL
4250 futex_hashsize = 16;
4251#else
4252 futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
4253#endif
4254
4255 futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
4256 futex_hashsize, 0,
4257 futex_hashsize < 256 ? HASH_SMALL : 0,
4258 &futex_shift, NULL,
4259 futex_hashsize, futex_hashsize);
4260 futex_hashsize = 1UL << futex_shift;
4261
4262 futex_detect_cmpxchg();
4263
4264 for (i = 0; i < futex_hashsize; i++) {
4265 atomic_set(&futex_queues[i].waiters, 0);
4266 plist_head_init(&futex_queues[i].chain);
4267 spin_lock_init(&futex_queues[i].lock);
4268 }
4269
4270 return 0;
4271}
4272core_initcall(futex_init);
4273