// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Fast Userspace Mutexes (futexes): kernel side of the futex() system
 * call, including the futex hash table, PI futexes and robust lists.
 */
#include <linux/compat.h>
#include <linux/jhash.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/freezer.h>
#include <linux/memblock.h>
#include <linux/fault-inject.h>
#include <linux/time_namespace.h>

#include <asm/futex.h>

#include "locking/rtmutex_common.h"
/*
 * futex_cmpxchg_enabled is resolved at compile time when the architecture
 * guarantees a working futex cmpxchg; otherwise it is detected at boot.
 */
#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
#define futex_cmpxchg_enabled 1
#else
static int  __read_mostly futex_cmpxchg_enabled;
#endif
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x01
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * code away.
 */
# define FLAGS_SHARED		0x00
#endif
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	refcount_t refcount;

	union futex_key key;
} __randomize_layout;

/*
 * struct futex_q - The hashed futex queue entry, one per waiting task.
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
} __randomize_layout;

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me() */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location. Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
} ____cacheline_aligned_in_smp;

/*
 * The base of the bucket array and its size are always used together
 * (after initialization only in hash_futex()), so ensure that they
 * reside in the same cacheline.
 */
static struct {
	struct futex_hash_bucket *queues;
	unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues   (__futex_data.queues)
#define futex_hashsize (__futex_data.hashsize)
/*
 * Fault injections for futexes.
 */
#ifdef CONFIG_FAIL_FUTEX

static struct {
	struct fault_attr attr;

	bool ignore_private;
} fail_futex = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_private = false,
};

static int __init setup_fail_futex(char *str)
{
	return setup_fault_attr(&fail_futex.attr, str);
}
__setup("fail_futex=", setup_fail_futex);

static bool should_fail_futex(bool fshared)
{
	if (fail_futex.ignore_private && !fshared)
		return false;

	return should_fail(&fail_futex.attr, 1);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_futex_debugfs(void)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_futex", NULL,
					&fail_futex.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	debugfs_create_bool("ignore-private", mode, dir,
			    &fail_futex.ignore_private);
	return 0;
}

late_initcall(fail_futex_debugfs);

#endif

#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif /* CONFIG_FAIL_FUTEX */

#ifdef CONFIG_COMPAT
static void compat_exit_robust_list(struct task_struct *curr);
#else
static inline void compat_exit_robust_list(struct task_struct *curr) { }
#endif
/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier: pairs with the smp_mb() in hb_waiters_pending()
	 * so a waker that sees no waiters cannot have missed this one.
	 */
	smp_mb__after_atomic();
#endif
}

/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}

static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier: pairs with the barrier in hb_waiters_inc(); after
	 * it we either observe the incremented waiter count or the waiter
	 * observes the updated futex value and will not sleep.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
/*
 * hash_futex - Return the hash bucket in the global hash.
 *
 * We hash on the keys returned from get_futex_key (see below) and return the
 * corresponding hash bucket in the global hash.
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
			  key->both.offset);

	return &futex_queues[hash & (futex_hashsize - 1)];
}
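
/*
 * Illustrative property (a sketch, not additional kernel API): the hash
 * depends only on the key, so a waiter and a waker that resolve the same
 * futex to the same key always pick the same bucket and serialize on its
 * lock:
 *
 *	struct futex_hash_bucket *hb1 = hash_futex(&key);
 *	struct futex_hash_bucket *hb2 = hash_futex(&key);
 *	// hb1 == hb2: one bucket, one hb->lock for this futex
 */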

/**
 * match_futex - Check whether two futex keys are equal
 * @key1:	Pointer to key1
 * @key2:	Pointer to key2
 *
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}

enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};

/**
 * futex_setup_timer - set up the sleeping hrtimer.
 * @time:	ptr to the given timeout value
 * @timeout:	the hrtimer_sleeper structure to be set up
 * @flags:	futex flags
 * @range_ns:	optional range in ns
 *
 * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
 *	   value given
 */
static inline struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns)
{
	if (!time)
		return NULL;

	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
	/*
	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
	 * effectively the same as calling hrtimer_set_expires().
	 */
	hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);

	return timeout;
}
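
/*
 * Typical use (a sketch mirroring the callers further down): pass an
 * absolute expiry, and later cancel/destroy the on-stack timer:
 *
 *	to = futex_setup_timer(abs_time, &timeout, flags,
 *			       current->timer_slack_ns);
 *	...
 *	if (to) {
 *		hrtimer_cancel(&to->timer);
 *		destroy_hrtimer_on_stack(&to->timer);
 *	}
 */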

/*
 * Generate a machine wide unique identifier for this inode.
 *
 * This relies on u64 not wrapping in the life-time of the machine; which with
 * 1ns resolution means almost 585 years.
 *
 * This further relies on the fact that a well formed program will not unmap
 * the file while it has a (shared) futex waiting on it. This mapping will
 * have a file reference which pins the mount and inode.
 *
 * If for some reason an inode gets evicted and read back in again, it will
 * get a new sequence number and will _NOT_ match, even though it is the
 * exact same file.
 *
 * It is important that match_futex() will never have a false-positive, esp.
 * for PI futexes that can mess up the state. False-negatives are only
 * possible for malformed programs.
 */
static u64 get_inode_sequence_number(struct inode *inode)
{
	static atomic64_t i_seq;
	u64 old;

	/* Does the inode already have a sequence number? */
	old = atomic64_read(&inode->i_sequence);
	if (likely(old))
		return old;

	for (;;) {
		u64 new = atomic64_add_return(1, &i_seq);
		if (WARN_ON_ONCE(!new))
			continue;

		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
		if (old)
			return old;
		return new;
	}
}

/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: FUTEX_READ,
 *		FUTEX_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in @key on success.
 *
 * For shared mappings (when @fshared), the key is:
 *
 *   ( inode->i_sequence, page->index, offset_within_page )
 *
 * [ also see get_inode_sequence_number() ]
 *
 * For private mappings (or when !@fshared), the key is:
 *
 *   ( current->mm, address, 0 )
 *
 * This allows (cross process, where applicable) identification of the futex
 * without keeping the page pinned for the duration of the FUTEX_WAIT.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
			 enum futex_access rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *tail;
	struct address_space *mapping;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(uaddr, sizeof(u32))))
		return -EFAULT;

	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma()
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		return 0;
	}

again:
	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == FUTEX_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

	/*
	 * The treatment of mapping from this point on is critical. The page
	 * lock protects many things but in this context the page lock
	 * stabilizes mapping, prevents inode freeing in the shared
	 * file-backed region case and guards against movement to swap cache.
	 *
	 * Strictly speaking the page lock is not needed in all cases being
	 * considered here and page lock forces unnecessary serialization.
	 * From this point on, mapping will be re-verified if necessary and
	 * page lock will be acquired only if it is unavoidable.
	 *
	 * Mapping checks require the head page for any compound page so the
	 * head page and mapping is looked up now.
	 */
	tail = page;
	page = compound_head(page);
	mapping = READ_ONCE(page->mapping);

	/*
	 * If page->mapping is NULL, then it cannot be a PageAnon page; but
	 * it might be the ZERO_PAGE, in the gate area, in a special mapping,
	 * or a good file page truncated or invalidated before we took the
	 * page lock - all cases which we are happy to fail.
	 *
	 * The case we do have to guard against is when memory pressure made
	 * shmem_writepage move it from filecache to swapcache beneath us:
	 * an unlikely race, but we do need to retry for page->mapping.
	 */
	if (unlikely(!mapping)) {
		int shmem_swizzled;

		/*
		 * Page lock is required to identify which special case above
		 * applies. If this is really a shmem page then the page lock
		 * will prevent unexpected transitions.
		 */
		lock_page(page);
		shmem_swizzled = PageSwapCache(page) || page->mapping;
		unlock_page(page);
		put_page(page);

		if (shmem_swizzled)
			goto again;

		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * If the futex key is stored on an anonymous page, then the associated
	 * object is the mm which is implicitly pinned by the calling process.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (unlikely(should_fail_futex(true)) || ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;

	} else {
		struct inode *inode;

		/*
		 * The associated futex object in this case is the inode and
		 * the page->mapping must be traversed. Ordinarily this should
		 * be stabilised under page lock but it's not strictly
		 * necessary in this case as we just want to pin the inode, not
		 * update the radix tree or anything like that.
		 *
		 * The RCU read lock is taken as the inode is finally freed
		 * under RCU. If the mapping still matches expectations then
		 * the mapping->host can be safely accessed as being a valid
		 * inode.
		 */
		rcu_read_lock();

		if (READ_ONCE(page->mapping) != mapping) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		inode = READ_ONCE(mapping->host);
		if (!inode) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.i_seq = get_inode_sequence_number(inode);
		key->shared.pgoff = basepage_index(tail);
		rcu_read_unlock();
	}

out:
	put_page(page);
	return err;
}

/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	mmap_read_lock(mm);
	ret = fixup_user_fault(mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE, NULL);
	mmap_read_unlock(mm);

	return ret < 0 ? ret : 0;
}

/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}

static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}

static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __get_user(*dest, from);
	pagefault_enable();

	return ret ? -EFAULT : 0;
}
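
/*
 * Illustrative pattern (a sketch of how the callers below use these
 * helpers): they run with a hash bucket spinlock held, so a fault must not
 * be handled inline. On failure the caller drops the lock, faults the page
 * in while sleeping is legal, and retries:
 *
 *	ret = get_futex_value_locked(&uval, uaddr);
 *	if (ret) {
 *		queue_unlock(hb);
 *		ret = get_user(uval, uaddr);	// can fault and sleep here
 *		if (!ret)
 *			goto retry;
 *	}
 */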

/*
 * PI code:
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	refcount_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}

static struct futex_pi_state *alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

static void get_pi_state(struct futex_pi_state *pi_state)
{
	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
}

/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 */
static void put_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!refcount_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		struct task_struct *owner;
		unsigned long flags;

		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
		owner = pi_state->owner;
		if (owner) {
			raw_spin_lock(&owner->pi_lock);
			list_del_init(&pi_state->list);
			raw_spin_unlock(&owner->pi_lock);
		}
		rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
	}

	if (current->pi_state_cache) {
		kfree(pi_state);
	} else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		refcount_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
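
/*
 * Illustrative lifecycle (a sketch of how the PI paths below combine the
 * helpers above):
 *
 *	refill_pi_state_cache();	// preallocate; may sleep
 *	pi_state = alloc_pi_state();	// take from the cache; cannot fail
 *	get_pi_state(pi_state);		// extra reference for a second user
 *	...
 *	put_pi_state(pi_state);		// drop the extra reference
 *	put_pi_state(pi_state);		// last ref: recycle into the cache
 */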
817
818#ifdef CONFIG_FUTEX_PI
819
820
821
822
823
824
825static void exit_pi_state_list(struct task_struct *curr)
826{
827 struct list_head *next, *head = &curr->pi_state_list;
828 struct futex_pi_state *pi_state;
829 struct futex_hash_bucket *hb;
830 union futex_key key = FUTEX_KEY_INIT;
831
832 if (!futex_cmpxchg_enabled)
833 return;
834
835
836
837
838
839 raw_spin_lock_irq(&curr->pi_lock);
840 while (!list_empty(head)) {
841 next = head->next;
842 pi_state = list_entry(next, struct futex_pi_state, list);
843 key = pi_state->key;
844 hb = hash_futex(&key);
845
846
847
848
849
850
851
852
853
854
855
856 if (!refcount_inc_not_zero(&pi_state->refcount)) {
857 raw_spin_unlock_irq(&curr->pi_lock);
858 cpu_relax();
859 raw_spin_lock_irq(&curr->pi_lock);
860 continue;
861 }
862 raw_spin_unlock_irq(&curr->pi_lock);
863
864 spin_lock(&hb->lock);
865 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
866 raw_spin_lock(&curr->pi_lock);
867
868
869
870
871 if (head->next != next) {
872
873 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
874 spin_unlock(&hb->lock);
875 put_pi_state(pi_state);
876 continue;
877 }
878
879 WARN_ON(pi_state->owner != curr);
880 WARN_ON(list_empty(&pi_state->list));
881 list_del_init(&pi_state->list);
882 pi_state->owner = NULL;
883
884 raw_spin_unlock(&curr->pi_lock);
885 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
886 spin_unlock(&hb->lock);
887
888 rt_mutex_futex_unlock(&pi_state->pi_mutex);
889 put_pi_state(pi_state);
890
891 raw_spin_lock_irq(&curr->pi_lock);
892 }
893 raw_spin_unlock_irq(&curr->pi_lock);
894}
895#else
896static inline void exit_pi_state_list(struct task_struct *curr) { }
897#endif

/*
 * Validate the state of the pi_state against the user space value of the
 * futex (TID of the owner plus the OWNER_DIED and WAITERS bits). All checks
 * happen with pi_mutex.wait_lock held; any update of the user space value
 * must be re-validated against the pi_state afterwards.
 */
static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
			      struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	u32 uval2;
	int ret;

	/*
	 * Userspace might have messed up non-PI and PI futexes
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	/*
	 * We get here with hb->lock held, and having found a
	 * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
	 * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
	 * which in turn means that futex_lock_pi() still has a reference on
	 * our pi_state.
	 */
	WARN_ON(!refcount_read(&pi_state->refcount));

	/*
	 * Now that we have a pi_state, we can acquire wait_lock
	 * and do the state validation.
	 */
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Since {uval, pi_state} is serialized by wait_lock, and our current
	 * uval was read without holding it, it can have changed. Verify it
	 * still is what we expect it to be, otherwise retry the entire
	 * operation.
	 */
	if (get_futex_value_locked(&uval2, uaddr))
		goto out_efault;

	if (uval != uval2)
		goto out_eagain;

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state.
			 */
			if (pid)
				goto out_einval;
			/*
			 * Take a ref on the state and return success.
			 */
			goto out_attach;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success.
		 */
		if (!pid)
			goto out_attach;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner.
		 */
		if (!pi_state->owner)
			goto out_einval;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID.
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		goto out_einval;

out_attach:
	get_pi_state(pi_state);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	*ps = pi_state;
	return 0;

out_einval:
	ret = -EINVAL;
	goto out_error;

out_eagain:
	ret = -EAGAIN;
	goto out_error;

out_efault:
	ret = -EFAULT;
	goto out_error;

out_error:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}

/**
 * wait_for_owner_exiting - Block until the owner has exited
 * @ret: owner's current futex lock status
 * @exiting:	Pointer to the exiting task
 *
 * Caller must hold a refcount on @exiting.
 */
static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
{
	if (ret != -EBUSY) {
		WARN_ON_ONCE(exiting);
		return;
	}

	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
		return;

	mutex_lock(&exiting->futex_exit_mutex);
	/*
	 * No point in doing state checking here. If the waiter got here
	 * while the task was in exec()->exec_futex_release() then it can
	 * have any FUTEX_STATE_* value when the waiter has acquired the
	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
	 * already. Highly unlikely and not a problem. Just one more round
	 * trip through futex_lock_pi().
	 */
	mutex_unlock(&exiting->futex_exit_mutex);

	put_task_struct(exiting);
}

static int handle_exit_race(u32 __user *uaddr, u32 uval,
			    struct task_struct *tsk)
{
	u32 uval2;

	/*
	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
	 * caller that the alleged owner is busy.
	 */
	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
		return -EBUSY;

	/*
	 * Reread the user space value to handle the race where the exiting
	 * owner's robust list cleanup set FUTEX_OWNER_DIED and cleared the
	 * TID after we read @uval. In that case the value no longer matches
	 * and the right answer is -EAGAIN (retry), not -ESRCH.
	 */
	if (get_futex_value_locked(&uval2, uaddr))
		return -EFAULT;

	/* If the user space value has changed, try again. */
	if (uval2 != uval)
		return -EAGAIN;

	/*
	 * The exiting task did not have a robust list, the robust list was
	 * corrupted or the user space value in *uaddr is simply bogus.
	 * Give up and tell user space.
	 */
	return -ESRCH;
}

/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 */
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
			      struct futex_pi_state **ps,
			      struct task_struct **exiting)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct futex_pi_state *pi_state;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * to the new pi_state if the validation succeeds. If the TID is 0
	 * the task is exiting and the robust list cleanup already set the
	 * OWNER_DIED bit; tell the caller to retry.
	 */
	if (!pid)
		return -EAGAIN;
	p = find_get_task_by_vpid(pid);
	if (!p)
		return handle_exit_race(uaddr, uval, NULL);

	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state to figure out whether the
	 * task is exiting. To protect against the change of the task state
	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
		 * The task is on the way out. When the futex state is
		 * FUTEX_STATE_DEAD, we know that the task has finished
		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);

		raw_spin_unlock_irq(&p->pi_lock);
		/*
		 * If the owner task is between FUTEX_STATE_EXITING and
		 * FUTEX_STATE_DEAD then store the task pointer and keep
		 * the reference on the task struct. The calling code will
		 * drop all locks, wait for the task to reach
		 * FUTEX_STATE_DEAD and then drop the refcount. This is
		 * required to prevent a live lock when the current task
		 * preempted the exiting task between the two states.
		 */
		if (ret == -EBUSY)
			*exiting = p;
		else
			put_task_struct(p);
		return ret;
	}

	/*
	 * No existing pi state. First waiter.
	 *
	 * This creates pi_state, we have hb->lock held, this means nothing
	 * can observe this state, wait_lock is irrelevant.
	 */
	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	/*
	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
	 * because there is no concurrency as the object is not published yet.
	 */
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}

static int lookup_pi_state(u32 __user *uaddr, u32 uval,
			   struct futex_hash_bucket *hb,
			   union futex_key *key, struct futex_pi_state **ps,
			   struct task_struct **exiting)
{
	struct futex_q *top_waiter = futex_top_waiter(hb, key);

	/*
	 * If there is a waiter on that futex, validate it and
	 * attach to the pi_state when the validation succeeds.
	 */
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * We are the first waiter - try to look up the owner based on
	 * @uval and attach to it.
	 */
	return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
}

static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
	int err;
	u32 curval;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
	if (unlikely(err))
		return err;

	/* If user space value changed, let the caller retry */
	return curval != uval ? -EAGAIN : 0;
}

/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @exiting:		Pointer to store the task pointer of the owner task
 *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  -  0 - ready to wait;
 *  -  1 - acquired the lock;
 *  - <0 - error
 *
 * The hb->lock shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *top_waiter;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (get_futex_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	if ((unlikely(should_fail_futex(true))))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	top_waiter = futex_top_waiter(hb, key);
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		/* If the take over worked, return 1 */
		return ret < 0 ? ret : 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}

/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
		return;
	lockdep_assert_held(q->lock_ptr);

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}

/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	get_task_struct(p);
	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
	 * is written, without taking any locks. This is possible in the event
	 * of a spurious wakeup, for example. A memory barrier is required here
	 * to prevent the following store to lock_ptr from getting ahead of the
	 * plist_del in __unqueue_futex().
	 */
	smp_store_release(&q->lock_ptr, NULL);

	/*
	 * Queue the task for later wakeup for after we've released
	 * the hb->lock.
	 */
	wake_q_add_safe(wake_q, p);
}

/*
 * Caller must hold a reference on @pi_state.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
{
	u32 curval, newval;
	struct task_struct *new_owner;
	bool postunlock = false;
	DEFINE_WAKE_Q(wake_q);
	int ret = 0;

	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
	if (WARN_ON_ONCE(!new_owner)) {
		/*
		 * As per the comment in futex_unlock_pi() this should not
		 * happen.
		 *
		 * When this happens, give up our locks and try again, giving
		 * the futex_lock_pi() instance time to complete, either by
		 * waiting on the rtmutex or removing itself from the futex
		 * queue.
		 */
		ret = -EAGAIN;
		goto out_unlock;
	}

	/*
	 * We pass it to the next owner. The WAITERS bit is always kept
	 * enabled while there is PI state around. We cleanup the owner
	 * died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	if (unlikely(should_fail_futex(true))) {
		ret = -EFAULT;
		goto out_unlock;
	}

	ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
	if (!ret && (curval != uval)) {
		/*
		 * If a unconditional UNLOCK_PI operation (user space did not
		 * try the TID->0 transition) raced with a waiter setting the
		 * FUTEX_WAITERS flag between get_user() and locking the hash
		 * bucket lock, retry the operation.
		 */
		if ((FUTEX_TID_MASK & curval) == uval)
			ret = -EAGAIN;
		else
			ret = -EINVAL;
	}

	if (ret)
		goto out_unlock;

	/*
	 * This is a point of no return; once we modified the uval there is
	 * no going back and subsequent operations must not fail.
	 */
	raw_spin_lock(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock(&pi_state->owner->pi_lock);

	raw_spin_lock(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock(&new_owner->pi_lock);

	postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	if (postunlock)
		rt_mutex_postunlock(&wake_q);

	return ret;
}

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
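
/*
 * Illustrative sketch of why the address ordering matters: two concurrent
 * two-bucket operations always acquire the locks in the same order, so the
 * classic ABBA deadlock cannot occur:
 *
 *	CPU0: double_lock_hb(hbA, hbB);	// locks hbA, then hbB
 *	CPU1: double_lock_hb(hbB, hbA);	// also locks hbA, then hbB
 */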

/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	DEFINE_WAKE_Q(wake_q);

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		return ret;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex(&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
	return ret;
}

static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
	unsigned int op =	  (encoded_op & 0x70000000) >> 28;
	unsigned int cmp =	  (encoded_op & 0x0f000000) >> 24;
	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
	int oldval, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
		if (oparg < 0 || oparg > 31) {
			char comm[sizeof(current->comm)];
			/*
			 * kill this print and return -EINVAL when userspace
			 * is sane again
			 */
			pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
					get_task_comm(comm, current), oparg);
			oparg &= 31;
		}
		oparg = 1 << oparg;
	}

	pagefault_disable();
	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	pagefault_enable();
	if (ret)
		return ret;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ:
		return oldval == cmparg;
	case FUTEX_OP_CMP_NE:
		return oldval != cmparg;
	case FUTEX_OP_CMP_LT:
		return oldval < cmparg;
	case FUTEX_OP_CMP_GE:
		return oldval >= cmparg;
	case FUTEX_OP_CMP_LE:
		return oldval <= cmparg;
	case FUTEX_OP_CMP_GT:
		return oldval > cmparg;
	default:
		return -ENOSYS;
	}
}
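
/*
 * Illustrative example of the encoding decoded above (a sketch using the
 * uapi FUTEX_OP() macro): FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_GT, 0)
 * packs op=ADD into bits 28-31, cmp=GT into bits 24-27, oparg=1 into bits
 * 12-23 and cmparg=0 into bits 0-11. For FUTEX_WAKE_OP this means:
 * "oldval = *uaddr2; *uaddr2 = oldval + 1; also wake uaddr2 waiters if
 * oldval > 0".
 */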

/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	DEFINE_WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		return ret;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {
		double_unlock_hb(hb1, hb2);

		if (!IS_ENABLED(CONFIG_MMU) ||
		    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
			/*
			 * we don't get EFAULT from MMU faults if we don't have
			 * an MMU, but we might get them from range checking
			 */
			ret = op_ret;
			return ret;
		}

		if (op_ret == -EFAULT) {
			ret = fault_in_user_writeable(uaddr2);
			if (ret)
				return ret;
		}

		if (!(flags & FLAGS_SHARED)) {
			cond_resched();
			goto retry_private;
		}

		cond_resched();
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex(&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex(&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				mark_wake_futex(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	return ret;
}

/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		hb_waiters_inc(hb2);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	q->key = *key2;
}

/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the
 * right futex, but remove it from the hb and NULL the rt_waiter so it can
 * detect atomic lock acquisition.  Set the q->lock_ptr to the requeue target
 * hb->lock to protect access to the pi_state to fixup the owner later.
 * Must be called with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}

/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @exiting:		Pointer to store the task pointer of the owner task
 *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it
 * atomically.  Wake the top waiter if we succeed.  If the caller specified
 * set_waiters, then direct futex_lock_pi_atomic() to force setting the
 * FUTEX_WAITERS bit.  hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  -  0 - failed to acquire the lock atomically;
 *  - >0 - acquired the lock, return value is vpid of the top_waiter
 *  - <0 - error
 */
static int
futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
			   struct futex_hash_bucket *hb2, union futex_key *key1,
			   union futex_key *key2, struct futex_pi_state **ps,
			   struct task_struct **exiting, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret, vpid;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   exiting, set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;
	}
	return ret;
}

/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to
 * acquire uaddr2 atomically on behalf of the top waiter.
 *
 * Return:
 *  - >=0 - on success, the number of tasks requeued or woken;
 *  -  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	DEFINE_WAKE_Q(wake_q);

	if (nr_wake < 0 || nr_requeue < 0)
		return -EINVAL;

	/*
	 * When PI not supported: return -ENOSYS if requeue_pi is true,
	 * consequently the compiler knows requeue_pi is always false past
	 * this point which will optimize away all the conditional code
	 * further down.
	 */
	if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
		return -ENOSYS;

	if (requeue_pi) {
		/*
		 * Requeue PI only works on two distinct uaddrs. This
		 * check is only valid for private futexes. See below.
		 */
		if (uaddr1 == uaddr2)
			return -EINVAL;

		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? FUTEX_WRITE : FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (requeue_pi && match_futex(&key1, &key2))
		return -EINVAL;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	hb_waiters_inc(hb2);
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				return ret;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		struct task_struct *exiting = NULL;

		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather than in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state,
						 &exiting, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it. If the lock was taken, ret contains the
		 * vpid of the top waiter task.
		 * If the lock was not taken, we have pi_state and an initial
		 * refcount on it. In case of an error we have nothing.
		 */
		if (ret > 0) {
			WARN_ON(pi_state);
			task_count++;
			/*
			 * If we acquired the lock, then the user space value
			 * of uaddr2 should be vpid. It cannot be changed by
			 * the top waiter as it is blocked on hb2 lock if it
			 * tries to do so. If something fiddled with it behind
			 * our back the pi state lookup might unearth it. So
			 * we rather use the known value than rereading and
			 * handing potential crap to lookup_pi_state.
			 *
			 * If that call succeeds then we have pi_state and an
			 * initial refcount on it.
			 */
			ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
					      &pi_state, &exiting);
		}

		switch (ret) {
		case 0:
			/* We hold a reference on the pi state. */
			break;

			/* If the above failed, then pi_state is NULL */
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			return ret;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - EBUSY: Owner is exiting and we just wait for the
			 *   exit to complete.
			 * - EAGAIN: The user space value changed.
			 */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			/*
			 * Handle the case where the owner is in the middle of
			 * exiting. Wait for the exit to complete otherwise
			 * this task might loop forever, aka. live lock.
			 */
			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 *
		 * We should never be requeueing a futex_q with a pi_state,
		 * which is awaiting a futex_unlock_pi().
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter) ||
		    this->pi_state) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			mark_wake_futex(&wake_q, this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/*
			 * Prepare the waiter to take the rt_mutex. Take a
			 * refcount on the pi_state and store the pointer in
			 * the futex_q object of the waiter.
			 */
			get_pi_state(pi_state);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task);
			if (ret == 1) {
				/*
				 * We got the lock. We do neither drop the
				 * refcount on pi_state nor clear
				 * this->pi_state because the waiter needs the
				 * pi_state for cleaning up the user space
				 * value. It will drop the refcount after
				 * doing so.
				 */
				requeue_pi_wake_futex(this, &key2, hb2);
				continue;
			} else if (ret) {
				/*
				 * rt_mutex_start_proxy_lock() detected a
				 * potential deadlock when we tried to queue
				 * that waiter. Drop the pi_state reference
				 * which we took above and remove the pointer
				 * to the state from the waiters futex_q
				 * object.
				 */
				this->pi_state = NULL;
				put_pi_state(pi_state);
				/*
				 * We stop queueing more waiters and let user
				 * space deal with the mess.
				 */
				break;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
	}

	/*
	 * We took an extra initial reference to the pi_state either in
	 * futex_proxy_trylock_atomic() or in lookup_pi_state(). We need
	 * to drop it here again.
	 */
	put_pi_state(pi_state);

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	hb_waiters_dec(hb2);
	return ret ? ret : task_count;
}

/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	/*
	 * Increment the counter before taking the lock so that
	 * a potential waker won't miss a to-be-slept task that is
	 * waiting for the spinlock. This is safe as all queue_lock()
	 * users end up calling queue_me(). Similarly, for housekeeping,
	 * decrement the counter at queue_unlock() when some error has
	 * occurred and we don't end up adding the task to the list.
	 */
	hb_waiters_inc(hb); /* implies full barrier */

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}

static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}
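
/*
 * Illustrative pairing (a sketch of the pattern in futex_wait_setup()
 * below): queue_lock()/queue_unlock() bracket the "read the futex word and
 * decide" critical section:
 *
 *	hb = queue_lock(q);			// hb_waiters_inc + spin_lock
 *	ret = get_futex_value_locked(&uval, uaddr);
 *	...
 *	queue_unlock(hb);			// spin_unlock + hb_waiters_dec
 */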

static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
}

/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the
 * unqueue state is implicit in the state of the woken task (see
 * futex_wait_requeue_pi() for an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	__queue_me(q, hb);
	spin_unlock(&hb->lock);
}

/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me()
 * must be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *  - 1 - if the futex_q was still queued (and we removed it);
 *  - 0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	/*
	 * q->lock_ptr can change between this read and the following
	 * spin_lock. Use READ_ONCE to forbid the compiler from reloading
	 * q->lock_ptr and optimizing lock_ptr out of the logic below.
	 */
	lock_ptr = READ_ONCE(q->lock_ptr);
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	return ret;
}

/*
 * PI futexes can not be requeued and must remove themselves from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	put_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}

static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	u32 uval, curval, newval;
	struct task_struct *oldowner, *newowner;
	u32 newtid;
	int ret, err = 0;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	oldowner = pi_state->owner;

	/*
	 * We are here because either:
	 *
	 *  - we stole the lock and pi_state->owner needs updating to reflect
	 *    that (@argowner == current),
	 *
	 * or:
	 *
	 *  - someone stole our lock and we need to fix things to point to the
	 *    new owner (@argowner == NULL).
	 *
	 * Either way, we have to replace the TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would leave the
	 * pi_state in an inconsistent state when we fault here, because we
	 * need to drop the locks to handle the fault. This might be observed
	 * in the PID check in lookup_pi_state.
	 */
retry:
	if (!argowner) {
		if (oldowner != current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
		}

		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
			/* We got the lock after all, nothing to fix. */
			ret = 0;
			goto out_unlock;
		}

		/*
		 * Since we just failed the trylock; there must be an owner.
		 */
		newowner = rt_mutex_owner(&pi_state->pi_mutex);
		/*
		 * If the higher priority waiter has not yet taken over the
		 * rtmutex then newowner is NULL. We can't return here with
		 * that state because it's inconsistent vs. the user space
		 * state. So drop the locks and try again. It's a valid
		 * situation and not any different from the other retry
		 * conditions.
		 */
		if (unlikely(!newowner)) {
			err = -EAGAIN;
			goto handle_err;
		}
	} else {
		WARN_ON_ONCE(argowner != current);
		if (oldowner == current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
		}
		newowner = argowner;
	}

	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	err = get_futex_value_locked(&uval, uaddr);
	if (err)
		goto handle_err;

	for (;;) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
		if (err)
			goto handle_err;

		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock(&newowner->pi_lock);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	return 0;

	/*
	 * In order to reschedule or handle a page fault, we need to drop the
	 * locks here. In the case of a fault, this gives the other task
	 * (either the highest priority waiter itself or the task which stole
	 * the rtmutex) the chance to try the fixup of the pi_state. So once we
	 * are back from handling the fault we need to check the pi_state after
	 * reacquiring the locks and before trying to do another fixup. When
	 * the fixup has been done already we simply return.
	 *
	 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
	 * drop hb->lock since the caller owns the hb -> futex_q relation.
	 * Dropping the pi_mutex->wait_lock requires the state revalidate.
	 */
handle_err:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	spin_unlock(q->lock_ptr);

	switch (err) {
	case -EFAULT:
		ret = fault_in_user_writeable(uaddr);
		break;

	case -EAGAIN:
		cond_resched();
		ret = 0;
		break;

	default:
		WARN_ON_ONCE(1);
		ret = err;
		break;
	}

	spin_lock(q->lock_ptr);
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner) {
		ret = 0;
		goto out_unlock;
	}

	if (ret)
		goto out_unlock;

	goto retry;

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}

static long futex_wait_restart(struct restart_block *restart);

/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  -  1 - success, lock taken;
 *  -  0 - success, lock not taken;
 *  - <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 *
		 * Speculative pi_state->owner read (we don't hold wait_lock);
		 * since we own the lock pi_state->owner == current is the
		 * stable state, anything else needs more attention.
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		return ret ? ret : locked;
	}

	/*
	 * If we didn't get the lock; check if anybody stole it from us. In
	 * that case, we need to fix up the uval to point to them instead of
	 * us, otherwise bad things happen.
	 *
	 * Another speculative read; pi_state->owner == current is unstable
	 * but needs our attention.
	 */
	if (q->pi_state->owner == current) {
		ret = fixup_pi_state_owner(uaddr, q, NULL);
		return ret;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);
	}

	return ret;
}

/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using smp_store_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout)
		hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
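
/*
 * Illustrative ordering (a sketch): the waiter's state store is ordered
 * before the unlock inside queue_me(), so a waker that takes hb->lock
 * afterwards is guaranteed to observe the queued, sleeping waiter:
 *
 *	waiter				waker
 *	set_current_state(...)		spin_lock(&hb->lock)
 *	queue_me() -> spin_unlock	mark_wake_futex()
 *	schedule()			wake_up_q()
 */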

/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held on success, and unlocked on failure.
 *
 * Return:
 *  -  0 - uaddr contains val and hb has been locked;
 *  - <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is
 *    unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(*hb);

		ret = get_user(uval, uaddr);
		if (ret)
			return ret;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

	return ret;
}

static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int ret;

	if (!bitset)
		return -EINVAL;
	q.bitset = bitset;

	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);
retry:
	/*
	 * Prepare to wait on uaddr. On success, it holds hb->lock.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/* queue_me and wait for wakeup, timeout, or a signal. */
	futex_wait_queue_me(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	ret = 0;
	/* unqueue_me() returns 0 if we were already woken */
	if (!unqueue_me(&q))
		goto out;
	ret = -ETIMEDOUT;
	if (to && !to->task)
		goto out;

	/*
	 * We expect signal_pending(current), but we might be the
	 * victim of a spurious wakeup as well.
	 */
	if (!signal_pending(current))
		goto retry;

	ret = -ERESTARTSYS;
	if (!abs_time)
		goto out;

	restart = &current->restart_block;
	restart->fn = futex_wait_restart;
	restart->futex.uaddr = uaddr;
	restart->futex.val = val;
	restart->futex.time = *abs_time;
	restart->futex.bitset = bitset;
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

	ret = -ERESTART_RESTARTBLOCK;

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
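
/*
 * Illustrative userspace counterpart (a sketch; raw syscall, since glibc
 * provides no futex() wrapper - the helper name below is an example, not
 * kernel API):
 *
 *	#include <linux/futex.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static long my_futex_wait(unsigned int *uaddr, unsigned int val)
 *	{
 *		return syscall(SYS_futex, uaddr, FUTEX_WAIT, val,
 *			       NULL, NULL, 0);
 *	}
 *
 * The kernel only sleeps if *uaddr still equals val under the bucket lock,
 * which is what makes userspace's "check, then wait" sequence race-free.
 */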

static long futex_wait_restart(struct restart_block *restart)
{
	u32 __user *uaddr = restart->futex.uaddr;
	ktime_t t, *tp = NULL;

	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
		t = restart->futex.time;
		tp = &t;
	}
	restart->fn = do_no_restart_syscall;

	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp, restart->futex.bitset);
}

/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block as a consequence of relying
 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
 * a 0 value of the futex too.)
 *
 * Also serves as futex trylock_pi()'ing, and due semantics.
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	if (refill_pi_state_cache())
		return -ENOMEM;

	to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
				   &exiting, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,
		 * or failed. Either way, we do _not_ block.
		 */
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - EBUSY: Task is exiting and we just wait for the
			 *   exit to complete.
			 * - EAGAIN: The user space value changed.
			 */
			queue_unlock(hb);
			/*
			 * Handle the case where the owner is in the middle of
			 * exiting. Wait for the exit to complete otherwise
			 * this task might loop forever, aka. live lock.
			 */
			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	WARN_ON(!q.pi_state);

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	__queue_me(&q, hb);

	if (trylock) {
		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
		goto no_block;
	}

	rt_mutex_init_waiter(&rt_waiter);

	/*
	 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
	 * hold it while doing rt_mutex_start_proxy(), because then it will
	 * include hb->lock in the blocking chain, even though we'll not in
	 * fact hold it while blocking. This creates lock inversion issues.
	 *
	 * Thus instead we do the locking ourselves; the rt_mutex code doesn't
	 * like it that way. See __rt_mutex_start_proxy_lock() and the hb->lock
	 * dance below.
	 */
	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
	spin_unlock(q.lock_ptr);
	/*
	 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
	 * such that futex_unlock_pi() is guaranteed to observe the waiter when
	 * it sees the futex_q::pi_state.
	 */
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);

	if (ret) {
		if (ret == 1)
			ret = 0;
		goto cleanup;
	}

	if (unlikely(to))
		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);

	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

cleanup:
	spin_lock(q.lock_ptr);
	/*
	 * If we failed to acquire the lock (deadlock/signal/timeout), we must
	 * first acquire the hb->lock before removing the lock from the
	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
	 * lists consistent.
	 *
	 * In particular; it is important that futex_unlock_pi() can not
	 * observe this inconsistency.
	 */
	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
		ret = 0;

no_block:
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * fixup_owner() < 0 means an unrecoverable error; > 0 means we
	 * acquired the lock after all:
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If the fixup failed and we still own the rt_mutex, unlock it and
	 * return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
		pi_state = q.pi_state;
		get_pi_state(pi_state);
	}

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	goto out;

out_unlock_put_key:
	queue_unlock(hb);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	goto retry;
}

/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 curval, uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb;
	struct futex_q *top_waiter;
	int ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
	if (ret)
		return ret;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * Check waiters first. We do not trust user space values at
	 * all and we at least want to know if user space fiddled
	 * with the futex value instead of blindly unlocking.
	 */
	top_waiter = futex_top_waiter(hb, &key);
	if (top_waiter) {
		struct futex_pi_state *pi_state = top_waiter->pi_state;

		ret = -EINVAL;
		if (!pi_state)
			goto out_unlock;

		/*
		 * If current does not own the pi_state then the futex is
		 * inconsistent and user space fiddled with the futex value.
		 */
		if (pi_state->owner != current)
			goto out_unlock;

		get_pi_state(pi_state);
		/*
		 * By taking wait_lock while still holding hb->lock, we ensure
		 * there is no point where we hold neither; and therefore
		 * wake_futex_pi() must observe a state consistent with what we
		 * observed.
		 *
		 * In particular; this forces __rt_mutex_start_proxy() to
		 * complete such that we're guaranteed to observe the
		 * rt_waiter. Also see the WARN in wake_futex_pi().
		 */
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		spin_unlock(&hb->lock);

		/* drops pi_state->pi_mutex.wait_lock */
		ret = wake_futex_pi(uaddr, uval, pi_state);

		put_pi_state(pi_state);

		/*
		 * Success, we're done! No tricky corner cases.
		 */
		if (!ret)
			goto out_putkey;
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		/*
		 * A unconditional UNLOCK_PI op raced against a waiter
		 * setting the FUTEX_WAITERS bit. Try again.
		 */
		if (ret == -EAGAIN)
			goto pi_retry;
		/*
		 * wake_futex_pi has detected invalid state. Tell user
		 * space.
		 */
		goto out_putkey;
	}

	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel. Waiters which are about to queue themselves are stuck
	 * on hb->lock. So we can safely ignore them. We do neither
	 * preserve the WAITERS bit nor the OWNER_DIED one. We are the
	 * owner.
	 */
	if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
		spin_unlock(&hb->lock);
		switch (ret) {
		case -EFAULT:
			goto pi_faulted;

		case -EAGAIN:
			goto pi_retry;

		default:
			WARN_ON_ONCE(1);
			goto out_putkey;
		}
	}

	/*
	 * If uval has changed, let user space handle it.
	 */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
out_putkey:
	return ret;

pi_retry:
	cond_resched();
	goto retry;

pi_faulted:

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}

/**
 * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Return:
 *  -  0 = no early wakeup detected;
 *  - <0 = -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * A waiter can only be requeued from uaddr to uaddr2, never back, so
	 * q->key cannot change back to key1 under us once it matches key2.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);
		hb_waiters_dec(hb);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}

/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the futex at uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
 *		the same type, no requeueing from private to shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.  This ensures the rt_mutex maintains an owner when
 * it has waiters; without one, the pi logic would not know which task to
 * boost/deboost, if there was a need to.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via the following--
 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
 * 2) wakeup on uaddr2 after a requeue
 * 3) signal
 * 4) timeout
 *
 * If 3, cleanup and return -ERESTARTNOINTR.
 *
 * If 2, we may then block on trying to take the rt_mutex and return via:
 * 5) successful lock
 * 6) signal
 * 7) timeout
 * 8) other lock acquisition failure
 *
 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
 *
 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
 *
 * Return:
 *  -  0 - On success;
 *  - <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	rt_mutex_init_waiter(&rt_waiter);

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, holds hb lock.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (match_futex(&q.key, &key2)) {
		queue_unlock(hb);
		ret = -EINVAL;
		goto out;
	}

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
				pi_state = q.pi_state;
				get_pi_state(pi_state);
			}
			/*
			 * Drop the reference to the pi state which
			 * the requeue_pi() code acquired for us.
			 */
			put_pi_state(q.pi_state);
			spin_unlock(q.lock_ptr);
		}
	} else {
		struct rt_mutex *pi_mutex;

		/*
		 * We have been woken up by futex_unlock_pi(), a timeout, or a
		 * signal.  Futex_unlock_pi() will not destroy the lock_ptr nor
		 * the pi_state.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);

		spin_lock(q.lock_ptr);
		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
			ret = 0;

		debug_rt_mutex_free_waiter(&rt_waiter);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, proper cleanup was done
		 * there and we're done. If it returned success we might have
		 * taken the lock in the meantime:
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/*
		 * If fixup_pi_state_owner() faulted and was unable to handle
		 * the fault, unlock the rt_mutex and return the fault to
		 * userspace.
		 */
		if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
			pi_state = q.pi_state;
			get_pi_state(pi_state);
		}

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). Userspace also
 * maintains a per-thread 'list_op_pending' field, to allow the kernel
 * to clean up if the thread dies after acquiring the lock, but just
 * before it could have added itself to the list. There can only be
 * one such pending lock.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}

/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	head = p->robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}

/* Constants for the pending_op argument of handle_futex_death */
#define HANDLE_DEATH_PENDING	true
#define HANDLE_DEATH_LIST	false

/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
			      bool pi, bool pending_op)
{
	u32 uval, nval, mval;
	int err;

	/* Futex address must be 32bit aligned */
	if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
		return -1;

retry:
	if (get_user(uval, uaddr))
		return -1;

	/*
	 * Special case for regular (non PI) futexes. The unlock path in
	 * user space has two race scenarios:
	 *
	 * 1. The unlock path releases the user space futex value and
	 *    before it can execute the futex() syscall to wake up
	 *    waiters it is killed.
	 *
	 * 2. A woken up waiter is killed before it can acquire the
	 *    futex in user space.
	 *
	 * In both cases the TID validation below prevents a wakeup of
	 * potential waiters which can cause these waiters to block
	 * forever.
	 *
	 * In both cases the following conditions are met:
	 *
	 *	1) task->robust_list->list_op_pending != NULL
	 *	   @pending_op == true
	 *	2) User space futex value == 0
	 *	3) Regular futex: @pi == false
	 *
	 * If these conditions are met, it is safe to attempt waking up a
	 * potential waiter without touching the user space futex value and
	 * trying to set the OWNER_DIED bit. The user space futex value is
	 * uncontended and the rest of the user space mutex state is
	 * consistent, so a woken waiter will just take over the
	 * uncontended futex. Setting the OWNER_DIED bit would create
	 * inconsistent state and malfunction of the user space owner died
	 * handling.
	 */
	if (pending_op && !pi && !uval) {
		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
		return 0;
	}

	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
		return 0;

	/*
	 * Ok, this dying thread is truly holding a futex
	 * of interest. Set the OWNER_DIED bit atomically
	 * via cmpxchg, and if the value had FUTEX_WAITERS
	 * set, wake up a waiter (if any). (We have to do a
	 * futex_wake() even if OWNER_DIED is already set -
	 * to handle the rare but possible case of recursive
	 * thread-death.) The rest of the cleanup is done in
	 * userspace.
	 */
	mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;

	/*
	 * We are not holding a lock here, but we want to have
	 * the pagefault_disable/enable() protection because
	 * we want to handle the fault gracefully. If the
	 * access fails we try to fault in the futex with R/W
	 * verification via get_user_pages. get_user() above
	 * does not guarantee R/W access. If that fails we
	 * give up and leave the futex locked.
	 */
	if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
		switch (err) {
		case -EFAULT:
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;

		case -EAGAIN:
			cond_resched();
			goto retry;

		default:
			WARN_ON_ONCE(1);
			return err;
		}
	}

	if (nval != uval)
		goto retry;

	/*
	 * Wake robust non-PI futexes here. The wakeup of
	 * PI futexes happens in exit_pi_state():
	 */
	if (!pi && (uval & FUTEX_WAITERS))
		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);

	return 0;
}

/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     unsigned int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}
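
/*
 * Illustrative decoding (a sketch, addresses are made up): userspace
 * stores "entry | 1" in the robust list when the corresponding futex is
 * PI, so a fetched value of 0x7f0000001239 decodes to:
 *
 *	*entry = (void __user *)0x7f0000001238;	// bit 0 cleared
 *	*pi    = 1;				// this is a PI futex
 */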

/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
static void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int next_pi;
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending) {
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi, HANDLE_DEATH_LIST))
				return;
		}
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending) {
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip, HANDLE_DEATH_PENDING);
	}
}

static void futex_cleanup(struct task_struct *tsk)
{
	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
		tsk->robust_list = NULL;
	}

#ifdef CONFIG_COMPAT
	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
		tsk->compat_robust_list = NULL;
	}
#endif

	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
}

/**
 * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
 * @tsk:	task to set the state on
 *
 * Set the futex exit state of the task lockless. The futex waiter code
 * observes that state when a task is exiting and loops until the task has
 * actually finished the futex cleanup. The worst case for this is that the
 * waiter runs through the wait loop until the state becomes visible.
 *
 * This is called from the recursive fault handling path in do_exit().
 *
 * This is best effort. Either the futex exit code has run already or
 * not. If the OWNER_DIED bit has been set on the futex then the waiter can
 * take it over. If not, the problem is pushed back to user space. If the
 * futex exit code did not run yet, then an already queued waiter might
 * block forever, but there is nothing which can be done about that.
 */
void futex_exit_recursive(struct task_struct *tsk)
{
	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
	if (tsk->futex_state == FUTEX_STATE_EXITING)
		mutex_unlock(&tsk->futex_exit_mutex);
	tsk->futex_state = FUTEX_STATE_DEAD;
}
3668
3669static void futex_cleanup_begin(struct task_struct *tsk)
3670{
3671
3672
3673
3674
3675
3676
3677 mutex_lock(&tsk->futex_exit_mutex);
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690 raw_spin_lock_irq(&tsk->pi_lock);
3691 tsk->futex_state = FUTEX_STATE_EXITING;
3692 raw_spin_unlock_irq(&tsk->pi_lock);
3693}

static void futex_cleanup_end(struct task_struct *tsk, int state)
{
	/*
	 * Lockless store. The only side effect is that an observer might
	 * take another loop until it becomes visible.
	 */
	tsk->futex_state = state;
	/*
	 * Drop the exit protection. This unblocks waiters which observed
	 * FUTEX_STATE_EXITING to reevaluate the state.
	 */
	mutex_unlock(&tsk->futex_exit_mutex);
}

void futex_exec_release(struct task_struct *tsk)
{
	/*
	 * The state handling is done for consistency, but in the case of
	 * exec() there is no way to prevent further damage as the PID stays
	 * the same. But for the unlikely and arguably buggy case that a
	 * futex is held on exec(), this provides at least as much state
	 * consistency protection as is possible.
	 */
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	/*
	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
	 * to exec a new binary.
	 */
	futex_cleanup_end(tsk, FUTEX_STATE_OK);
}

void futex_exit_release(struct task_struct *tsk)
{
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
}
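
/*
 * Sketch of where these hooks are invoked. The call sites live outside
 * this file, in the mm release path; exact placement may differ by kernel
 * version:
 *
 *	exec_mm_release() -> futex_exec_release() -> ... FUTEX_STATE_OK
 *	exit_mm_release() -> futex_exit_release() -> ... FUTEX_STATE_DEAD
 */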

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET &&
		    cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		if (!futex_cmpxchg_enabled)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		fallthrough;
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		fallthrough;
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, NULL, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
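
/*
 * Illustrative user-space sketch (not kernel code): the classic wait/wake
 * pairing that lands in futex_wait() and futex_wake() via do_futex().
 * SYS_futex and the FUTEX_* constants are real; the wrapper names are
 * hypothetical.
 *
 *	#include <linux/futex.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static long u_futex_wait(unsigned int *addr, unsigned int expected)
 *	{
 *		// Sleeps only if *addr still equals 'expected' when queued.
 *		return syscall(SYS_futex, addr, FUTEX_WAIT_PRIVATE,
 *			       expected, NULL, NULL, 0);
 *	}
 *
 *	static long u_futex_wake_one(unsigned int *addr)
 *	{
 *		return syscall(SYS_futex, addr, FUTEX_WAKE_PRIVATE,
 *			       1, NULL, NULL, 0);
 *	}
 */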

SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec64 ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
			return -EFAULT;
		if (get_timespec64(&ts, utime))
			return -EFAULT;
		if (!timespec64_valid(&ts))
			return -EINVAL;

		t = timespec64_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		else if (!(op & FUTEX_CLOCK_REALTIME))
			t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
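
/*
 * Note on the timeout handling above: FUTEX_WAIT historically takes a
 * *relative* timeout, converted here into an absolute deadline against
 * ktime_get(); FUTEX_WAIT_BITSET, FUTEX_LOCK_PI and FUTEX_WAIT_REQUEUE_PI
 * take *absolute* timeouts and only need the time-namespace translation
 * for CLOCK_MONOTONIC. Illustrative user-space call with a 100ms relative
 * timeout:
 *
 *	struct timespec ts = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
 *	syscall(SYS_futex, addr, FUTEX_WAIT_PRIVATE, expected, &ts, NULL, 0);
 */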

#ifdef CONFIG_COMPAT
/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int
compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
			  compat_uptr_t __user *head, unsigned int *pi)
{
	if (get_user(*uentry, head))
		return -EFAULT;

	*entry = compat_ptr((*uentry) & ~1);
	*pi = (unsigned int)(*uentry) & 1;

	return 0;
}

static void __user *futex_uaddr(struct robust_list __user *entry,
				compat_long_t futex_offset)
{
	compat_uptr_t base = ptr_to_compat(entry);
	void __user *uaddr = compat_ptr(base + futex_offset);

	return uaddr;
}
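
/*
 * Worked example of the compat arithmetic above: pointers and offsets of
 * a 32-bit task are 32 bits wide, so the addition must happen in
 * compat_uptr_t width before converting back to a kernel user pointer.
 * For entry == 0x1000 and futex_offset == -8 this yields compat_ptr(0xff8),
 * with any wraparound confined to 32 bits.
 */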

/*
 * Walk curr->compat_robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
static void compat_exit_robust_list(struct task_struct *curr)
{
	struct compat_robust_list_head __user *head = curr->compat_robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int next_pi;
	compat_uptr_t uentry, next_uentry, upending;
	compat_long_t futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (compat_fetch_robust_entry(&upending, &pending,
				      &head->list_op_pending, &pip))
		return;

	next_entry = NULL;
	while (entry != (struct robust_list __user *) &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
			(compat_uptr_t __user *)&entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending) {
			void __user *uaddr = futex_uaddr(entry, futex_offset);

			if (handle_futex_death(uaddr, curr, pi,
					       HANDLE_DEATH_LIST))
				return;
		}
		if (rc)
			return;
		uentry = next_uentry;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}
	if (pending) {
		void __user *uaddr = futex_uaddr(pending, futex_offset);

		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
	}
}

COMPAT_SYSCALL_DEFINE2(set_robust_list,
		struct compat_robust_list_head __user *, head,
		compat_size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->compat_robust_list = head;

	return 0;
}

COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
			compat_uptr_t __user *, head_ptr,
			compat_size_t __user *, len_ptr)
{
	struct compat_robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	head = p->compat_robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(ptr_to_compat(head), head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
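
/*
 * Illustrative sketch (not kernel code): reading back a task's registered
 * robust list head from a 32-bit program. SYS_get_robust_list is real;
 * the variable names are hypothetical, and pid 0 means "current task".
 *
 *	struct robust_list_head *head;
 *	size_t len;
 *
 *	long err = syscall(SYS_get_robust_list, 0, &head, &len);
 *	// on success: head == what was passed to set_robust_list(),
 *	// len == sizeof(*head)
 */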
#endif

#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
		struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec64 ts;
	ktime_t t, *tp = NULL;
	int val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (get_old_timespec32(&ts, utime))
			return -EFAULT;
		if (!timespec64_valid(&ts))
			return -EINVAL;

		t = timespec64_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		else if (!(op & FUTEX_CLOCK_REALTIME))
			t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
		tp = &t;
	}
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (int) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
#endif

static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
	u32 curval;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * operation. Probing with a NULL user address is the cheapest
	 * way to tell the two cases apart: a working implementation
	 * faults and returns -EFAULT, while a non-implemented stub
	 * returns -ENOSYS, in which case the cmpxchg-based futex
	 * operations (PI and requeue-PI) stay disabled.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;
#endif
}

static int __init futex_init(void)
{
	unsigned int futex_shift;
	unsigned long i;

#if CONFIG_BASE_SMALL
	futex_hashsize = 16;
#else
	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
#endif

	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
					       futex_hashsize, 0,
					       futex_hashsize < 256 ? HASH_SMALL : 0,
					       &futex_shift, NULL,
					       futex_hashsize, futex_hashsize);
	futex_hashsize = 1UL << futex_shift;

	futex_detect_cmpxchg();

	for (i = 0; i < futex_hashsize; i++) {
		atomic_set(&futex_queues[i].waiters, 0);
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
core_initcall(futex_init);
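
/*
 * Worked example of the sizing in futex_init(): with 16 possible CPUs,
 * 256 * 16 = 4096 buckets are requested (already a power of two), so
 * alloc_large_system_hash() reports futex_shift == 12 and the final
 * futex_hashsize is 1UL << 12 == 4096. The hash of a futex key is then
 * masked with (futex_hashsize - 1) to pick a bucket.
 */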