/*
 * Fast Userspace Mutexes ("futexes"): hashed wait queues with support
 * for PI (priority inheritance) futexes, robust lists and requeueing.
 */
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/export.h>
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
#include <linux/ptrace.h>
#include <linux/sched/rt.h>
#include <linux/hugetlb.h>
#include <linux/freezer.h>
#include <linux/bootmem.h>
#include <linux/fault-inject.h>

#include <asm/futex.h>

#include "locking/rtmutex_common.h"

/*
 * READ this before attempting to hack on futexes!
 *
 * Basic futex operation and ordering guarantees
 * =============================================
 *
 * The waiter reads the futex value in user space and calls
 * futex_wait(). It computes the hash bucket, acquires the hash bucket
 * lock, re-reads the futex user space value and, if it has not changed,
 * enqueues itself into the hash bucket, releases the hash bucket lock
 * and schedules.
 *
 * The waker side modifies the user space value of the futex and calls
 * futex_wake(). It computes the hash bucket, acquires the hash bucket
 * lock, and wakes the waiters queued on that futex in the hash bucket.
 *
 * A waker that finds no queued waiters would like to avoid taking the
 * hb spinlock altogether. For that optimization to be safe, the
 * waiter's increment of hb->waiters (hb_waiters_inc(), barrier (A))
 * must be ordered against its read of the futex value, and the waker's
 * write of the futex value must be ordered against its read of
 * hb->waiters (barrier (B), provided by get_futex_key_refs()). With
 * X := hb->waiters and Y := futex value:
 *
 *	X = Y = 0
 *
 *	w[X]=1		w[Y]=1
 *	MB		MB
 *	r[Y]=y		r[X]=x
 *
 * this guarantees that x == 0 && y == 0 is impossible: if a waiter is
 * missed by a waker, the waker must observe the waiter being queued
 * and take the hash bucket lock to wake it. On !SMP,
 * hb_waiters_pending() unconditionally returns 1 and the lock is
 * always taken.
 */
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
int __read_mostly futex_cmpxchg_enabled;
#endif

/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x01
#else
/*
 * NOMMU systems have no per process address space. Let the compiler
 * optimize the shared futex code away.
 */
# define FLAGS_SHARED		0x00
#endif
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04

/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;
	atomic_t refcount;

	union futex_key key;
};

/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list
 * via the rt_mutex code. See unqueue_me_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
};

static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me() */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};

/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
} ____cacheline_aligned_in_smp;

/*
 * The base of the bucket array and its size are always used together
 * (after initialization only in hash_futex()), so ensure that they
 * reside in the same cacheline.
 */
static struct {
	struct futex_hash_bucket *queues;
	unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues   (__futex_data.queues)
#define futex_hashsize (__futex_data.hashsize)

/*
 * Fault injections for futexes.
 */
#ifdef CONFIG_FAIL_FUTEX

static struct {
	struct fault_attr attr;

	bool ignore_private;
} fail_futex = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_private = false,
};

static int __init setup_fail_futex(char *str)
{
	return setup_fault_attr(&fail_futex.attr, str);
}
__setup("fail_futex=", setup_fail_futex);

static bool should_fail_futex(bool fshared)
{
	if (fail_futex.ignore_private && !fshared)
		return false;

	return should_fail(&fail_futex.attr, 1);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_futex_debugfs(void)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_futex", NULL,
					&fail_futex.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	if (!debugfs_create_bool("ignore-private", mode, dir,
				 &fail_futex.ignore_private)) {
		debugfs_remove_recursive(dir);
		return -ENOMEM;
	}

	return 0;
}

late_initcall(fail_futex_debugfs);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif /* CONFIG_FAIL_FUTEX */

static inline void futex_get_mm(union futex_key *key)
{
	atomic_inc(&key->private.mm->mm_count);
	/*
	 * Ensure futex_get_mm() implies a full barrier such that
	 * get_futex_key() implies a full barrier. This is relied upon
	 * as smp_mb(); (B), see the ordering comment above.
	 */
	smp_mb__after_atomic();
}

/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment above.
	 */
	smp_mb__after_atomic();
#endif
}

/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}

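/*
 * Lockless check whether any waiters might be queued on this bucket.
 * On SMP this relies on the atomic waiters count being ordered against
 * the waker's futex value write by smp_mb() (B); on UP there is no
 * such count, so report pending and always take the bucket lock.
 */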
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}

/*
 * We hash on the keys returned from get_futex_key (see below).
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & (futex_hashsize - 1)];
}

/*
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}

/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	/*
	 * On MMU less systems futexes are always "private" as there is no
	 * per process address space. We need the barrier nevertheless, as
	 * there is MMU less SMP hardware.
	 */
	if (!IS_ENABLED(CONFIG_MMU)) {
		smp_mb(); /* explicit smp_mb(); (B) */
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode); /* implies smp_mb(); (B) */
		break;
	case FUT_OFF_MMSHARED:
		futex_get_mm(key); /* implies smp_mb(); (B) */
		break;
	default:
		/*
		 * Private futexes do not hold a reference on an inode or
		 * mm, therefore the only purpose of calling
		 * get_futex_key_refs() is the barrier needed for the
		 * lockless waiter check.
		 */
		smp_mb(); /* explicit smp_mb(); (B) */
	}
}

/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held. This is
 * a no-op for private futexes, see comment in the get
 * counterpart.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	if (!IS_ENABLED(CONFIG_MMU))
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
}

/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Return: a negative error code or 0
 *
 * The key words are stored in *key on success.
 *
 * For shared mappings, it's (page->index, file_inode(vma->vm_file),
 * offset_within_page).  For private mappings, it's (uaddr, current->mm).
 * We can usually work out the index without swapping in the page.
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *tail;
	struct address_space *mapping;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
		return -EFAULT;

	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma().
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);  /* implies smp_mb(); (B) */
		return 0;
	}

again:
	/* Ignore any VERIFY_READ mapping (futex common case) */
	if (unlikely(should_fail_futex(fshared)))
		return -EFAULT;

	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

	/*
	 * The treatment of mapping from this point on is critical. The page
	 * lock protects many things but in this context the page lock
	 * stabilizes mapping, prevents inode freeing in the shared
	 * file-backed region case and guards against movement to swap cache.
	 *
	 * Strictly speaking the page lock is not needed in all cases being
	 * considered here and page lock forces unnecessary serialization.
	 * From this point on, mapping will be re-verified if necessary and
	 * page lock will be acquired only if it is unavoidable.
	 *
	 * Mapping checks require the head page for any compound page so the
	 * head page and mapping is looked up now. For anonymous pages, it
	 * does not matter if the page splits in the future as the key is
	 * based on the address. For filesystem-backed pages, the tail is
	 * required as the index of the page determines the key. For
	 * base pages, there is no tail page and tail == page.
	 */
	tail = page;
	page = compound_head(page);
	mapping = READ_ONCE(page->mapping);

	/*
	 * If page->mapping is NULL, then it cannot be an anonymous page;
	 * but it might be the ZERO_PAGE or in the gate area or in a
	 * special mapping (all cases which we are happy to fail); or it
	 * may have been a good file page when get_user_pages_fast found
	 * it, but truncated or holepunched before we got the page lock
	 * (also cases which we are happy to fail).
	 *
	 * The case we do have to guard against is when memory pressure made
	 * shmem_writepage move it from filecache to swapcache beneath us:
	 * an unlikely race, but we do need to retry for page->mapping.
	 */
	if (unlikely(!mapping)) {
		int shmem_swizzled;

		/*
		 * Page lock is required to identify which special case above
		 * applies. If this is really a shmem page then the page lock
		 * will prevent unexpected transitions.
		 */
		lock_page(page);
		shmem_swizzled = PageSwapCache(page) || page->mapping;
		unlock_page(page);
		put_page(page);

		if (shmem_swizzled)
			goto again;

		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * If the futex key is stored on an anonymous page, then the associated
	 * object is the mm which is implicitly pinned by the calling process.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.
		 */
		if (unlikely(should_fail_futex(fshared)) || ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;

		get_futex_key_refs(key); /* implies smp_mb(); (B) */

	} else {
		struct inode *inode;

		/*
		 * The associated futex object in this case is the inode and
		 * the page->mapping must be traversed. Ordinarily this should
		 * be stabilised under page lock but it's not strictly
		 * necessary in this case as we just want to pin the inode, not
		 * update the radix tree or anything like that.
		 *
		 * The RCU read lock is taken as the inode is finally freed
		 * under RCU. If the mapping still matches expectations then
		 * the mapping->host can be safely accessed as being a valid
		 * inode.
		 */
		rcu_read_lock();

		if (READ_ONCE(page->mapping) != mapping) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		inode = READ_ONCE(mapping->host);
		if (!inode) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		/*
		 * Take a reference on the inode unless it is about to be
		 * freed. The only way for this check to fail is if the
		 * inode was truncated in parallel, so warn and retry.
		 *
		 * We are not calling into get_futex_key_refs() in file-backed
		 * cases, therefore a successful atomic_inc return below will
		 * guarantee that get_futex_key() is still a rcu critical
		 * section.
		 */
		if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
			rcu_read_unlock();
			put_page(page);

			goto again;
		}

		/* Should be impossible but lets be paranoid for now */
		if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
			err = -EFAULT;
			rcu_read_unlock();
			iput(inode);

			goto out;
		}

		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = inode;
		key->shared.pgoff = basepage_index(tail);
		rcu_read_unlock();
	}

out:
	put_page(page);
	return err;
}

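/*
 * Drop the key reference taken by get_futex_key().
 */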
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}

/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write
 * access to @uaddr.
 *
 * We have no generic implementation of a non-destructive write to the
 * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE, NULL);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}

/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futex futex_q's)
 *
 * Must be called with the hb lock held.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}

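/*
 * cmpxchg on the user space futex word with page faults disabled: the
 * callers hold hb->lock, so a fault must not sleep and is reported to
 * the caller instead.
 */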
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}

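/*
 * Fetch the user space futex word with page faults disabled; returns
 * -EFAULT instead of faulting while hb->lock is held.
 */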
static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __get_user(*dest, from);
	pagefault_enable();

	return ret ? -EFAULT : 0;
}

/*
 * PI code:
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	atomic_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;

	return 0;
}

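/*
 * Hand out the pi_state preallocated by refill_pi_state_cache(); the
 * caller must have refilled the per-task cache beforehand.
 */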
static struct futex_pi_state * alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 *
 * Must be called with the hb lock held.
 */
static void put_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already.
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}

/*
 * Look up the task based on what TID userspace gave us.
 * We dont trust it.
 */
static struct task_struct * futex_find_get_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (p)
		get_task_struct(p);

	rcu_read_unlock();

	return p;
}

/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}

/*
 * Validation rules for attaching to PI state, with
 * "Waiter" = futex_top_waiter() result, "uTID" = TID in the user space
 * value and "uODIED" = FUTEX_OWNER_DIED in the user space value:
 *
 *	Waiter | pi_state | pi->owner | uTID      | uODIED | ?
 *	NULL   | ---      | ---       | any       | 0/1    | Valid
 *	Found  | NULL     | ---       | any       | 0/1    | Invalid
 *	Found  | Found    | NULL      | 0         | 1      | Valid
 *	Found  | Found    | NULL      | >0        | 1      | Invalid
 *	Found  | Found    | task      | 0         | 1      | Valid
 *	Found  | Found    | NULL      | any       | 0      | Invalid
 *	Found  | Found    | task      | ==taskTID | 0/1    | Valid
 *	Found  | Found    | task      | 0         | 0      | Invalid
 *	Found  | Found    | task      | !=taskTID | 0/1    | Invalid
 *
 * No waiter means the kernel tries to attach to the user space TID
 * owner directly. A waiter queued on a non-PI futex is invalid. A NULL
 * pi_state->owner is only valid while the OWNER_DIED bit is set:
 * exit_pi_state_list() sets the owner to NULL and wakes the topmost
 * waiter, which then fixes up the TID in user space. Finally, if a
 * pi_state owner exists, it must match the TID in the user space
 * value, as there is no transient state which leaves owner and user
 * space TID out of sync.
 */
static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;

	/*
	 * Userspace might have messed up non-PI and PI futexes.
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	WARN_ON(!atomic_read(&pi_state->refcount));

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state.
			 */
			if (pid)
				return -EINVAL;
			/*
			 * Take a ref on the state and return success.
			 */
			goto out_state;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success.
		 */
		if (!pid)
			goto out_state;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner.
		 */
		if (!pi_state->owner)
			return -EINVAL;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID.
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		return -EINVAL;
out_state:
	atomic_inc(&pi_state->refcount);
	*ps = pi_state;
	return 0;
}

/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 */
static int attach_to_pi_owner(u32 uval, union futex_key *key,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct futex_pi_state *pi_state;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * to the new pi_state if the validation succeeds.
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	/*
	 * No existing pi state. First waiter.
	 */
	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}

static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
			   union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_q *match = futex_top_waiter(hb, key);

	/*
	 * If there is a waiter on that futex, validate it and
	 * attach to the pi_state when the validation succeeds.
	 */
	if (match)
		return attach_to_pi_state(uval, match->pi_state, ps);

	/*
	 * We are the first waiter - try to look up the owner based on
	 * @uval and attach to it.
	 */
	return attach_to_pi_owner(uval, key, ps);
}

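/*
 * Try an atomic uval -> newval transition on the user space futex word.
 * The hb->lock is held, so the cmpxchg runs with page faults disabled.
 */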
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
	u32 uninitialized_var(curval);

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;

	/* If user space value changed, let the caller retry */
	return curval != uval ? -EAGAIN : 0;
}

/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  0 - ready to wait;
 *  1 - acquired the lock;
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *match;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (get_futex_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	if ((unlikely(should_fail_futex(true))))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	match = futex_top_waiter(hb, key);
	if (match)
		return attach_to_pi_state(uval, match->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		/* If the take over worked, return 1 */
		return ret < 0 ? ret : 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uval, key, ps);
}

/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}

/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_q() for the actual
 * wakeups to occur.
 */
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	/*
	 * Queue the task for later wakeup for after we've released
	 * the hb->lock. wake_q_add() grabs reference to p.
	 */
	wake_q_add(wake_q, p);
	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;
}

static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
			 struct futex_hash_bucket *hb)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;
	WAKE_Q(wake_q);
	bool deboost;
	int ret = 0;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always
	 * kept enabled while there is PI state around. We cleanup the
	 * owner died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	if (unlikely(should_fail_futex(true)))
		ret = -EFAULT;

	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
		ret = -EFAULT;
	} else if (curval != uval) {
		/*
		 * If an unconditional UNLOCK_PI operation (user space did not
		 * try the TID->0 transition) raced with a waiter setting the
		 * FUTEX_WAITERS flag between get_user() and locking the hash
		 * bucket lock, retry the operation.
		 */
		if ((FUTEX_TID_MASK & curval) == uval)
			ret = -EAGAIN;
		else
			ret = -EINVAL;
	}
	if (ret) {
		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
		return ret;
	}

	raw_spin_lock(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock(&pi_state->owner->pi_lock);

	raw_spin_lock(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock(&new_owner->pi_lock);

	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);

	/*
	 * First unlock HB so the waiter does not spin on it once he got woken
	 * up. Second wake up the waiter before the priority is adjusted. If we
	 * deboost first (and lose our higher priority), then the task might
	 * get scheduled away before the wake up can take place.
	 */
	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
	if (deboost)
		rt_mutex_adjust_prio(current);

	return 0;
}

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}

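/*
 * Release both hash bucket locks; the second unlock is skipped when
 * both futexes hashed to the same bucket.
 */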
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}

/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	WAKE_Q(wake_q);

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		goto out_put_key;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);
out_put_key:
	put_futex_key(&key);
out:
	return ret;
}

/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;
	WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			mark_wake_futex(&wake_q, this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				mark_wake_futex(&wake_q, this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}

/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		hb_waiters_inc(hb2);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}

/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the
 * right futex, but remove it from the hb and NULL the rt_waiter so it can
 * detect atomic lock acquisition.  Set the q->lock_ptr to the requeue target
 * hb->lock to protect access to the pi_state to fixup the owner later.  Must
 * be called with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}

/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  0 - failed to acquire the lock atomically;
 * >0 - acquired the lock, return value is vpid of the top_waiter
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret, vpid;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unnecessarily as it will force the subsequent unlock to
	 * enter the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;
	}
	return ret;
}

/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)
 * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
 *		pi futex (pi to pi requeue is not supported)
 *
 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
 * uaddr2 atomically on behalf of the top waiter.
 *
 * Return:
 * >=0 - on success, the number of tasks requeued or woken;
 *  <0 - on error
 */
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	int drop_count = 0, task_count = 0, ret;
	struct futex_pi_state *pi_state = NULL;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	WAKE_Q(wake_q);

	if (requeue_pi) {
		/*
		 * Requeue PI only works on two distinct uaddrs. This
		 * check is only valid for private futexes. See below.
		 */
		if (uaddr1 == uaddr2)
			return -EINVAL;

		/*
		 * requeue_pi requires a pi_state, try to allocate it now
		 * without any locks in case it fails.
		 */
		if (refill_pi_state_cache())
			return -ENOMEM;
		/*
		 * requeue_pi must wake as many tasks as it can, up to nr_wake
		 * + nr_requeue, since it acquires the rt_mutex prior to
		 * returning to userspace, so as to not leave the rt_mutex with
		 * waiters and no owner.  However, second and third wake-ups
		 * cannot be predicted as they involve race conditions with the
		 * first wake and a fault while looking up the pi_state.  Both
		 * pthread_cond_signal() and pthread_cond_broadcast() should
		 * use nr_wake=1.
		 */
		if (nr_wake != 1)
			return -EINVAL;
	}

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
	if (unlikely(ret != 0))
		goto out_put_key1;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (requeue_pi && match_futex(&key1, &key2)) {
		ret = -EINVAL;
		goto out_put_keys;
	}

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	hb_waiters_inc(hb2);
	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);

			ret = get_user(curval, uaddr1);
			if (ret)
				goto out_put_keys;

			if (!(flags & FLAGS_SHARED))
				goto retry_private;

			put_futex_key(&key2);
			put_futex_key(&key1);
			goto retry;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS
		 * bit.  We force this here where we are able to easily handle
		 * faults rather than in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is
		 * waiting on it.  If the former, then the pi_state will not
		 * exist yet, look it up one more time to ensure we have a
		 * reference to it. If the lock was taken, ret contains the
		 * vpid of the top waiter task.
		 * If the lock was not taken, we have pi_state and an initial
		 * refcount on it. In case of an error we have nothing.
		 */
		if (ret > 0) {
			WARN_ON(pi_state);
			drop_count++;
			task_count++;
			/*
			 * If we acquired the lock, then the user space value
			 * of uaddr2 should be vpid. It cannot be changed by
			 * the top waiter as it is blocked on hb2 lock if it
			 * tries to do so. If something fiddled with it behind
			 * our back the pi state lookup might unearth it. So
			 * we rather use the known value than rereading and
			 * handing potential crap to lookup_pi_state.
			 *
			 * If that call succeeds then we have pi_state and an
			 * initial refcount on it.
			 */
			ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
		}

		switch (ret) {
		case 0:
			/* We hold a reference on the pi state. */
			break;

			/* If the above failed, then pi_state is NULL */
		case -EFAULT:
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			ret = fault_in_user_writeable(uaddr2);
			if (!ret)
				goto retry;
			goto out;
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - Owner is exiting and we just wait for the
			 *   exit to complete.
			 * - The user space value changed.
			 */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			cond_resched();
			goto retry;
		default:
			goto out_unlock;
		}
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (task_count - nr_wake >= nr_requeue)
			break;

		if (!match_futex(&this->key, &key1))
			continue;

		/*
		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 *
		 * We should never be requeueing a futex_q with a pi_state,
		 * which is awaiting a futex_unlock_pi().
		 */
		if ((requeue_pi && !this->rt_waiter) ||
		    (!requeue_pi && this->rt_waiter) ||
		    this->pi_state) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
		 * lock, we already woke the top_waiter.  If not, it will be
		 * woken by futex_unlock_pi().
		 */
		if (++task_count <= nr_wake && !requeue_pi) {
			mark_wake_futex(&wake_q, this);
			continue;
		}

		/* Ensure we requeue to the expected futex for requeue_pi. */
		if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
			ret = -EINVAL;
			break;
		}

		/*
		 * Requeue nr_requeue waiters and possibly one more in the case
		 * of requeue_pi if we couldn't acquire the lock atomically.
		 */
		if (requeue_pi) {
			/*
			 * Prepare the waiter to take the rt_mutex. Take a
			 * refcount on the pi_state and store the pointer in
			 * the futex_q object of the waiter.
			 */
			atomic_inc(&pi_state->refcount);
			this->pi_state = pi_state;
			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
							this->rt_waiter,
							this->task);
			if (ret == 1) {
				/*
				 * We got the lock. We do neither drop the
				 * refcount on pi_state nor clear
				 * this->pi_state because the waiter needs the
				 * pi_state for cleaning up the user space
				 * value. It will drop the refcount after
				 * doing so.
				 */
				requeue_pi_wake_futex(this, &key2, hb2);
				drop_count++;
				continue;
			} else if (ret) {
				/*
				 * rt_mutex_start_proxy_lock() detected a
				 * potential deadlock when we tried to queue
				 * that waiter. Drop the pi_state reference
				 * which we took above and remove the pointer
				 * to the state from the waiters futex_q
				 * object.
				 */
				this->pi_state = NULL;
				put_pi_state(pi_state);
				/*
				 * We stop queueing more waiters and let user
				 * space deal with the mess.
				 */
				break;
			}
		}
		requeue_futex(this, hb1, hb2, &key2);
		drop_count++;
	}

	/*
	 * We took an extra initial reference to the pi_state either
	 * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
	 * need to drop it here again.
	 */
	put_pi_state(pi_state);

out_unlock:
	double_unlock_hb(hb1, hb2);
	wake_up_q(&wake_q);
	hb_waiters_dec(hb2);

	/*
	 * drop_futex_key_refs() must be called outside the spinlocks. During
	 * the requeue we moved futex_q's from the hash bucket at key1 to the
	 * one at key2 and updated their key pointer.  We no longer need to
	 * hold the references to key1.
	 */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret ? ret : task_count;
}

/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	/*
	 * Increment the counter before taking the lock so that
	 * a potential waker won't miss a to-be-slept task that is
	 * waiting for the spinlock. This is safe as all queue_lock()
	 * users end up calling queue_me(). Similarly, for housekeeping,
	 * decrement the counter at queue_unlock() when some error has
	 * occurred and we don't end up adding the task to the list.
	 */
	hb_waiters_inc(hb);

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock); /* implies smp_mb(); (A) */
	return hb;
}

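/*
 * Drop the hash bucket lock and the waiter count taken by queue_lock()
 * when the task did not end up being queued.
 */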
static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}

/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the
 * unqueue state is implicit in the state of the woken task (see
 * futex_wait_requeue_pi() for an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thereby, priorities are ordered correctly; the list is sorted by
	 * increasing priority values. That means tasks with a priority value
	 * smaller than MAX_RT_PRIO are on the list earlier and are woken
	 * first.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}

/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *   1 - if the futex_q was still queued (and we unqueued it);
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	/*
	 * q->lock_ptr can change between this read and the following
	 * spin_lock. Use READ_ONCE to forbid the compiler from reloading
	 * q->lock_ptr and optimizing lock_ptr out of the logic below.
	 */
	lock_ptr = READ_ONCE(q->lock_ptr);
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}

/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	put_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}

/*
 * Fixup the pi_state owner with the new owner.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}

static long futex_wait_restart(struct restart_block *restart);

/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  1 - success, lock taken;
 *  0 - success, lock not taken;
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on
	 * the way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from
		 * the rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late.
		 */
		raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}

/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using smp_store_mb()
	 * and queue_me() calls spin_unlock() upon completion, both
	 * serializing access to the hash list and forcing another memory
	 * barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout)
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}

/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *  0 - uaddr contains val and hb has been locked;
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(*hb);

		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}

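/*
 * Wait on uaddr until woken by a wakeup matching @bitset, a signal or
 * the optional absolute timeout, provided *uaddr still contains the
 * expected value @val at queueing time.
 */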
static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int ret;

	if (!bitset)
		return -EINVAL;
	q.bitset = bitset;

	if (abs_time) {
		to = &timeout;

		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

retry:
	/*
	 * Prepare to wait on uaddr. On success, holds hb lock and increments
	 * q.key refs.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;

	/* queue_me and wait for wakeup, timeout, or a signal. */
	futex_wait_queue_me(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	ret = 0;
	/* unqueue_me() drops q.key ref */
	if (!unqueue_me(&q))
		goto out;
	ret = -ETIMEDOUT;
	if (to && !to->task)
		goto out;

	/*
	 * We expect signal_pending(current), but we might be the
	 * victim of a spurious wakeup as well.
	 */
	if (!signal_pending(current))
		goto retry;

	ret = -ERESTARTSYS;
	if (!abs_time)
		goto out;

	restart = &current->restart_block;
	restart->fn = futex_wait_restart;
	restart->futex.uaddr = uaddr;
	restart->futex.val = val;
	restart->futex.time = abs_time->tv64;
	restart->futex.bitset = bitset;
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

	ret = -ERESTART_RESTARTBLOCK;

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
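/*
 * Restart handler for a futex_wait() interrupted by a signal: redo the
 * wait with the parameters stashed in the restart block.
 */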
static long futex_wait_restart(struct restart_block *restart)
{
	u32 __user *uaddr = restart->futex.uaddr;
	ktime_t t, *tp = NULL;

	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
		t.tv64 = restart->futex.time;
		tp = &t;
	}
	restart->fn = do_no_restart_syscall;

	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp, restart->futex.bitset);
}

/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block as a consequence of relying
 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
 * a 0 value of the futex too.)
 *
 * Also serves as futex trylock_pi()'ing, and due semantics.
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,
		 * or failed. Either way, we do _not_ block.
		 */
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - Task is exiting and we just wait for the
			 *   exit to complete.
			 * - The user space value changed.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock) {
		ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
	} else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, propagate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}

/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb;
	struct futex_q *match;
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (ret)
		return ret;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * Check waiters first. We do not trust user space values at
	 * all and we at least want to know if user space fiddled
	 * with the futex value instead of blindly unlocking.
	 */
	match = futex_top_waiter(hb, &key);
	if (match) {
		ret = wake_futex_pi(uaddr, uval, match, hb);
		/*
		 * In case of success wake_futex_pi dropped the hash
		 * bucket lock.
		 */
		if (!ret)
			goto out_putkey;
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		/*
		 * An unconditional UNLOCK_PI op raced against a waiter
		 * setting the FUTEX_WAITERS bit. Try again.
		 */
		if (ret == -EAGAIN) {
			spin_unlock(&hb->lock);
			put_futex_key(&key);
			goto retry;
		}
		/*
		 * wake_futex_pi has detected invalid state. Tell user
		 * space.
		 */
		goto out_unlock;
	}

	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel. Waiters which are about to queue themselves are stuck
	 * on hb->lock. So we can safely ignore them. We do neither
	 * preserve the WAITERS bit nor the OWNER_DIED one. We are the
	 * owner.
	 */
	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
		goto pi_faulted;

	/*
	 * If uval has changed, let user space handle it.
	 */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
out_putkey:
	put_futex_key(&key);
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}

/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Return:
 *  0 = no early wakeup detected;
 * <0 = -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to uaddr as the requeue code cannot
	 * hold hb2 and hb at the same time.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);
		hb_waiters_dec(hb);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}

/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the futex at uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
 *		the same type, no requeueing from private to shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will
 * wake on uaddr2 and complete the acquisition of the rt_mutex prior to
 * returning to userspace.  This ensures the rt_mutex maintains an owner when
 * it has waiters; without one, the pi logic would not know which task to
 * boost/deboost, if there was a need to.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via the following--
 * 1) wakeup on uaddr2 after an atomic lock acquisition by
 *    futex_proxy_trylock_atomic()
 * 2) wakeup on uaddr2 after a requeue
 * 3) signal
 * 4) timeout
 *
 * If 3, cleanup and return -ERESTARTNOINTR.
 *
 * If 2, we may then block on trying to take the rt_mutex and return via:
 * 5) successful lock
 * 6) signal
 * 7) timeout
 * 8) other lock acquisition failure
 *
 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
 *
 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
 *
 * Return:
 *  0 - On success;
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
	RB_CLEAR_NODE(&rt_waiter.tree_entry);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/*
	 * The check above which compares uaddrs is not sufficient for
	 * shared futexes. We need to compare the keys:
	 */
	if (match_futex(&q.key, &key2)) {
		queue_unlock(hb);
		ret = -EINVAL;
		goto out_put_keys;
	}

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code.
	 * The futex_requeue dropped our key1 reference and incremented our
	 * key2 reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			/*
			 * Drop the reference to the pi state which
			 * the requeue_pi() code acquired for us.
			 */
			put_pi_state(q.pi_state);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up by futex_unlock_pi(), a timeout, or a
		 * signal.  futex_unlock_pi() will not destroy the lock_ptr
		 * nor the pi_state.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if
		 * we haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, propagate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}

/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	head = p->robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}

/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * dying on a PI futex. It is safe, the FUTEX_WAITERS
		 * bit is set in both cases.)
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * We are not holding a lock here, but we want to have
		 * the pagefault_disable/enable() protection because
		 * we want to handle the fault gracefully. If the
		 * access fails we try to fault in the futex with R/W
		 * verification via get_user_pages. get_user() above
		 * does not guarantee R/W access. If that fails we
		 * give up and leave the futex locked.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}

/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     unsigned int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}

/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET &&
		    cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		if (!futex_cmpxchg_enabled)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, NULL, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
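/*
 * The futex() syscall multiplexer: decode the timeout from utime for
 * the wait-type operations (or reuse utime as an integer argument for
 * the requeue/wake-op ones) and hand off to do_futex().
 */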
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
			return -EFAULT;
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

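/*
 * Runtime detection of futex_atomic_cmpxchg_inatomic() support on
 * architectures that do not declare it at build time.
 */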
static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
	u32 curval;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * functionality. We want to know that before we call in any
	 * of the complex code paths. Also we want to prevent
	 * registration of robust lists in that case. NULL is
	 * guaranteed to fault and we get -EFAULT on functional
	 * implementation, the non-functional ones will return
	 * -ENOSYS.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;
#endif
}

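/*
 * Allocate and initialize the global hash table of futex buckets and
 * detect cmpxchg support, before any futex syscall can run.
 */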
static int __init futex_init(void)
{
	unsigned int futex_shift;
	unsigned long i;

#if CONFIG_BASE_SMALL
	futex_hashsize = 16;
#else
	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
#endif

	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
					       futex_hashsize, 0,
					       futex_hashsize < 256 ? HASH_SMALL : 0,
					       &futex_shift, NULL,
					       futex_hashsize, futex_hashsize);
	futex_hashsize = 1UL << futex_shift;

	futex_detect_cmpxchg();

	for (i = 0; i < futex_hashsize; i++) {
		atomic_set(&futex_queues[i].waiters, 0);
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(futex_init);