1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63#include <linux/sched/rt.h>
64#include <linux/hugetlb.h>
65#include <linux/freezer.h>
66#include <linux/bootmem.h>
67
68#include <asm/futex.h>
69
70#include "locking/rtmutex_common.h"
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
/*
 * Non-zero when the architecture supports the futex cmpxchg operation;
 * set during init (presumably by a runtime probe — confirm in futex_init()).
 * PI/robust paths such as exit_pi_state_list() bail out when it is clear.
 */
int __read_mostly futex_cmpxchg_enabled;
161
162
163
164
165
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SHARED 0x01	/* futex may be shared between processes */
#define FLAGS_CLOCKRT 0x02	/* timeout is on CLOCK_REALTIME, not MONOTONIC */
#define FLAGS_HAS_TIMEOUT 0x04	/* restart block carries a valid timeout */
169
170
171
172
/*
 * Priority Inheritance state attached to a PI futex. One instance exists
 * per owned PI futex; it ties the user-space futex word to an in-kernel
 * rt_mutex so waiters can boost the owner's priority.
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely
	 * (anchored at task_struct::pi_state_list, protected by
	 * the owner's pi_lock).
	 */
	struct list_head list;

	/*
	 * The PI object backing this futex: waiters block on this
	 * rt_mutex, which provides the priority inheritance chain.
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;	/* current owner task, NULL if ownerless */
	atomic_t refcount;		/* references from futex_q waiters etc. */

	union futex_key key;		/* the futex this state belongs to */
};
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
/*
 * Hashed waiter object: one per task blocked on a futex. Lives on the
 * hash bucket's priority-sorted plist while the task is queued.
 */
struct futex_q {
	struct plist_node list;		/* node on futex_hash_bucket::chain */

	struct task_struct *task;	/* the waiting task, woken via wake_up_state() */
	spinlock_t *lock_ptr;		/* &hb->lock; NULLed by the waker (see wake_futex()) */
	union futex_key key;		/* key of the futex we are queued on */
	struct futex_pi_state *pi_state;	/* PI state, set for PI futexes only */
	struct rt_mutex_waiter *rt_waiter;	/* rt_mutex waiter, set for requeue_pi */
	union futex_key *requeue_pi_key;	/* expected target key for requeue_pi */
	u32 bitset;			/* wake bitset (FUTEX_WAIT/WAKE_BITSET) */
};
224
/* Template for on-stack futex_q initialization: empty key, match-any bitset. */
static const struct futex_q futex_q_init = {
	/* list gets initialized in queue_me()*/
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
230
231
232
233
234
235
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location. Each key may have multiple futex_q structures, one for each
 * task waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;	/* lockless waiter count, see hb_waiters_pending() */
	spinlock_t lock;	/* protects 'chain' and queued futex_q state */
	struct plist_head chain;	/* priority-ordered list of waiters */
} ____cacheline_aligned_in_smp;

/* Number of buckets; a power of two (hash is masked with hashsize - 1). */
static unsigned long __read_mostly futex_hashsize;

/* The bucket array itself; allocated elsewhere (presumably at boot — TODO confirm). */
static struct futex_hash_bucket *futex_queues;
245
/* Take a reference on the mm backing a private-mapped shared futex key. */
static inline void futex_get_mm(union futex_key *key)
{
	atomic_inc(&key->private.mm->mm_count);
	/*
	 * Ensure futex_get_mm() implies a full barrier such that
	 * get_futex_key() implies a full barrier. This is relied upon
	 * as full barrier (B), see the ordering comment above
	 * hb_waiters_pending().
	 */
	smp_mb__after_atomic_inc();
}
256
257
258
259
/*
 * Reflect a new waiter to the waiters counter of the hash bucket. Only
 * maintained on SMP, where futex_wake() uses it for its lockless
 * empty-bucket check.
 */
static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), paired with the barrier implied by
	 * get_futex_key_refs() on the waker side, so the increment is
	 * visible before the waker samples the counter.
	 */
	smp_mb__after_atomic_inc();
#endif
}
270
271
272
273
274
/*
 * Reflect the removal of a waiter. No barrier needed: a stale non-zero
 * count on the waker side merely causes a harmless extra lock/scan.
 */
static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
281
/*
 * Lockless check whether the bucket may hold waiters. On UP we cannot
 * race against a concurrent waker, so always report "pending" and let
 * the locked scan decide.
 */
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
290
291
292
293
294static struct futex_hash_bucket *hash_futex(union futex_key *key)
295{
296 u32 hash = jhash2((u32*)&key->both.word,
297 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
298 key->both.offset);
299 return &futex_queues[hash & (futex_hashsize - 1)];
300}
301
302
303
304
305static inline int match_futex(union futex_key *key1, union futex_key *key2)
306{
307 return (key1 && key2
308 && key1->both.word == key2->both.word
309 && key1->both.ptr == key2->both.ptr
310 && key1->both.offset == key2->both.offset);
311}
312
313
314
315
316
317
318static void get_futex_key_refs(union futex_key *key)
319{
320 if (!key->both.ptr)
321 return;
322
323 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
324 case FUT_OFF_INODE:
325 ihold(key->shared.inode);
326 break;
327 case FUT_OFF_MMSHARED:
328 futex_get_mm(key);
329 break;
330 }
331}
332
333
334
335
336
/*
 * drop_futex_key_refs - Drop a reference to the resource addressed by a key
 * @key:	futex key previously referenced via get_futex_key_refs()
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr) {
		/* If we're here then we tried to put a key we failed to get */
		WARN_ON_ONCE(1);
		return;
	}

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		iput(key->shared.inode);
		break;
	case FUT_OFF_MMSHARED:
		mmdrop(key->private.mm);
		break;
	}
	/* Private futexes pin nothing, so there is nothing to drop here. */
}
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key:	address where result is stored
 * @rw:		mapping needs to be read/write (values: VERIFY_READ,
 *		VERIFY_WRITE)
 *
 * Return: a negative error code or 0.
 *
 * The key words are stored in *key on success. For shared mappings the
 * key is (inode, pgoff, offset_within_page); for anonymous/private memory
 * it is (mm, address, offset). The resulting reference pins the backing
 * object for the lifetime of the key (dropped via put_futex_key()).
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
		return -EFAULT;

	/*
	 * PROCESS_PRIVATE futexes are fast: we can determine the key from
	 * (mm, address) without touching the page tables. No serialization
	 * with the page fault path is needed here.
	 */
	if (!fshared) {
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * If write access is not required (eg. FUTEX_WAIT), try
	 * and get read-only access.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/* serialize against __split_huge_page_splitting() */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
			page_head = compound_head(page);
			/*
			 * page_head is valid pointer but we must pin
			 * it before taking the PG_lock and/or
			 * PG_compound_lock. The moment we re-enable
			 * irqs __split_huge_page_splitting() can
			 * return and the head page can be freed from
			 * under us. So take a reference on the head
			 * page before dropping the tail reference.
			 */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			/* The page was split or unmapped under us; retry. */
			local_irq_enable();
			goto again;
		}
	}
#else
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);

	/*
	 * If page_head->mapping is NULL, then it cannot be a PageAnon
	 * page; but it might be the ZERO_PAGE or in the gate area or
	 * in a special mapping (all cases which we are happy to fail);
	 * or it may have been a good file page when get_user_pages_fast
	 * found it, but truncated or holepunched or subjected to
	 * invalidate_complete_page2 before the page lock was taken.
	 * The shmem_swizzled case (swapped out under us) retries; the
	 * rest fail with -EFAULT.
	 */
	if (!page_head->mapping) {
		int shmem_swizzled = PageSwapCache(page_head);
		unlock_page(page_head);
		put_page(page_head);
		if (shmem_swizzled)
			goto again;
		return -EFAULT;
	}

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the inode, not the mm.
	 */
	if (PageAnon(page_head)) {
		/*
		 * A RO anonymous page will never change and thus doesn't
		 * make sense for futex operations.
		 */
		if (ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = basepage_index(page);
	}

	get_futex_key_refs(key);

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}
516
/* Release the reference taken by get_futex_key(). */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
521
522
523
524
525
526
527
528
529
530
531
532
533
/**
 * fault_in_user_writeable() - Fault in user address and verify RW access
 * @uaddr:	pointer to faulting user space address
 *
 * Slow path to fixup the fault we just took in the atomic write access to
 * @uaddr, by faulting the page in with write intent under mmap_sem.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret < 0 ? ret : 0;
}
546
547
548
549
550
551
552
553
/**
 * futex_top_waiter() - Return the highest priority waiter on a futex
 * @hb:		the hash bucket the futex_q's reside in
 * @key:	the futex key (to distinguish it from other futexes in the bucket)
 *
 * Must be called with the hb lock held. The chain is priority ordered,
 * so the first match is the top waiter.
 */
static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}
565
/*
 * cmpxchg on the user-space futex word with page faults disabled, for use
 * while holding hash-bucket spinlocks. Stores the observed value in
 * *curval; returns 0 or a negative error (-EFAULT if the page is not
 * resident, which callers fix up after dropping their locks).
 */
static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
				      u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
577
/*
 * Read the user-space futex word with page faults disabled (safe under
 * spinlocks). Returns 0 on success, -EFAULT if the access would fault.
 */
static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}
588
589
590
591
592
593static int refill_pi_state_cache(void)
594{
595 struct futex_pi_state *pi_state;
596
597 if (likely(current->pi_state_cache))
598 return 0;
599
600 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
601
602 if (!pi_state)
603 return -ENOMEM;
604
605 INIT_LIST_HEAD(&pi_state->list);
606
607 pi_state->owner = NULL;
608 atomic_set(&pi_state->refcount, 1);
609 pi_state->key = FUTEX_KEY_INIT;
610
611 current->pi_state_cache = pi_state;
612
613 return 0;
614}
615
/*
 * Hand out the cached pi_state. Callers must have run
 * refill_pi_state_cache() beforehand, hence the WARN_ON.
 */
static struct futex_pi_state * alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}
625
/*
 * Drop a reference on a pi_state; on the last reference, unlink it from
 * the owner and either free it or recycle it into the per-task cache.
 */
static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
656
657
658
659
660
661static struct task_struct * futex_find_get_task(pid_t pid)
662{
663 struct task_struct *p;
664
665 rcu_read_lock();
666 p = find_task_by_vpid(pid);
667 if (p)
668 get_task_struct(p);
669
670 rcu_read_unlock();
671
672 return p;
673}
674
675
676
677
678
679
/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleanup: we have to make sure waiters who are blocked on our
 * held PI futexes get woken up and the pi_state is detached from us.
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		/* Lock ordering is hb->lock before pi_lock, so drop and retake. */
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		/* Hand the lock over to the next waiter (if any). */
		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
729
/*
 * Look up (or create) the pi_state for the futex identified by @key, given
 * the user-space value @uval. Called with the hash-bucket lock held.
 *
 * If a waiter with matching pi_state exists on the chain, a reference to
 * that state is returned. Otherwise the owner TID encoded in @uval is
 * looked up and a fresh pi_state is attached to that task.
 *
 * NOTE(review): later kernels hardened these sanity checks considerably
 * (lookup_pi_state was made more robust against inconsistent user-space
 * values as part of the CVE-2014-3153 fallout) — consider backporting.
 */
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non-PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * When pi_state->owner is NULL then the owner died
			 * and another waiter is on the fly. pi_state->owner
			 * is fixed up by the task which acquires
			 * pi_state->rt_mutex.
			 *
			 * We do not check for pid == 0 which can happen when
			 * the owner died and robust_list_exit() cleared the
			 * TID.
			 */
			if (pid && pi_state->owner) {
				/*
				 * Bail out if user space manipulated the
				 * futex value.
				 */
				if (pid != task_pid_vnr(pi_state->owner))
					return -EINVAL;
			}

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for. This will
 *			be "current" except in the case of requeue pi.
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  0 - ready to wait;
 *  1 - acquired the lock;
 * <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, force_take = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * To avoid races, we attempt to take the lock here again
	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
	 * the locks. It will most likely not succeed.
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * Detect deadlocks.
	 */
	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	/*
	 * Surprise - we got the lock. Just return to userspace:
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
	 * to wake at the next unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * Should we force take the futex? See below.
	 */
	if (unlikely(force_take)) {
		/*
		 * Keep the OWNER_DIED and the WAITERS bit and set the
		 * new TID value.
		 */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		force_take = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * We took the lock due to forced take over.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * We dont have the lock. Look up the PI state (or create it if
	 * we are the first waiter):
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * No owner found for this futex. Check if the
			 * OWNER_DIED bit is set to figure out whether
			 * this is a robust futex or not.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * We simply start over in case of a robust
			 * futex. The code above will take the futex
			 * and return happy.
			 */
			if (!(curval & FUTEX_TID_MASK)) {
				force_take = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}
956
957
958
959
960
961
962
/**
 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be NULL and must be held by the caller.
 */
static void __unqueue_futex(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
	    || WARN_ON(plist_node_empty(&q->list)))
		return;

	/* lock_ptr points at the embedded lock; recover the bucket from it. */
	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
	plist_del(&q->list, &hb->chain);
	hb_waiters_dec(hb);
}
975
976
977
978
979
/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed. Callers
 * must ensure to later call wake_up_state() with the correct
 * priority settings.
 */
static void wake_futex(struct futex_q *q)
{
	struct task_struct *p = q->task;

	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
		return;

	/*
	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
	 * a non-futex wake up happens on another CPU then the task
	 * might exit and p would dereference a non-existing task
	 * struct. Prevent this by holding a reference on p across the
	 * wake up.
	 */
	get_task_struct(p);

	__unqueue_futex(q);
	/*
	 * The waiting task can free the futex_q as soon as
	 * q->lock_ptr = NULL is written, without taking any locks. A
	 * memory barrier is required here to prevent the following
	 * store to lock_ptr from getting ahead of the plist_del.
	 */
	smp_wmb();
	q->lock_ptr = NULL;

	wake_up_state(p, TASK_NORMAL);
	put_task_struct(p);
}
1009
/*
 * Wake the top waiter of a PI futex: hand the user-space futex word and
 * the pi_state over to the new owner, then unlock the rt_mutex.
 * Called with the hash-bucket lock held by the unlocking owner.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;

	if (!pi_state)
		return -EINVAL;

	/*
	 * If current does not own the pi_state then the futex is
	 * inconsistent and user space fiddled with the futex value.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * It is possible that the next waiter (the one that brought
	 * this owner to the kernel) timed out and is no longer
	 * waiting on the lock.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	/* Move the pi_state from the old owner's list to the new owner's. */
	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}
1073
/*
 * Fast unlock of a PI futex with no kernel waiters: atomically clear the
 * futex word if it still holds the expected value @uval.
 */
static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 uninitialized_var(oldval);

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit has not to be preserved here. We are the owner:
	 */
	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
		return -EFAULT;
	if (oldval != uval)
		return -EAGAIN;

	return 0;
}
1089
1090
1091
1092
/*
 * Express the locking dependencies for lockdep: always take the two hash
 * bucket locks in address order to avoid ABBA deadlocks.
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}
1105
/* Release both bucket locks; they may be the same bucket. */
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
1113
1114
1115
1116
/*
 * Wake up waiters matching bitset queued on this futex (uaddr).
 * Returns the number of waiters woken, or a negative error.
 */
static int
futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);

	/* Make sure we really have tasks to wakeup */
	if (!hb_waiters_pending(hb))
		goto out_put_key;

	spin_lock(&hb->lock);

	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex (&this->key, &key)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				break;
			}

			/* Check if one of the bits is set in both bitsets */
			if (!(this->bitset & bitset))
				continue;

			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
out_put_key:
	put_futex_key(&key);
out:
	return ret;
}
1163
1164
1165
1166
1167
/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 *
 * Performs @op on *uaddr2 atomically, wakes up to @nr_wake waiters on
 * uaddr1 and, if the op result is true, up to @nr_wake2 on uaddr2.
 * Returns the total number woken or a negative error.
 */
static int
futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb1, *hb2;
	struct futex_q *this, *next;
	int ret, op_ret;

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out_put_key1;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry_private:
	double_lock_hb(hb1, hb2);
	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {

		double_unlock_hb(hb1, hb2);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out_put_keys;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out_put_keys;
		}

		/* Fault the page in writable and retry the atomic op. */
		ret = fault_in_user_writeable(uaddr2);
		if (ret)
			goto out_put_keys;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&key2);
		put_futex_key(&key1);
		goto retry;
	}

	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
		if (match_futex (&this->key, &key1)) {
			if (this->pi_state || this->rt_waiter) {
				ret = -EINVAL;
				goto out_unlock;
			}
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		op_ret = 0;
		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
			if (match_futex (&this->key, &key2)) {
				if (this->pi_state || this->rt_waiter) {
					ret = -EINVAL;
					goto out_unlock;
				}
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

out_unlock:
	double_unlock_hb(hb1, hb2);
out_put_keys:
	put_futex_key(&key2);
out_put_key1:
	put_futex_key(&key1);
out:
	return ret;
}
1258
1259
1260
1261
1262
1263
1264
1265
/**
 * requeue_futex() - Requeue a futex_q from one hb to another
 * @q:		the futex_q to requeue
 * @hb1:	the source hash_bucket
 * @hb2:	the target hash_bucket
 * @key2:	the new key for the requeued futex_q
 */
static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{

	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		plist_add(&q->list, &hb2->chain);
		hb_waiters_inc(hb2);
		q->lock_ptr = &hb2->lock;
	}
	get_futex_key_refs(key2);
	q->key = *key2;
}
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
/**
 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
 * @q:		the futex_q
 * @key:	the key of the requeue target futex
 * @hb:		the hash_bucket of the requeue target futex
 *
 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
 * target futex if it is uncontended or via a lock steal.  Set the futex_q key
 * to the requeue target futex so the waiter can detect the wakeup on the right
 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
 * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
 * to protect access to the pi_state to fixup the owner later.  Must be called
 * with both q->lock_ptr and hb->lock held.
 */
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
			   struct futex_hash_bucket *hb)
{
	get_futex_key_refs(key);
	q->key = *key;

	__unqueue_futex(q);

	WARN_ON(!q->rt_waiter);
	q->rt_waiter = NULL;

	q->lock_ptr = &hb->lock;

	wake_up_state(q->task, TASK_NORMAL);
}
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
/**
 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
 * @pifutex:		the user address of the to futex
 * @hb1:		the from futex hash bucket, must be locked by the caller
 * @hb2:		the to futex hash bucket, must be locked by the caller
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
 * Wake the top waiter if we succeed.  If the caller specified set_waiters,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * Return:
 *  0 - failed to acquire the lock atomically;
 *  1 - acquired the lock;
 * <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				 struct futex_hash_bucket *hb1,
				 struct futex_hash_bucket *hb2,
				 union futex_key *key1, union futex_key *key2,
				 struct futex_pi_state **ps, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;
	int ret;

	if (get_futex_value_locked(&curval, pifutex))
		return -EFAULT;

	/*
	 * Find the top_waiter and determine if there are additional waiters.
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault.  If not, don't set
	 * the bit unecessarily as it will force the subsequent unlock to enter
	 * the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);

	/* There are no waiters, nothing for us to do. */
	if (!top_waiter)
		return 0;

	/* Ensure we requeue to the expected futex. */
	if (!match_futex(top_waiter->requeue_pi_key, key2))
		return -EINVAL;

	/*
	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
	 * the contended case or if set_waiters is 1.  The pi_state is returned
	 * in ps in contended cases.
	 */
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
	if (ret == 1)
		requeue_pi_wake_futex(top_waiter, key2, hb2);

	return ret;
}
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1400 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1401 u32 *cmpval, int requeue_pi)
1402{
1403 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1404 int drop_count = 0, task_count = 0, ret;
1405 struct futex_pi_state *pi_state = NULL;
1406 struct futex_hash_bucket *hb1, *hb2;
1407 struct futex_q *this, *next;
1408 u32 curval2;
1409
1410 if (requeue_pi) {
1411
1412
1413
1414
1415 if (refill_pi_state_cache())
1416 return -ENOMEM;
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427 if (nr_wake != 1)
1428 return -EINVAL;
1429 }
1430
1431retry:
1432 if (pi_state != NULL) {
1433
1434
1435
1436
1437 free_pi_state(pi_state);
1438 pi_state = NULL;
1439 }
1440
1441 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1442 if (unlikely(ret != 0))
1443 goto out;
1444 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1445 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1446 if (unlikely(ret != 0))
1447 goto out_put_key1;
1448
1449 hb1 = hash_futex(&key1);
1450 hb2 = hash_futex(&key2);
1451
1452retry_private:
1453 double_lock_hb(hb1, hb2);
1454
1455 if (likely(cmpval != NULL)) {
1456 u32 curval;
1457
1458 ret = get_futex_value_locked(&curval, uaddr1);
1459
1460 if (unlikely(ret)) {
1461 double_unlock_hb(hb1, hb2);
1462
1463 ret = get_user(curval, uaddr1);
1464 if (ret)
1465 goto out_put_keys;
1466
1467 if (!(flags & FLAGS_SHARED))
1468 goto retry_private;
1469
1470 put_futex_key(&key2);
1471 put_futex_key(&key1);
1472 goto retry;
1473 }
1474 if (curval != *cmpval) {
1475 ret = -EAGAIN;
1476 goto out_unlock;
1477 }
1478 }
1479
1480 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1481
1482
1483
1484
1485
1486
1487 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1488 &key2, &pi_state, nr_requeue);
1489
1490
1491
1492
1493
1494
1495
1496 if (ret == 1) {
1497 WARN_ON(pi_state);
1498 drop_count++;
1499 task_count++;
1500 ret = get_futex_value_locked(&curval2, uaddr2);
1501 if (!ret)
1502 ret = lookup_pi_state(curval2, hb2, &key2,
1503 &pi_state);
1504 }
1505
1506 switch (ret) {
1507 case 0:
1508 break;
1509 case -EFAULT:
1510 double_unlock_hb(hb1, hb2);
1511 put_futex_key(&key2);
1512 put_futex_key(&key1);
1513 ret = fault_in_user_writeable(uaddr2);
1514 if (!ret)
1515 goto retry;
1516 goto out;
1517 case -EAGAIN:
1518
1519 double_unlock_hb(hb1, hb2);
1520 put_futex_key(&key2);
1521 put_futex_key(&key1);
1522 cond_resched();
1523 goto retry;
1524 default:
1525 goto out_unlock;
1526 }
1527 }
1528
1529 plist_for_each_entry_safe(this, next, &hb1->chain, list) {
1530 if (task_count - nr_wake >= nr_requeue)
1531 break;
1532
1533 if (!match_futex(&this->key, &key1))
1534 continue;
1535
1536
1537
1538
1539
1540
1541
1542
1543 if ((requeue_pi && !this->rt_waiter) ||
1544 (!requeue_pi && this->rt_waiter) ||
1545 this->pi_state) {
1546 ret = -EINVAL;
1547 break;
1548 }
1549
1550
1551
1552
1553
1554
1555 if (++task_count <= nr_wake && !requeue_pi) {
1556 wake_futex(this);
1557 continue;
1558 }
1559
1560
1561 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1562 ret = -EINVAL;
1563 break;
1564 }
1565
1566
1567
1568
1569
1570 if (requeue_pi) {
1571
1572 atomic_inc(&pi_state->refcount);
1573 this->pi_state = pi_state;
1574 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1575 this->rt_waiter,
1576 this->task, 1);
1577 if (ret == 1) {
1578
1579 requeue_pi_wake_futex(this, &key2, hb2);
1580 drop_count++;
1581 continue;
1582 } else if (ret) {
1583
1584 this->pi_state = NULL;
1585 free_pi_state(pi_state);
1586 goto out_unlock;
1587 }
1588 }
1589 requeue_futex(this, hb1, hb2, &key2);
1590 drop_count++;
1591 }
1592
1593out_unlock:
1594 double_unlock_hb(hb1, hb2);
1595
1596
1597
1598
1599
1600
1601
1602 while (--drop_count >= 0)
1603 drop_futex_key_refs(&key1);
1604
1605out_put_keys:
1606 put_futex_key(&key2);
1607out_put_key1:
1608 put_futex_key(&key1);
1609out:
1610 if (pi_state != NULL)
1611 free_pi_state(pi_state);
1612 return ret ? ret : task_count;
1613}
1614
1615
/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
	__acquires(&hb->lock)
{
	struct futex_hash_bucket *hb;

	hb = hash_futex(&q->key);

	/*
	 * Increment the counter before taking the lock so that
	 * a potential waker won't miss a to-be-slept task that is
	 * waiting for the spinlock. This is safe as all queue_lock()
	 * users end up calling queue_me(). Similarly, for housekeeping,
	 * decrement the counter at queue_unlock() when some error has
	 * occurred and we don't end up adding the task to the list.
	 */
	hb_waiters_inc(hb);

	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}
1638
/* Undo queue_lock() on an error path: drop the lock and the waiter count. */
static inline void
queue_unlock(struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
/**
 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * queue_me() is typically paired with exactly one call to unqueue_me().  The
 * exceptions involve the PI related operations, which may use unqueue_me_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
 * an example).
 */
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
/**
 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
 * @q:	The futex_q to unqueue
 *
 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
 * be paired with exactly one earlier call to queue_me().
 *
 * Return:
 *   1 - if the futex_q was still queued (and we removed unqueued it);
 *   0 - if the futex_q was already removed by the waking thread
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	/* Prevent the compiler from re-reading q->lock_ptr below. */
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
1730
1731
1732
1733
1734
1735
/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. *q->lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
	__releases(q->lock_ptr)
{
	__unqueue_futex(q);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);
}
1747
1748
1749
1750
1751
1752
1753
/*
 * Fixup the pi_state owner with the new owner, and also update the
 * user-space TID field in the futex word to match. Must be called with
 * q->lock_ptr (the hash bucket lock) held.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * We are here either because we stole the rtmutex from the
	 * previous highest priority waiter or we are the highest priority
	 * waiter but failed to get the rtmutex the first time.
	 * We have to replace the newowner TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would
	 * leave the pi_state in an inconsistent state when we fault
	 * here, because we need to drop the hash bucket lock to
	 * handle the fault. This might be observed in the PID check
	 * in lookup_pi_state.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * To handle the page fault we need to drop the hash bucket
	 * lock here. That gives the other task (either the highest priority
	 * waiter itself or the task which stole the rtmutex) the
	 * chance to try the fixup of the pi_state. So once we are
	 * back from handling the fault we need to check the pi_state
	 * after reacquiring the hash bucket lock and before trying to
	 * do another fixup. When the fixup has been done already we
	 * simply return.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
1845
1846static long futex_wait_restart(struct restart_block *restart);
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
/**
 * fixup_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  1 - success, lock taken;
 *  0 - success, lock not taken;
 * <0 - on error (-EFAULT)
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * Catch the rare case, where the lock was released when we were on the
	 * way back before we locked the hash bucket.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try to get the rt_mutex now. This might fail as some other
		 * task acquired the rt_mutex after we removed ourself from the
		 * rt_mutex waiters list.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * pi_state is incorrect, some other task did a lock steal and
		 * we returned due to timeout or signal without taking the
		 * rt_mutex. Too late.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
1920
1921
1922
1923
1924
1925
1926
/**
 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
 * @hb:		the futex hash bucket, must be locked by the caller
 * @q:		the futex_q to queue up on
 * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is guaranteed to be set before another task can
	 * wake it. set_current_state() is implemented using set_mb() and
	 * queue_me() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have been removed from the hash list, then another task
	 * has tried to wake us, and we can skip the call to schedule().
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * If the timer has already expired, current will already be
		 * flagged for rescheduling. Only call schedule if there
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
			freezable_schedule();
	}
	__set_current_state(TASK_RUNNING);
}
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
/**
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
 * Setup the futex_q and locate the hash_bucket.  Get the futex value and
 * compare it with the expected value.  Handle atomic faults internally.
 * Return with the hb lock held and a q.key reference on success, and unlocked
 * with no q.key reference on failure.
 *
 * Return:
 *  0 - uaddr contains val and hb has been locked;
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * Access the page AFTER the hash-bucket is locked.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
	 * would open a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * On the other hand, we insert q and release the hash-bucket only
	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
	 * absorb a wakeup if *uaddr does not match the desired values
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(*hb);

		/* Fault the value in outside the locks and retry. */
		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}
2037
2038static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2039 ktime_t *abs_time, u32 bitset)
2040{
2041 struct hrtimer_sleeper timeout, *to = NULL;
2042 struct restart_block *restart;
2043 struct futex_hash_bucket *hb;
2044 struct futex_q q = futex_q_init;
2045 int ret;
2046
2047 if (!bitset)
2048 return -EINVAL;
2049 q.bitset = bitset;
2050
2051 if (abs_time) {
2052 to = &timeout;
2053
2054 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2055 CLOCK_REALTIME : CLOCK_MONOTONIC,
2056 HRTIMER_MODE_ABS);
2057 hrtimer_init_sleeper(to, current);
2058 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2059 current->timer_slack_ns);
2060 }
2061
2062retry:
2063
2064
2065
2066
2067 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2068 if (ret)
2069 goto out;
2070
2071
2072 futex_wait_queue_me(hb, &q, to);
2073
2074
2075 ret = 0;
2076
2077 if (!unqueue_me(&q))
2078 goto out;
2079 ret = -ETIMEDOUT;
2080 if (to && !to->task)
2081 goto out;
2082
2083
2084
2085
2086
2087 if (!signal_pending(current))
2088 goto retry;
2089
2090 ret = -ERESTARTSYS;
2091 if (!abs_time)
2092 goto out;
2093
2094 restart = ¤t_thread_info()->restart_block;
2095 restart->fn = futex_wait_restart;
2096 restart->futex.uaddr = uaddr;
2097 restart->futex.val = val;
2098 restart->futex.time = abs_time->tv64;
2099 restart->futex.bitset = bitset;
2100 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
2101
2102 ret = -ERESTART_RESTARTBLOCK;
2103
2104out:
2105 if (to) {
2106 hrtimer_cancel(&to->timer);
2107 destroy_hrtimer_on_stack(&to->timer);
2108 }
2109 return ret;
2110}
2111
2112
2113static long futex_wait_restart(struct restart_block *restart)
2114{
2115 u32 __user *uaddr = restart->futex.uaddr;
2116 ktime_t t, *tp = NULL;
2117
2118 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
2119 t.tv64 = restart->futex.time;
2120 tp = &t;
2121 }
2122 restart->fn = do_no_restart_syscall;
2123
2124 return (long)futex_wait(uaddr, restart->futex.flags,
2125 restart->futex.val, tp, restart->futex.bitset);
2126}
2127
2128
2129
2130
2131
2132
2133
2134
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block, it does PI, etc. (Due to
 * races the kernel might see a 0 value of the futex too.)
 */
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (refill_pi_state_cache())
		return -ENOMEM;

	if (time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires(&to->timer, *time);
	}

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	if (unlikely(ret)) {
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EAGAIN:
			/*
			 * Task is exiting and we just wait for the
			 * exit to complete.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	queue_me(&q, hb);

	WARN_ON(!q.pi_state);
	/*
	 * Block on the PI mutex:
	 */
	if (!trylock)
		ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
	else {
		ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
	}

	spin_lock(q.lock_ptr);
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
	 * If fixup_owner() returned an error, proprogate that.  If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
		rt_mutex_unlock(&q.pi_state->pi_mutex);

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	goto out_put_key;

out_unlock_put_key:
	queue_unlock(hb);

out_put_key:
	put_futex_key(&q.key);
out:
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	queue_unlock(hb);

	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out_put_key;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	put_futex_key(&q.key);
	goto retry;
}
2250
2251
2252
2253
2254
2255
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	u32 uval, vpid = task_pid_vnr(current);
	int ret;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * again. If it succeeds then we can return without waking
	 * anyone else up. We only try this if neither the waiters nor
	 * the owner died bit are set.
	 */
	if (!(uval & FUTEX_OWNER_DIED) &&
	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == vpid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-space value could have changed.
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

out:
	return ret;

pi_faulted:
	spin_unlock(&hb->lock);
	put_futex_key(&key);

	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
/**
 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
 * @hb:		the hash_bucket futex_q was original enqueued on
 * @q:		the futex_q woken while waiting to be requeued
 * @key2:	the futex_key of the requeue target futex
 * @timeout:	the timeout associated with the wait (NULL if none)
 *
 * Detect if the task was woken on the initial futex as opposed to the requeue
 * target futex.  If so, determine if it was a timeout or a signal that caused
 * the wakeup and return the appropriate error code to the caller.  Must be
 * called with the hb lock held.
 *
 * Return:
 *  0 = no early wakeup detected;
 * <0 = -ETIMEDOUT or -ERESTARTNOINTR
 */
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
				   struct futex_q *q, union futex_key *key2,
				   struct hrtimer_sleeper *timeout)
{
	int ret = 0;

	/*
	 * With the hb lock held, we avoid races while we process the wakeup.
	 * We only need to hold hb (and not hb2) to ensure atomicity as the
	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
	 * It can't be requeued from uaddr2 to uaddr as the requeue code only
	 * uses uaddr2 once the key match is complete.
	 */
	if (!match_futex(&q->key, key2)) {
		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
		/*
		 * We were woken prior to requeue by a timeout or a signal.
		 * Unqueue the futex_q and determine which it was.
		 */
		plist_del(&q->list, &hb->chain);
		hb_waiters_dec(hb);

		/* Handle spurious wakeups gracefully */
		ret = -EWOULDBLOCK;
		if (timeout && !timeout->task)
			ret = -ETIMEDOUT;
		else if (signal_pending(current))
			ret = -ERESTARTNOINTR;
	}
	return ret;
}
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
/**
 * futex_wait_requeue_pi() - Wait on uaddr and take the pi futex on uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
 * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
 *		the same type, no requeueing from private to shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
 * @bitset:	32 bit wakeup bitset set by caller or FUTEX_BITSET_MATCH_ANY
 * @uaddr2:	the pi futex we will take prior to returning to user-space
 *
 * The caller will wait on uaddr and will be requeued by futex_requeue() to
 * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will wake
 * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
 * userspace.  This ensures the rt_mutex maintains an owner when it has waiters;
 * without one, the pi logic would not know which task to boost/deboost, if
 * there was a need to.
 *
 * We call schedule in futex_wait_queue_me() when we enqueue and return there
 * via the following--
 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
 * 2) wakeup on uaddr2 after a requeue
 * 3) signal
 * 4) timeout
 *
 * If 3, cleanup and return -ERESTARTNOINTR.
 *
 * If 2, we may then block on trying to take the rt_mutex and return via:
 * 5) successful lock
 * 6) signal
 * 7) timeout
 * 8) other lock acquisition failure
 *
 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
 *
 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
 *
 * Return:
 *  0 - On success;
 * <0 - On error
 */
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 val, ktime_t *abs_time, u32 bitset,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct rt_mutex *pi_mutex = NULL;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (uaddr == uaddr2)
		return -EINVAL;

	if (!bitset)
		return -EINVAL;

	if (abs_time) {
		to = &timeout;
		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(to, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}

	/*
	 * The waiter is allocated on our stack, manipulated by the requeue
	 * code while we sleep on uaddr.
	 */
	debug_rt_mutex_init_waiter(&rt_waiter);
	RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
	RB_CLEAR_NODE(&rt_waiter.tree_entry);
	rt_waiter.task = NULL;

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
		goto out;

	q.bitset = bitset;
	q.rt_waiter = &rt_waiter;
	q.requeue_pi_key = &key2;

	/*
	 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
	 * count.
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out_key2;

	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
	futex_wait_queue_me(hb, &q, to);

	spin_lock(&hb->lock);
	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
	spin_unlock(&hb->lock);
	if (ret)
		goto out_put_keys;

	/*
	 * In order for us to be here, we know our q.key == key2, and since
	 * we took the hb->lock above, we also know that futex_requeue() has
	 * completed and we no longer have to concern ourselves with a wakeup
	 * race with the atomic proxy lock acquisition by the requeue code. The
	 * futex_requeue dropped our key1 reference and incremented our key2
	 * reference count.
	 */

	/* Check if the requeue code acquired the second futex for us. */
	if (!q.rt_waiter) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case.
		 */
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			spin_unlock(q.lock_ptr);
		}
	} else {
		/*
		 * We have been woken up in requeue_pi mode, so we have to
		 * complete the lock acquisition of the rt_mutex with the
		 * waiter that was set up by the requeue code.
		 */
		WARN_ON(!q.pi_state);
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
		debug_rt_mutex_free_waiter(&rt_waiter);

		spin_lock(q.lock_ptr);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
		 * haven't already.
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
		 * If fixup_owner() returned an error, proprogate that.  If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
			ret = (res < 0) ? res : 0;

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	/*
	 * If fixup_pi_state_owner() faulted and was unable to handle the
	 * fault, unlock the rt_mutex and return the fault to userspace.
	 */
	if (ret == -EFAULT) {
		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
			rt_mutex_unlock(pi_mutex);
	} else if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
		 * futex_lock_pi() directly. We could restart this syscall, but
		 * it would detect that the user space "val" changed and return
		 * -EWOULDBLOCK.  Save the overhead of the restart and return
		 * -EWOULDBLOCK directly.
		 */
		ret = -EWOULDBLOCK;
	}

out_put_keys:
	put_futex_key(&q.key);
out_key2:
	put_futex_key(&key2);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	return ret;
}
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2590 size_t, len)
2591{
2592 if (!futex_cmpxchg_enabled)
2593 return -ENOSYS;
2594
2595
2596
2597 if (unlikely(len != sizeof(*head)))
2598 return -EINVAL;
2599
2600 current->robust_list = head;
2601
2602 return 0;
2603}
2604
2605
2606
2607
2608
2609
2610
/*
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	if (!futex_cmpxchg_enabled)
		return -ENOSYS;

	/* RCU protects the task lookup and p->robust_list read below. */
	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ))
		goto err_unlock;

	head = p->robust_list;
	rcu_read_unlock();

	/* Only touch userspace after dropping the RCU read lock. */
	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}
2649
2650
2651
2652
2653
/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, uninitialized_var(nval), mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * dying on a PI futex.)
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		/*
		 * We are not holding a lock here, but we want to have
		 * the pagefault_disable/enable() protection because
		 * we want to handle the fault gracefully. If the
		 * access fails we try to fault in the futex with R/W
		 * verification via get_user_pages. get_user() above
		 * does not guarantee R/W access. If that fails we
		 * give up and leave the futex locked.
		 */
		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;
		}
		/* Lost a cmpxchg race against a concurrent update: retry. */
		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi && (uval & FUTEX_WAITERS))
			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
	}
	return 0;
}
2700
2701
2702
2703
2704static inline int fetch_robust_entry(struct robust_list __user **entry,
2705 struct robust_list __user * __user *head,
2706 unsigned int *pi)
2707{
2708 unsigned long uentry;
2709
2710 if (get_user(uentry, (unsigned long __user *)head))
2711 return -EFAULT;
2712
2713 *entry = (void __user *)(uentry & ~1UL);
2714 *pi = uentry & 1;
2715
2716 return 0;
2717}
2718
2719
2720
2721
2722
2723
2724
/*
 * Walk curr->robust_list, handling futex robustness cleanup:
 *
 * We just have to conditionally remove the (potential)
 * futex-ownership of dead tasks.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned int uninitialized_var(next_pi);
	unsigned long futex_offset;
	int rc;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	next_entry = NULL;	/* avoid warning with gcc */
	while (entry != &head->list) {
		/*
		 * Fetch the next entry in the list before calling
		 * handle_futex_death:
		 */
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;
		if (rc)
			return;
		entry = next_entry;
		pi = next_pi;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);
}
2787
/*
 * do_futex() - decode the futex op word and dispatch to the implementation.
 * Shared entry point for the futex syscall and in-kernel callers.
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		/* CLOCK_REALTIME is only valid for the *_BITSET wait ops. */
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		/* All PI ops require a working atomic futex cmpxchg. */
		if (!futex_cmpxchg_enabled)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through: plain WAIT is WAIT_BITSET with match-any */
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through: plain WAKE is WAKE_BITSET with match-any */
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		return futex_lock_pi(uaddr, flags, val, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, 0, timeout, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
2843
2844
/* The futex(2) syscall entry: decode the timespec and hand off to do_futex(). */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		/*
		 * FUTEX_WAIT takes a relative timeout; convert it to an
		 * absolute expiry with overflow-safe addition.
		 */
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
2877
/* Boot-time setup: size and allocate the futex hash table, probe cmpxchg. */
static int __init futex_init(void)
{
	u32 curval;
	unsigned int futex_shift;
	unsigned long i;

#if CONFIG_BASE_SMALL
	futex_hashsize = 16;
#else
	/* Scale the hash with the CPU count to reduce bucket contention. */
	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
#endif

	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
					       futex_hashsize, 0,
					       futex_hashsize < 256 ? HASH_SMALL : 0,
					       &futex_shift, NULL,
					       futex_hashsize, futex_hashsize);
	/* alloc_large_system_hash() may round; recompute the real size. */
	futex_hashsize = 1UL << futex_shift;

	/*
	 * This will fail and we want it. Some arch implementations do
	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
	 * functionality. We want to know that before we call in any
	 * of the complex code paths. Also we want to prevent
	 * registration of robust lists in that case. NULL is
	 * guaranteed to fault and we get -EFAULT on functional
	 * implementation, the non-functional ones will return
	 * -ENOSYS.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;

	for (i = 0; i < futex_hashsize; i++) {
		atomic_set(&futex_queues[i].waiters, 0);
		plist_head_init(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(futex_init);
2918