1
2
3
4
5
6
7
8#include <linux/kernel.h>
9#include <linux/slab.h>
10#include <linux/syscalls.h>
11#include <linux/sched/sysctl.h>
12
13#include <asm/insn.h>
14#include <asm/mman.h>
15#include <asm/mmu_context.h>
16#include <asm/mpx.h>
17#include <asm/processor.h>
18#include <asm/fpu/internal.h>
19
20#define CREATE_TRACE_POINTS
21#include <asm/trace/mpx.h>
22
23static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
24{
25 if (is_64bit_mm(mm))
26 return MPX_BD_SIZE_BYTES_64;
27 else
28 return MPX_BD_SIZE_BYTES_32;
29}
30
31static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
32{
33 if (is_64bit_mm(mm))
34 return MPX_BT_SIZE_BYTES_64;
35 else
36 return MPX_BT_SIZE_BYTES_32;
37}
38
39
40
41
42
43static unsigned long mpx_mmap(unsigned long len)
44{
45 struct mm_struct *mm = current->mm;
46 unsigned long addr, populate;
47
48
49 if (len != mpx_bt_size_bytes(mm))
50 return -EINVAL;
51
52 down_write(&mm->mmap_sem);
53 addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
54 MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
55 up_write(&mm->mmap_sem);
56 if (populate)
57 mm_populate(addr, populate);
58
59 return addr;
60}
61
/*
 * Which field of a decoded instruction names the register that
 * get_reg_offset() should look up.
 */
enum reg_type {
	REG_TYPE_RM    = 0,	/* ModRM.rm field */
	REG_TYPE_INDEX = 1,	/* SIB.index field */
	REG_TYPE_BASE  = 2,	/* SIB.base field */
};
67
68static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
69 enum reg_type type)
70{
71 int regno = 0;
72
73 static const int regoff[] = {
74 offsetof(struct pt_regs, ax),
75 offsetof(struct pt_regs, cx),
76 offsetof(struct pt_regs, dx),
77 offsetof(struct pt_regs, bx),
78 offsetof(struct pt_regs, sp),
79 offsetof(struct pt_regs, bp),
80 offsetof(struct pt_regs, si),
81 offsetof(struct pt_regs, di),
82#ifdef CONFIG_X86_64
83 offsetof(struct pt_regs, r8),
84 offsetof(struct pt_regs, r9),
85 offsetof(struct pt_regs, r10),
86 offsetof(struct pt_regs, r11),
87 offsetof(struct pt_regs, r12),
88 offsetof(struct pt_regs, r13),
89 offsetof(struct pt_regs, r14),
90 offsetof(struct pt_regs, r15),
91#endif
92 };
93 int nr_registers = ARRAY_SIZE(regoff);
94
95
96
97
98 if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64)
99 nr_registers -= 8;
100
101 switch (type) {
102 case REG_TYPE_RM:
103 regno = X86_MODRM_RM(insn->modrm.value);
104 if (X86_REX_B(insn->rex_prefix.value))
105 regno += 8;
106 break;
107
108 case REG_TYPE_INDEX:
109 regno = X86_SIB_INDEX(insn->sib.value);
110 if (X86_REX_X(insn->rex_prefix.value))
111 regno += 8;
112 break;
113
114 case REG_TYPE_BASE:
115 regno = X86_SIB_BASE(insn->sib.value);
116 if (X86_REX_B(insn->rex_prefix.value))
117 regno += 8;
118 break;
119
120 default:
121 pr_err("invalid register type");
122 BUG();
123 break;
124 }
125
126 if (regno >= nr_registers) {
127 WARN_ONCE(1, "decoded an instruction with an invalid register");
128 return -EINVAL;
129 }
130 return regoff[regno];
131}
132
133
134
135
136
137
138static void __user *mpx_get_addr_ref(struct insn *insn, struct pt_regs *regs)
139{
140 unsigned long addr, base, indx;
141 int addr_offset, base_offset, indx_offset;
142 insn_byte_t sib;
143
144 insn_get_modrm(insn);
145 insn_get_sib(insn);
146 sib = insn->sib.value;
147
148 if (X86_MODRM_MOD(insn->modrm.value) == 3) {
149 addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
150 if (addr_offset < 0)
151 goto out_err;
152 addr = regs_get_register(regs, addr_offset);
153 } else {
154 if (insn->sib.nbytes) {
155 base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE);
156 if (base_offset < 0)
157 goto out_err;
158
159 indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX);
160 if (indx_offset < 0)
161 goto out_err;
162
163 base = regs_get_register(regs, base_offset);
164 indx = regs_get_register(regs, indx_offset);
165 addr = base + indx * (1 << X86_SIB_SCALE(sib));
166 } else {
167 addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
168 if (addr_offset < 0)
169 goto out_err;
170 addr = regs_get_register(regs, addr_offset);
171 }
172 addr += insn->displacement.value;
173 }
174 return (void __user *)addr;
175out_err:
176 return (void __user *)-1;
177}
178
179static int mpx_insn_decode(struct insn *insn,
180 struct pt_regs *regs)
181{
182 unsigned char buf[MAX_INSN_SIZE];
183 int x86_64 = !test_thread_flag(TIF_IA32);
184 int not_copied;
185 int nr_copied;
186
187 not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
188 nr_copied = sizeof(buf) - not_copied;
189
190
191
192
193
194 if (!nr_copied)
195 return -EFAULT;
196 insn_init(insn, buf, nr_copied, x86_64);
197 insn_get_length(insn);
198
199
200
201
202
203
204
205
206 if (nr_copied < insn->length)
207 return -EFAULT;
208
209 insn_get_opcode(insn);
210
211
212
213
214 if (insn->opcode.bytes[0] != 0x0f)
215 goto bad_opcode;
216 if ((insn->opcode.bytes[1] != 0x1a) &&
217 (insn->opcode.bytes[1] != 0x1b))
218 goto bad_opcode;
219
220 return 0;
221bad_opcode:
222 return -EINVAL;
223}
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
239{
240 const struct mpx_bndreg_state *bndregs;
241 const struct mpx_bndreg *bndreg;
242 siginfo_t *info = NULL;
243 struct insn insn;
244 uint8_t bndregno;
245 int err;
246
247 err = mpx_insn_decode(&insn, regs);
248 if (err)
249 goto err_out;
250
251
252
253
254
255 insn_get_modrm(&insn);
256 bndregno = X86_MODRM_REG(insn.modrm.value);
257 if (bndregno > 3) {
258 err = -EINVAL;
259 goto err_out;
260 }
261
262 bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
263 if (!bndregs) {
264 err = -EINVAL;
265 goto err_out;
266 }
267
268 bndreg = &bndregs->bndreg[bndregno];
269
270 info = kzalloc(sizeof(*info), GFP_KERNEL);
271 if (!info) {
272 err = -ENOMEM;
273 goto err_out;
274 }
275
276
277
278
279
280
281
282
283
284
285 info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound;
286 info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound;
287 info->si_addr_lsb = 0;
288 info->si_signo = SIGSEGV;
289 info->si_errno = 0;
290 info->si_code = SEGV_BNDERR;
291 info->si_addr = mpx_get_addr_ref(&insn, regs);
292
293
294
295
296 if (info->si_addr == (void *)-1) {
297 err = -EINVAL;
298 goto err_out;
299 }
300 trace_mpx_bounds_register_exception(info->si_addr, bndreg);
301 return info;
302err_out:
303
304 kfree(info);
305 return ERR_PTR(err);
306}
307
308static __user void *mpx_get_bounds_dir(void)
309{
310 const struct mpx_bndcsr *bndcsr;
311
312 if (!cpu_feature_enabled(X86_FEATURE_MPX))
313 return MPX_INVALID_BOUNDS_DIR;
314
315
316
317
318
319 bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
320 if (!bndcsr)
321 return MPX_INVALID_BOUNDS_DIR;
322
323
324
325
326
327 if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG))
328 return MPX_INVALID_BOUNDS_DIR;
329
330
331
332
333
334 return (void __user *)(unsigned long)
335 (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
336}
337
338int mpx_enable_management(void)
339{
340 void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
341 struct mm_struct *mm = current->mm;
342 int ret = 0;
343
344
345
346
347
348
349
350
351
352
353
354
355 bd_base = mpx_get_bounds_dir();
356 down_write(&mm->mmap_sem);
357 mm->bd_addr = bd_base;
358 if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR)
359 ret = -ENXIO;
360
361 up_write(&mm->mmap_sem);
362 return ret;
363}
364
365int mpx_disable_management(void)
366{
367 struct mm_struct *mm = current->mm;
368
369 if (!cpu_feature_enabled(X86_FEATURE_MPX))
370 return -ENXIO;
371
372 down_write(&mm->mmap_sem);
373 mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
374 up_write(&mm->mmap_sem);
375 return 0;
376}
377
378static int mpx_cmpxchg_bd_entry(struct mm_struct *mm,
379 unsigned long *curval,
380 unsigned long __user *addr,
381 unsigned long old_val, unsigned long new_val)
382{
383 int ret;
384
385
386
387
388
389
390
391 if (is_64bit_mm(mm)) {
392 ret = user_atomic_cmpxchg_inatomic(curval,
393 addr, old_val, new_val);
394 } else {
395 u32 uninitialized_var(curval_32);
396 u32 old_val_32 = old_val;
397 u32 new_val_32 = new_val;
398 u32 __user *addr_32 = (u32 __user *)addr;
399
400 ret = user_atomic_cmpxchg_inatomic(&curval_32,
401 addr_32, old_val_32, new_val_32);
402 *curval = curval_32;
403 }
404 return ret;
405}
406
407
408
409
410
411
412static int allocate_bt(struct mm_struct *mm, long __user *bd_entry)
413{
414 unsigned long expected_old_val = 0;
415 unsigned long actual_old_val = 0;
416 unsigned long bt_addr;
417 unsigned long bd_new_entry;
418 int ret = 0;
419
420
421
422
423
424 bt_addr = mpx_mmap(mpx_bt_size_bytes(mm));
425 if (IS_ERR((void *)bt_addr))
426 return PTR_ERR((void *)bt_addr);
427
428
429
430 bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
431
432
433
434
435
436
437
438
439
440
441
442
443 ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry,
444 expected_old_val, bd_new_entry);
445 if (ret)
446 goto out_unmap;
447
448
449
450
451
452
453
454
455
456
457
458 if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) {
459 ret = 0;
460 goto out_unmap;
461 }
462
463
464
465
466
467
468 if (expected_old_val != actual_old_val) {
469 ret = -EINVAL;
470 goto out_unmap;
471 }
472 trace_mpx_new_bounds_table(bt_addr);
473 return 0;
474out_unmap:
475 vm_munmap(bt_addr, mpx_bt_size_bytes(mm));
476 return ret;
477}
478
479
480
481
482
483
484
485
486
487
488
489
490static int do_mpx_bt_fault(void)
491{
492 unsigned long bd_entry, bd_base;
493 const struct mpx_bndcsr *bndcsr;
494 struct mm_struct *mm = current->mm;
495
496 bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
497 if (!bndcsr)
498 return -EINVAL;
499
500
501
502 bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK;
503
504
505
506
507 bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK;
508
509
510
511
512 if ((bd_entry < bd_base) ||
513 (bd_entry >= bd_base + mpx_bd_size_bytes(mm)))
514 return -EINVAL;
515
516 return allocate_bt(mm, (long __user *)bd_entry);
517}
518
519int mpx_handle_bd_fault(void)
520{
521
522
523
524
525 if (!kernel_managing_mpx_tables(current->mm))
526 return -EINVAL;
527
528 if (do_mpx_bt_fault()) {
529 force_sig(SIGSEGV, current);
530
531
532
533
534
535 }
536 return 0;
537}
538
539
540
541
542
543static int mpx_resolve_fault(long __user *addr, int write)
544{
545 long gup_ret;
546 int nr_pages = 1;
547
548 gup_ret = get_user_pages((unsigned long)addr, nr_pages,
549 write ? FOLL_WRITE : 0, NULL, NULL);
550
551
552
553
554
555 if (!gup_ret)
556 return -EFAULT;
557
558 if (gup_ret < 0)
559 return gup_ret;
560
561 return 0;
562}
563
564static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
565 unsigned long bd_entry)
566{
567 unsigned long bt_addr = bd_entry;
568 int align_to_bytes;
569
570
571
572 bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG;
573
574
575
576
577
578
579 if (is_64bit_mm(mm))
580 align_to_bytes = 8;
581 else
582 align_to_bytes = 4;
583 bt_addr &= ~(align_to_bytes-1);
584 return bt_addr;
585}
586
587
588
589
590
591
592int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
593 long __user *bd_entry_ptr)
594{
595 u32 bd_entry_32;
596 int ret;
597
598 if (is_64bit_mm(mm))
599 return get_user(*bd_entry_ret, bd_entry_ptr);
600
601
602
603
604
605 ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
606 *bd_entry_ret = bd_entry_32;
607 return ret;
608}
609
610
611
612
613
614static int get_bt_addr(struct mm_struct *mm,
615 long __user *bd_entry_ptr,
616 unsigned long *bt_addr_result)
617{
618 int ret;
619 int valid_bit;
620 unsigned long bd_entry;
621 unsigned long bt_addr;
622
623 if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr)))
624 return -EFAULT;
625
626 while (1) {
627 int need_write = 0;
628
629 pagefault_disable();
630 ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
631 pagefault_enable();
632 if (!ret)
633 break;
634 if (ret == -EFAULT)
635 ret = mpx_resolve_fault(bd_entry_ptr, need_write);
636
637
638
639
640 if (ret)
641 return ret;
642 }
643
644 valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG;
645 bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry);
646
647
648
649
650
651
652
653
654 if (!valid_bit && bt_addr)
655 return -EINVAL;
656
657
658
659
660
661
662 if (!valid_bit)
663 return -ENOENT;
664
665 *bt_addr_result = bt_addr;
666 return 0;
667}
668
669static inline int bt_entry_size_bytes(struct mm_struct *mm)
670{
671 if (is_64bit_mm(mm))
672 return MPX_BT_ENTRY_BYTES_64;
673 else
674 return MPX_BT_ENTRY_BYTES_32;
675}
676
677
678
679
680
681
682static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
683 unsigned long addr)
684{
685 unsigned long bt_table_nr_entries;
686 unsigned long offset = addr;
687
688 if (is_64bit_mm(mm)) {
689
690 offset >>= 3;
691 bt_table_nr_entries = MPX_BT_NR_ENTRIES_64;
692 } else {
693
694 offset >>= 2;
695 bt_table_nr_entries = MPX_BT_NR_ENTRIES_32;
696 }
697
698
699
700
701
702
703
704
705
706
707 offset &= (bt_table_nr_entries-1);
708
709
710
711
712 offset *= bt_entry_size_bytes(mm);
713 return offset;
714}
715
716
717
718
719
720
721
722
723static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
724{
725 unsigned long long virt_space;
726 unsigned long long GB = (1ULL << 30);
727
728
729
730
731
732 if (!is_64bit_mm(mm))
733 return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
734
735
736
737
738
739
740 virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
741 return virt_space / MPX_BD_NR_ENTRIES_64;
742}
743
744
745
746
747
748static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
749 unsigned long bt_addr,
750 unsigned long start_mapping, unsigned long end_mapping)
751{
752 struct vm_area_struct *vma;
753 unsigned long addr, len;
754 unsigned long start;
755 unsigned long end;
756
757
758
759
760
761
762
763 start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping);
764 end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1);
765
766
767
768
769
770 end += bt_entry_size_bytes(mm);
771
772
773
774
775
776
777 vma = find_vma(mm, start);
778 if (!vma || vma->vm_start > start)
779 return -EINVAL;
780
781
782
783
784
785
786
787 addr = start;
788 while (vma && vma->vm_start < end) {
789
790
791
792
793
794
795 if (!(vma->vm_flags & VM_MPX))
796 return -EINVAL;
797
798 len = min(vma->vm_end, end) - addr;
799 zap_page_range(vma, addr, len, NULL);
800 trace_mpx_unmap_zap(addr, addr+len);
801
802 vma = vma->vm_next;
803 addr = vma->vm_start;
804 }
805 return 0;
806}
807
808static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm,
809 unsigned long addr)
810{
811
812
813
814
815
816
817
818
819
820
821
822 if (is_64bit_mm(mm)) {
823 int bd_entry_size = 8;
824
825
826
827 addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1);
828 return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
829 } else {
830 int bd_entry_size = 4;
831
832
833
834 return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
835 }
836
837
838
839
840
841
842
843}
844
845static int unmap_entire_bt(struct mm_struct *mm,
846 long __user *bd_entry, unsigned long bt_addr)
847{
848 unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
849 unsigned long uninitialized_var(actual_old_val);
850 int ret;
851
852 while (1) {
853 int need_write = 1;
854 unsigned long cleared_bd_entry = 0;
855
856 pagefault_disable();
857 ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val,
858 bd_entry, expected_old_val, cleared_bd_entry);
859 pagefault_enable();
860 if (!ret)
861 break;
862 if (ret == -EFAULT)
863 ret = mpx_resolve_fault(bd_entry, need_write);
864
865
866
867
868 if (ret)
869 return ret;
870 }
871
872
873
874 if (actual_old_val != expected_old_val) {
875
876
877
878
879
880 if (!actual_old_val)
881 return 0;
882
883
884
885
886
887
888
889 return -EINVAL;
890 }
891
892
893
894
895
896 return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
897}
898
899static int try_unmap_single_bt(struct mm_struct *mm,
900 unsigned long start, unsigned long end)
901{
902 struct vm_area_struct *next;
903 struct vm_area_struct *prev;
904
905
906
907
908 unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1);
909 unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm);
910 unsigned long uninitialized_var(bt_addr);
911 void __user *bde_vaddr;
912 int ret;
913
914
915
916
917
918
919 next = find_vma_prev(mm, start, &prev);
920
921
922
923
924
925
926
927
928 while (next && (next->vm_flags & VM_MPX))
929 next = next->vm_next;
930 while (prev && (prev->vm_flags & VM_MPX))
931 prev = prev->vm_prev;
932
933
934
935
936
937
938
939 next = find_vma_prev(mm, start, &prev);
940 if ((!prev || prev->vm_end <= bta_start_vaddr) &&
941 (!next || next->vm_start >= bta_end_vaddr)) {
942
943
944
945
946 start = bta_start_vaddr;
947 end = bta_end_vaddr;
948 }
949
950 bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start);
951 ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
952
953
954
955 if (ret == -ENOENT) {
956 ret = 0;
957 return 0;
958 }
959 if (ret)
960 return ret;
961
962
963
964
965
966
967 if ((start == bta_start_vaddr) &&
968 (end == bta_end_vaddr))
969 return unmap_entire_bt(mm, bde_vaddr, bt_addr);
970 return zap_bt_entries_mapping(mm, bt_addr, start, end);
971}
972
/*
 * Release bounds table resources for the virtual range [@start, @end).
 *
 * The range is walked in pieces that never cross a multiple of
 * bd_entry_virt_space(), so each piece belongs to one bounds directory
 * entry, and every piece is handed to try_unmap_single_bt().
 *
 * Returns 0 on success or the first error from try_unmap_single_bt().
 */
static int mpx_unmap_tables(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	unsigned long piece_start;
	unsigned long piece_end;
	unsigned long next_piece;
	int ret;

	trace_mpx_unmap_search(start, end);

	for (piece_start = start; piece_start < end; piece_start = next_piece) {
		/* First boundary strictly above the start of this piece. */
		next_piece = ALIGN(piece_start + 1, bd_entry_virt_space(mm));

		/* Clamp the piece so it stops at that boundary. */
		piece_end = (end > next_piece) ? next_piece : end;

		ret = try_unmap_single_bt(mm, piece_start, piece_end);
		if (ret)
			return ret;
	}

	return 0;
}
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
1010 unsigned long start, unsigned long end)
1011{
1012 int ret;
1013
1014
1015
1016
1017
1018 if (!kernel_managing_mpx_tables(current->mm))
1019 return;
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030 do {
1031 if (vma->vm_flags & VM_MPX)
1032 return;
1033 vma = vma->vm_next;
1034 } while (vma && vma->vm_start < end);
1035
1036 ret = mpx_unmap_tables(mm, start, end);
1037 if (ret)
1038 force_sig(SIGSEGV, current);
1039}
1040