1
2
3
4
5
6
7
8#include <linux/kernel.h>
9#include <linux/slab.h>
10#include <linux/syscalls.h>
11#include <linux/sched/sysctl.h>
12
13#include <asm/insn.h>
14#include <asm/mman.h>
15#include <asm/mmu_context.h>
16#include <asm/mpx.h>
17#include <asm/processor.h>
18#include <asm/fpu/internal.h>
19
20#define CREATE_TRACE_POINTS
21#include <asm/trace/mpx.h>
22
23static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
24{
25 if (is_64bit_mm(mm))
26 return MPX_BD_SIZE_BYTES_64;
27 else
28 return MPX_BD_SIZE_BYTES_32;
29}
30
31static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
32{
33 if (is_64bit_mm(mm))
34 return MPX_BT_SIZE_BYTES_64;
35 else
36 return MPX_BT_SIZE_BYTES_32;
37}
38
39
40
41
42
/*
 * Allocate anonymous, kernel-managed backing for one MPX bounds table
 * in the current process's address space.
 *
 * Returns the userspace address of the new mapping on success, or a
 * negative errno (encoded in the unsigned long) on failure.
 */
static unsigned long mpx_mmap(unsigned long len)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, populate;

	/* Only full-sized bounds tables may be allocated here: */
	if (len != mpx_bt_size_bytes(mm))
		return -EINVAL;

	down_write(&mm->mmap_sem);
	/* VM_MPX marks the VMA so MPX code can recognize its own tables: */
	addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
		       MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
	up_write(&mm->mmap_sem);
	/* do_mmap() asked us to prefault (e.g. mlockall()); do it unlocked: */
	if (populate)
		mm_populate(addr, populate);

	return addr;
}
61
62enum reg_type {
63 REG_TYPE_RM = 0,
64 REG_TYPE_INDEX,
65 REG_TYPE_BASE,
66};
67
/*
 * Translate the register number encoded in a decoded instruction's
 * ModRM/SIB bytes (plus REX extension bits) into the byte offset of
 * that register within 'struct pt_regs'.
 *
 * Returns the pt_regs offset, or -EINVAL for a register number that
 * is out of range for the task's mode.
 */
static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
			  enum reg_type type)
{
	int regno = 0;

	/* Table index == hardware register number (REX adds 8..15): */
	static const int regoff[] = {
		offsetof(struct pt_regs, ax),
		offsetof(struct pt_regs, cx),
		offsetof(struct pt_regs, dx),
		offsetof(struct pt_regs, bx),
		offsetof(struct pt_regs, sp),
		offsetof(struct pt_regs, bp),
		offsetof(struct pt_regs, si),
		offsetof(struct pt_regs, di),
#ifdef CONFIG_X86_64
		offsetof(struct pt_regs, r8),
		offsetof(struct pt_regs, r9),
		offsetof(struct pt_regs, r10),
		offsetof(struct pt_regs, r11),
		offsetof(struct pt_regs, r12),
		offsetof(struct pt_regs, r13),
		offsetof(struct pt_regs, r14),
		offsetof(struct pt_regs, r15),
#endif
	};
	int nr_registers = ARRAY_SIZE(regoff);

	/*
	 * A 32-bit instruction stream on a 64-bit kernel can only
	 * encode the first eight registers:
	 */
	if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64)
		nr_registers -= 8;

	switch (type) {
	case REG_TYPE_RM:
		/* ModRM.rm field, extended by REX.B: */
		regno = X86_MODRM_RM(insn->modrm.value);
		if (X86_REX_B(insn->rex_prefix.value))
			regno += 8;
		break;

	case REG_TYPE_INDEX:
		/* SIB index field, extended by REX.X: */
		regno = X86_SIB_INDEX(insn->sib.value);
		if (X86_REX_X(insn->rex_prefix.value))
			regno += 8;
		break;

	case REG_TYPE_BASE:
		/* SIB base field, extended by REX.B: */
		regno = X86_SIB_BASE(insn->sib.value);
		if (X86_REX_B(insn->rex_prefix.value))
			regno += 8;
		break;

	default:
		/* Caller passed a bogus enum value — kernel bug: */
		pr_err("invalid register type");
		BUG();
		break;
	}

	if (regno >= nr_registers) {
		WARN_ONCE(1, "decoded an instruction with an invalid register");
		return -EINVAL;
	}
	return regoff[regno];
}
132
133
134
135
136
137
/*
 * Compute the userspace effective address referenced by a decoded
 * instruction's memory operand (ModRM/SIB + displacement).
 *
 * Returns (void __user *)-1 if the operand cannot be resolved.
 *
 * NOTE(review): segment bases are not applied here — this assumes a
 * flat address space; confirm that is acceptable for all MPX callers.
 */
static void __user *mpx_get_addr_ref(struct insn *insn, struct pt_regs *regs)
{
	unsigned long addr, base, indx;
	int addr_offset, base_offset, indx_offset;
	insn_byte_t sib;

	insn_get_modrm(insn);
	insn_get_sib(insn);
	sib = insn->sib.value;

	if (X86_MODRM_MOD(insn->modrm.value) == 3) {
		/* mod == 3: register-direct; "address" is the register value: */
		addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
		if (addr_offset < 0)
			goto out_err;
		addr = regs_get_register(regs, addr_offset);
	} else {
		if (insn->sib.nbytes) {
			/* SIB form: base + index * 2^scale: */
			base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE);
			if (base_offset < 0)
				goto out_err;

			indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX);
			if (indx_offset < 0)
				goto out_err;

			base = regs_get_register(regs, base_offset);
			indx = regs_get_register(regs, indx_offset);
			addr = base + indx * (1 << X86_SIB_SCALE(sib));
		} else {
			/* Simple [reg] addressing: */
			addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
			if (addr_offset < 0)
				goto out_err;
			addr = regs_get_register(regs, addr_offset);
		}
		/* Displacement applies to all memory (non-mod3) forms: */
		addr += insn->displacement.value;
	}
	return (void __user *)addr;
out_err:
	return (void __user *)-1;
}
178
/*
 * Fetch and decode the instruction at the faulting user IP, and verify
 * it is an MPX bounds instruction (opcode 0f 1a / 0f 1b).
 *
 * Returns 0 on success, -EFAULT if the instruction bytes could not be
 * read, or -EINVAL if it is not an MPX instruction.
 */
static int mpx_insn_decode(struct insn *insn,
			   struct pt_regs *regs)
{
	unsigned char buf[MAX_INSN_SIZE];
	int x86_64 = !test_thread_flag(TIF_IA32);
	int not_copied;
	int nr_copied;

	not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
	nr_copied = sizeof(buf) - not_copied;
	/*
	 * A partial copy is fine as long as we got enough bytes to
	 * cover the actual instruction; only a total failure is fatal
	 * here:
	 */
	if (!nr_copied)
		return -EFAULT;
	insn_init(insn, buf, nr_copied, x86_64);
	insn_get_length(insn);
	/*
	 * Now that the decoder knows the instruction's true length,
	 * make sure the copy actually covered all of it:
	 */
	if (nr_copied < insn->length)
		return -EFAULT;

	insn_get_opcode(insn);
	/*
	 * MPX bounds instructions are two-byte opcodes:
	 * 0f 1a (BNDLDX et al.) and 0f 1b (BNDSTX et al.):
	 */
	if (insn->opcode.bytes[0] != 0x0f)
		goto bad_opcode;
	if ((insn->opcode.bytes[1] != 0x1a) &&
	    (insn->opcode.bytes[1] != 0x1b))
		goto bad_opcode;

	return 0;
bad_opcode:
	return -EINVAL;
}
224
225
226
227
228
229
230
231
232
233
234
235
236
237
/*
 * Build the siginfo for a SIGSEGV/SEGV_BNDERR bounds-violation signal:
 * decode the faulting MPX instruction, read the violated bounds
 * register out of the xsave area, and compute the offending address.
 *
 * Returns a kzalloc()ed siginfo (caller frees) or an ERR_PTR().
 */
siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
{
	const struct mpx_bndreg_state *bndregs;
	const struct mpx_bndreg *bndreg;
	siginfo_t *info = NULL;
	struct insn insn;
	uint8_t bndregno;
	int err;

	err = mpx_insn_decode(&insn, regs);
	if (err)
		goto err_out;

	/*
	 * The bounds register number lives in the ModRM.reg field of
	 * the MPX instruction; only bnd0-bnd3 exist:
	 */
	insn_get_modrm(&insn);
	bndregno = X86_MODRM_REG(insn.modrm.value);
	if (bndregno > 3) {
		err = -EINVAL;
		goto err_out;
	}

	/* NULL here means the bndregs state was never used/saved: */
	bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
	if (!bndregs) {
		err = -EINVAL;
		goto err_out;
	}

	/* The register that the instruction says was violated: */
	bndreg = &bndregs->bndreg[bndregno];

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		err = -ENOMEM;
		goto err_out;
	}
	/*
	 * Upper bounds are architecturally stored in one's-complement
	 * form, hence the '~' when reporting to userspace:
	 */
	info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound;
	info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound;
	info->si_addr_lsb = 0;
	info->si_signo = SIGSEGV;
	info->si_errno = 0;
	info->si_code = SEGV_BNDERR;
	info->si_addr = mpx_get_addr_ref(&insn, regs);
	/*
	 * We were unable to resolve the faulting address from the
	 * instruction's operand:
	 */
	if (info->si_addr == (void *)-1) {
		err = -EINVAL;
		goto err_out;
	}
	trace_mpx_bounds_register_exception(info->si_addr, bndreg);
	return info;
err_out:
	/* info might be NULL, but kfree() handles that: */
	kfree(info);
	return ERR_PTR(err);
}
307
/*
 * Read the userspace address of the task's bounds directory out of the
 * BNDCFGU register (saved in the xsave area).
 *
 * Returns MPX_INVALID_BOUNDS_DIR if MPX is unavailable or not enabled
 * for this task.
 */
static __user void *mpx_get_bounds_dir(void)
{
	const struct mpx_bndcsr *bndcsr;

	if (!cpu_feature_enabled(X86_FEATURE_MPX))
		return MPX_INVALID_BOUNDS_DIR;

	/* NULL means the BNDCSR state was never initialized/saved: */
	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
	if (!bndcsr)
		return MPX_INVALID_BOUNDS_DIR;

	/* Userspace has not enabled MPX via BNDCFGU: */
	if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG))
		return MPX_INVALID_BOUNDS_DIR;

	/* Mask off the enable/config bits, leaving just the address: */
	return (void __user *)(unsigned long)
		(bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
}
337
/*
 * Turn on kernel management of this process's bounds tables: record
 * the userspace bounds-directory address in the mm.
 *
 * Returns 0 on success, -ENXIO if no valid bounds directory exists.
 */
int mpx_enable_management(void)
{
	void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
	struct mm_struct *mm = current->mm;
	int ret = 0;

	bd_base = mpx_get_bounds_dir();
	/*
	 * mmap_sem (write) serializes bd_addr updates against readers
	 * such as the bounds-table fault path — presumably; confirm
	 * that all bd_addr readers hold mmap_sem:
	 */
	down_write(&mm->mmap_sem);
	mm->bd_addr = bd_base;
	if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR)
		ret = -ENXIO;

	up_write(&mm->mmap_sem);
	return ret;
}
364
/*
 * Turn off kernel management of this process's bounds tables by
 * invalidating the recorded bounds-directory address.
 */
int mpx_disable_management(void)
{
	struct mm_struct *mm = current->mm;

	if (!cpu_feature_enabled(X86_FEATURE_MPX))
		return -ENXIO;

	/* Write lock: see mpx_enable_management() for the pairing: */
	down_write(&mm->mmap_sem);
	mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
	up_write(&mm->mmap_sem);
	return 0;
}
377
/*
 * Atomically compare-and-exchange one bounds-directory entry in
 * userspace memory, honoring the entry width of the mm's ABI
 * (64-bit tasks have 8-byte entries, 32-bit tasks 4-byte entries).
 *
 * On success *curval holds the value found at *addr.  Returns the
 * user_atomic_cmpxchg_inatomic() result (0 or -EFAULT).
 */
static int mpx_cmpxchg_bd_entry(struct mm_struct *mm,
		unsigned long *curval,
		unsigned long __user *addr,
		unsigned long old_val, unsigned long new_val)
{
	int ret;

	if (is_64bit_mm(mm)) {
		ret = user_atomic_cmpxchg_inatomic(curval,
				addr, old_val, new_val);
	} else {
		/* 32-bit mm: operate on 4-byte entries, then widen: */
		u32 uninitialized_var(curval_32);
		u32 old_val_32 = old_val;
		u32 new_val_32 = new_val;
		u32 __user *addr_32 = (u32 __user *)addr;

		ret = user_atomic_cmpxchg_inatomic(&curval_32,
				addr_32, old_val_32, new_val_32);
		*curval = curval_32;
	}
	return ret;
}
406
407
408
409
410
411
/*
 * Allocate a new bounds table and install a pointer to it in the given
 * bounds-directory entry, racing fairly against userspace and other
 * threads that may be doing the same.
 *
 * Returns 0 on success (including the benign "somebody else installed
 * a table first" race), or a negative errno.
 */
static int allocate_bt(struct mm_struct *mm, long __user *bd_entry)
{
	unsigned long expected_old_val = 0;
	unsigned long actual_old_val = 0;
	unsigned long bt_addr;
	unsigned long bd_new_entry;
	int ret = 0;

	/* Carve out a new table in the process address space: */
	bt_addr = mpx_mmap(mpx_bt_size_bytes(mm));
	if (IS_ERR((void *)bt_addr))
		return PTR_ERR((void *)bt_addr);

	/* Set the valid flag in the directory entry we will install: */
	bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG;

	/*
	 * Install only if the entry is still empty (expected_old_val
	 * == 0); cmpxchg lets us detect any race with userspace or
	 * another thread writing the same slot:
	 */
	ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry,
				   expected_old_val, bd_new_entry);
	if (ret)
		goto out_unmap;

	/*
	 * Someone else already installed a valid table here; that is
	 * fine — just throw ours away and report success:
	 */
	if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) {
		ret = 0;
		goto out_unmap;
	}

	/*
	 * The entry was neither empty nor a valid table: unexpected
	 * garbage, likely userspace scribbling on the directory:
	 */
	if (expected_old_val != actual_old_val) {
		ret = -EINVAL;
		goto out_unmap;
	}
	trace_mpx_new_bounds_table(bt_addr);
	return 0;
out_unmap:
	vm_munmap(bt_addr, mpx_bt_size_bytes(mm));
	return ret;
}
478
479
480
481
482
483
484
485
486
487
488
489
/*
 * Handle a fault on a missing bounds table: validate that the faulting
 * bounds-directory entry (reported by the hardware in BNDSTATUS) lies
 * within the task's bounds directory, then allocate a table for it.
 */
static int do_mpx_bt_fault(void)
{
	unsigned long bd_entry, bd_base;
	const struct mpx_bndcsr *bndcsr;
	struct mm_struct *mm = current->mm;

	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
	if (!bndcsr)
		return -EINVAL;

	/* Base of the bounds directory, from BNDCFGU: */
	bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK;

	/* Address of the directory entry that faulted, from BNDSTATUS: */
	bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK;

	/*
	 * Sanity-check: the faulting entry must lie inside the bounds
	 * directory; anything else means corrupted hardware state or
	 * a hostile userspace:
	 */
	if ((bd_entry < bd_base) ||
	    (bd_entry >= bd_base + mpx_bd_size_bytes(mm)))
		return -EINVAL;

	return allocate_bt(mm, (long __user *)bd_entry);
}
518
/*
 * Entry point for bounds-directory faults.  Only acts when the kernel
 * is managing this mm's bounds tables; on allocation failure the task
 * is killed with SIGSEGV rather than retrying forever.
 */
int mpx_handle_bd_fault(void)
{
	/*
	 * Userspace never asked us to manage the bounds tables, so
	 * refuse to help:
	 */
	if (!kernel_managing_mpx_tables(current->mm))
		return -EINVAL;

	if (do_mpx_bt_fault()) {
		force_sig(SIGSEGV, current);
		/*
		 * The force_sig() is essentially "handling" the
		 * exception, so we do not pass up the error from
		 * do_mpx_bt_fault():
		 */
	}
	return 0;
}
538
539
540
541
542
/*
 * Fault in (and, if 'write', make writable) the single user page at
 * 'addr' so that a pagefault_disable()d access can be retried.
 *
 * Returns 0 on success, -EFAULT or another negative errno on failure.
 */
static int mpx_resolve_fault(long __user *addr, int write)
{
	long gup_ret;
	int nr_pages = 1;
	int force = 0;

	gup_ret = get_user_pages((unsigned long)addr, nr_pages, write,
			force, NULL, NULL);
	/*
	 * get_user_pages() returns the number of pages pinned; zero
	 * means it faulted in nothing — treat that like a fault:
	 */
	if (!gup_ret)
		return -EFAULT;
	/* Pass through any real error: */
	if (gup_ret < 0)
		return gup_ret;

	return 0;
}
564
565static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
566 unsigned long bd_entry)
567{
568 unsigned long bt_addr = bd_entry;
569 int align_to_bytes;
570
571
572
573 bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG;
574
575
576
577
578
579
580 if (is_64bit_mm(mm))
581 align_to_bytes = 8;
582 else
583 align_to_bytes = 4;
584 bt_addr &= ~(align_to_bytes-1);
585 return bt_addr;
586}
587
588
589
590
591
592
/*
 * Read one bounds-directory entry from userspace, honoring the entry
 * width of the mm's ABI: get_user() sizes its access by the pointer
 * type, so a 32-bit mm must be read through a u32 pointer and widened.
 *
 * Returns the get_user() result (0 or -EFAULT).
 */
int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
		long __user *bd_entry_ptr)
{
	u32 bd_entry_32;
	int ret;

	if (is_64bit_mm(mm))
		return get_user(*bd_entry_ret, bd_entry_ptr);

	/* 32-bit mm: read a 4-byte entry and zero-extend it: */
	ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
	*bd_entry_ret = bd_entry_32;
	return ret;
}
610
611
612
613
614
/*
 * Read the bounds-directory entry at *bd_entry_ptr and, if it is
 * valid, return the bounds-table address it points to.
 *
 * Returns 0 on success, -ENOENT for an empty (invalid, zero) entry,
 * -EINVAL for a corrupt entry, or a fault-resolution errno.
 */
static int get_bt_addr(struct mm_struct *mm,
		       long __user *bd_entry_ptr,
		       unsigned long *bt_addr_result)
{
	int ret;
	int valid_bit;
	unsigned long bd_entry;
	unsigned long bt_addr;

	if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr)))
		return -EFAULT;

	/*
	 * Retry loop: the read is done with pagefaults disabled; on
	 * -EFAULT we fault the page in by hand and try again:
	 */
	while (1) {
		int need_write = 0;

		pagefault_disable();
		ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
		pagefault_enable();
		if (!ret)
			break;
		if (ret == -EFAULT)
			ret = mpx_resolve_fault(bd_entry_ptr, need_write);
		/*
		 * Any error other than a resolvable fault is fatal;
		 * bail out instead of looping forever:
		 */
		if (ret)
			return ret;
	}

	valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG;
	bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry);

	/*
	 * A nonzero table address with the valid bit clear is a state
	 * the kernel never writes — somebody corrupted the directory:
	 */
	if (!valid_bit && bt_addr)
		return -EINVAL;

	/*
	 * Entirely empty entry: no bounds table exists here.  Distinct
	 * error code so callers can treat it as "nothing to do":
	 */
	if (!valid_bit)
		return -ENOENT;

	*bt_addr_result = bt_addr;
	return 0;
}
669
670static inline int bt_entry_size_bytes(struct mm_struct *mm)
671{
672 if (is_64bit_mm(mm))
673 return MPX_BT_ENTRY_BYTES_64;
674 else
675 return MPX_BT_ENTRY_BYTES_32;
676}
677
678
679
680
681
682
683static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
684 unsigned long addr)
685{
686 unsigned long bt_table_nr_entries;
687 unsigned long offset = addr;
688
689 if (is_64bit_mm(mm)) {
690
691 offset >>= 3;
692 bt_table_nr_entries = MPX_BT_NR_ENTRIES_64;
693 } else {
694
695 offset >>= 2;
696 bt_table_nr_entries = MPX_BT_NR_ENTRIES_32;
697 }
698
699
700
701
702
703
704
705
706
707
708 offset &= (bt_table_nr_entries-1);
709
710
711
712
713 offset *= bt_entry_size_bytes(mm);
714 return offset;
715}
716
717
718
719
720
721
722
723
724static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
725{
726 unsigned long long virt_space;
727 unsigned long long GB = (1ULL << 30);
728
729
730
731
732
733 if (!is_64bit_mm(mm))
734 return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
735
736
737
738
739
740
741 virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
742 return virt_space / MPX_BD_NR_ENTRIES_64;
743}
744
745
746
747
748
749static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
750 unsigned long bt_addr,
751 unsigned long start_mapping, unsigned long end_mapping)
752{
753 struct vm_area_struct *vma;
754 unsigned long addr, len;
755 unsigned long start;
756 unsigned long end;
757
758
759
760
761
762
763
764 start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping);
765 end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1);
766
767
768
769
770
771 end += bt_entry_size_bytes(mm);
772
773
774
775
776
777
778 vma = find_vma(mm, start);
779 if (!vma || vma->vm_start > start)
780 return -EINVAL;
781
782
783
784
785
786
787
788 addr = start;
789 while (vma && vma->vm_start < end) {
790
791
792
793
794
795
796 if (!(vma->vm_flags & VM_MPX))
797 return -EINVAL;
798
799 len = min(vma->vm_end, end) - addr;
800 zap_page_range(vma, addr, len, NULL);
801 trace_mpx_unmap_zap(addr, addr+len);
802
803 vma = vma->vm_next;
804 addr = vma->vm_start;
805 }
806 return 0;
807}
808
809static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm,
810 unsigned long addr)
811{
812
813
814
815
816
817
818
819
820
821
822
823 if (is_64bit_mm(mm)) {
824 int bd_entry_size = 8;
825
826
827
828 addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1);
829 return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
830 } else {
831 int bd_entry_size = 4;
832
833
834
835 return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
836 }
837
838
839
840
841
842
843
844}
845
/*
 * Tear down one entire bounds table: atomically clear its directory
 * entry (racing fairly with userspace) and then unmap the table.
 *
 * Returns 0 on success (including the benign "entry already cleared"
 * race), or a negative errno.
 */
static int unmap_entire_bt(struct mm_struct *mm,
		long __user *bd_entry, unsigned long bt_addr)
{
	unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
	unsigned long uninitialized_var(actual_old_val);
	int ret;

	/*
	 * Retry loop: the cmpxchg runs with pagefaults disabled; on
	 * -EFAULT we fault the directory page in (writably) and retry:
	 */
	while (1) {
		int need_write = 1;
		unsigned long cleared_bd_entry = 0;

		pagefault_disable();
		ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val,
				bd_entry, expected_old_val, cleared_bd_entry);
		pagefault_enable();
		if (!ret)
			break;
		if (ret == -EFAULT)
			ret = mpx_resolve_fault(bd_entry, need_write);
		/*
		 * Any error other than a resolvable fault is fatal;
		 * bail out instead of looping forever:
		 */
		if (ret)
			return ret;
	}
	/*
	 * The cmpxchg did not take because the entry no longer holds
	 * the value we expected:
	 */
	if (actual_old_val != expected_old_val) {
		/*
		 * Someone else raced with us and cleared the entry
		 * first — nothing left for us to do:
		 */
		if (!actual_old_val)
			return 0;
		/*
		 * The entry holds some other, unexpected value; the
		 * directory has been corrupted (likely by userspace):
		 */
		return -EINVAL;
	}
	/*
	 * Entry cleared; now drop the table's backing memory.  Caller
	 * is expected to hold mmap_sem for write (do_munmap requires
	 * it) — confirm at call sites:
	 */
	return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
}
899
900static int try_unmap_single_bt(struct mm_struct *mm,
901 unsigned long start, unsigned long end)
902{
903 struct vm_area_struct *next;
904 struct vm_area_struct *prev;
905
906
907
908
909 unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1);
910 unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm);
911 unsigned long uninitialized_var(bt_addr);
912 void __user *bde_vaddr;
913 int ret;
914
915
916
917
918
919
920 next = find_vma_prev(mm, start, &prev);
921
922
923
924
925
926
927
928
929 while (next && (next->vm_flags & VM_MPX))
930 next = next->vm_next;
931 while (prev && (prev->vm_flags & VM_MPX))
932 prev = prev->vm_prev;
933
934
935
936
937
938
939
940 next = find_vma_prev(mm, start, &prev);
941 if ((!prev || prev->vm_end <= bta_start_vaddr) &&
942 (!next || next->vm_start >= bta_end_vaddr)) {
943
944
945
946
947 start = bta_start_vaddr;
948 end = bta_end_vaddr;
949 }
950
951 bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start);
952 ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
953
954
955
956 if (ret == -ENOENT) {
957 ret = 0;
958 return 0;
959 }
960 if (ret)
961 return ret;
962
963
964
965
966
967
968 if ((start == bta_start_vaddr) &&
969 (end == bta_end_vaddr))
970 return unmap_entire_bt(mm, bde_vaddr, bt_addr);
971 return zap_bt_entries_mapping(mm, bt_addr, start, end);
972}
973
/*
 * Unmap the bounds-table state for the virtual range [start, end),
 * which may span multiple bounds tables.  The range is processed one
 * bounds-table span at a time via try_unmap_single_bt().
 */
static int mpx_unmap_tables(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	unsigned long one_unmap_start;
	trace_mpx_unmap_search(start, end);

	one_unmap_start = start;
	while (one_unmap_start < end) {
		int ret;
		/* Start of the next bounds-table-aligned span: */
		unsigned long next_unmap_start = ALIGN(one_unmap_start+1,
						       bd_entry_virt_space(mm));
		unsigned long one_unmap_end = end;
		/*
		 * Clamp each chunk so it never crosses a span boundary
		 * (try_unmap_single_bt() handles one table at most):
		 */
		if (one_unmap_end > next_unmap_start)
			one_unmap_end = next_unmap_start;
		ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end);
		if (ret)
			return ret;

		one_unmap_start = next_unmap_start;
	}
	return 0;
}
1001
1002
1003
1004
1005
1006
1007
1008
1009
/*
 * Hook called when userspace memory in [start, end) is being unmapped:
 * tear down any kernel-managed bounds tables covering that range.  On
 * failure the task is killed (the tables are now inconsistent and
 * unrecoverable).
 */
void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long start, unsigned long end)
{
	int ret;

	/* Userspace never asked us to manage its bounds tables: */
	if (!kernel_managing_mpx_tables(current->mm))
		return;

	/*
	 * If any VMA in the range is itself an MPX bounds table, this
	 * unmap was initiated by the MPX code and must not recurse
	 * into bounds-table management:
	 */
	do {
		if (vma->vm_flags & VM_MPX)
			return;
		vma = vma->vm_next;
	} while (vma && vma->vm_start < end);

	ret = mpx_unmap_tables(mm, start, end);
	if (ret)
		force_sig(SIGSEGV, current);
}
1041