1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/cpu.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/sched/task.h>
22#include <linux/sched/task_stack.h>
23#include <linux/fs.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/elfcore.h>
27#include <linux/smp.h>
28#include <linux/slab.h>
29#include <linux/user.h>
30#include <linux/interrupt.h>
31#include <linux/delay.h>
32#include <linux/export.h>
33#include <linux/ptrace.h>
34#include <linux/notifier.h>
35#include <linux/kprobes.h>
36#include <linux/kdebug.h>
37#include <linux/prctl.h>
38#include <linux/uaccess.h>
39#include <linux/io.h>
40#include <linux/ftrace.h>
41#include <linux/syscalls.h>
42
43#include <asm/pgtable.h>
44#include <asm/processor.h>
45#include <asm/fpu/internal.h>
46#include <asm/mmu_context.h>
47#include <asm/prctl.h>
48#include <asm/desc.h>
49#include <asm/proto.h>
50#include <asm/ia32.h>
51#include <asm/syscalls.h>
52#include <asm/debugreg.h>
53#include <asm/switch_to.h>
54#include <asm/xen/hypervisor.h>
55#include <asm/vdso.h>
56#include <asm/resctrl_sched.h>
57#include <asm/unistd.h>
58#include <asm/fsgsbase.h>
59#ifdef CONFIG_IA32_EMULATION
60
61#include <asm/unistd_32_ia32.h>
62#endif
63
64#include "process.h"
65
66
/*
 * Dump register state for debugging (oops/panic/sysrq output).
 *
 * @mode selects how much is printed:
 *  - SHOW_REGS_SHORT: iret frame and general purpose registers only.
 *  - SHOW_REGS_USER:  additionally the user FS base and shadow GS base MSRs.
 *  - otherwise:       full dump including segments, control and debug regs.
 */
void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, es;

	show_iret_regs(regs);

	/* orig_ax is -1 when the entry was not via a system call */
	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
		pr_cont("\n");

	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	if (mode == SHOW_REGS_SHORT)
		return;

	if (mode == SHOW_REGS_USER) {
		/* User mode: just the FS base and the (inactive) shadow GS base */
		rdmsrl(MSR_FS_BASE, fs);
		rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
		printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n",
		       fs, shadowgs);
		return;
	}

	/* Selector reads only; the cached descriptor state is not visible */
	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = __read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS: %04lx DS: %04x ES: %04x CR0: %016lx\n", regs->cs, ds,
	       es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
	       cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
		       d0, d1, d2);
		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
		       d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}
143
/*
 * Final per-arch teardown of a dead task.  By this point the task must
 * not still own an LDT; if it does, something failed to release it, so
 * complain and stop.
 */
void release_thread(struct task_struct *dead_task)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	struct mm_struct *mm = dead_task->mm;

	if (mm && mm->context.ldt) {
		pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
			dead_task->comm,
			mm->context.ldt->entries,
			mm->context.ldt->nr_entries);
		BUG();
	}
#endif
}
158
/* Which of the two swappable segment registers an operation targets. */
enum which_selector {
	FS,
	GS
};
163
164
165
166
167
168
169
/*
 * Reconcile the saved FS/GS base of @prev_p with the selector that was
 * just read from the register, without using RDMSR on this hot path.
 */
static __always_inline void save_base_legacy(struct task_struct *prev_p,
					     unsigned short selector,
					     enum which_selector which)
{
	if (likely(selector == 0)) {
		/*
		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
		 * be the pre-existing saved base or it could be zero.  On AMD
		 * (with X86_BUG_NULL_SEG), the segment base could be almost
		 * anything.
		 *
		 * This branch is very hot (it's hit twice on almost every
		 * context switch between 64-bit programs), and avoiding the
		 * RDMSR helps a lot, so we just assume that whatever value is
		 * already saved is correct.  This matches historical Linux
		 * behavior, so it won't break existing applications.
		 *
		 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
		 * report that the base is zero, it needs to actually be zero:
		 * see the corresponding logic in load_seg_legacy().
		 */
	} else {
		/*
		 * If the selector is 1, 2, or 3, then the base is zero on
		 * !X86_BUG_NULL_SEG CPUs and could be anything on
		 * X86_BUG_NULL_SEG CPUs.  In the latter case, Linux has never
		 * attempted to preserve the base across context switches.
		 *
		 * If selector > 3, then it refers to a real segment, and
		 * saving the base isn't necessary.
		 */
		if (which == FS)
			prev_p->thread.fsbase = 0;
		else
			prev_p->thread.gsbase = 0;
	}
}
208
/*
 * Save the current FS/GS selectors for @task and keep the saved bases
 * consistent with them via save_base_legacy().
 */
static __always_inline void save_fsgs(struct task_struct *task)
{
	savesegment(fs, task->thread.fsindex);
	savesegment(gs, task->thread.gsindex);
	save_base_legacy(task, task->thread.fsindex, FS);
	save_base_legacy(task, task->thread.gsindex, GS);
}
216
#if IS_ENABLED(CONFIG_KVM)
/*
 * While a process is running, current->thread.fsbase and
 * current->thread.gsbase may not match the corresponding CPU registers
 * (see save_base_legacy()).  KVM wants an efficient way to save and
 * restore host FS/GS state around guest entry/exit, so it uses this to
 * sync the saved copies with the live state.
 */
void save_fsgs_for_kvm(void)
{
	save_fsgs(current);
}
EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
#endif
230
231static __always_inline void loadseg(enum which_selector which,
232 unsigned short sel)
233{
234 if (which == FS)
235 loadsegment(fs, sel);
236 else
237 load_gs_index(sel);
238}
239
/*
 * Load the FS or GS state for the incoming task on a context switch,
 * skipping redundant (and expensive) selector loads and WRMSRs where
 * provably safe.
 */
static __always_inline void load_seg_legacy(unsigned short prev_index,
					    unsigned long prev_base,
					    unsigned short next_index,
					    unsigned long next_base,
					    enum which_selector which)
{
	if (likely(next_index <= 3)) {
		/*
		 * The next task is using 64-bit TLS, is not using this
		 * segment at all, or is having fun with arcane CPU features.
		 */
		if (next_base == 0) {
			/*
			 * Nasty case: on AMD CPUs, we need to forcibly zero
			 * the base.
			 */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				loadseg(which, __USER_DS);
				loadseg(which, next_index);
			} else {
				/*
				 * We could try to exhaustively detect cases
				 * under which we can skip the segment load,
				 * but there's really only one case that
				 * matters for performance: if both the
				 * previous and the next states are fully
				 * zeroed, we can skip the load.
				 *
				 * (This assumes that prev_base == 0 has no
				 * false positives.  This is the case on
				 * Intel-style CPUs.)
				 */
				if (likely(prev_index | next_index | prev_base))
					loadseg(which, next_index);
			}
		} else {
			/*
			 * Nonzero base: reload the selector only if it
			 * changed, then write the base MSR.
			 */
			if (prev_index != next_index)
				loadseg(which, next_index);
			wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
			       next_base);
		}
	} else {
		/*
		 * The next task is using a real segment.  Loading the
		 * selector is sufficient.
		 */
		loadseg(which, next_index);
	}
}
289
/* Load both FS and GS state for the incoming task on a context switch. */
static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
					      struct thread_struct *next)
{
	load_seg_legacy(prev->fsindex, prev->fsbase,
			next->fsindex, next->fsbase, FS);
	load_seg_legacy(prev->gsindex, prev->gsbase,
			next->gsindex, next->gsbase, GS);
}
298
299static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
300 unsigned short selector)
301{
302 unsigned short idx = selector >> 3;
303 unsigned long base;
304
305 if (likely((selector & SEGMENT_TI_MASK) == 0)) {
306 if (unlikely(idx >= GDT_ENTRIES))
307 return 0;
308
309
310
311
312
313 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
314 return 0;
315
316 idx -= GDT_ENTRY_TLS_MIN;
317 base = get_desc_base(&task->thread.tls_array[idx]);
318 } else {
319#ifdef CONFIG_MODIFY_LDT_SYSCALL
320 struct ldt_struct *ldt;
321
322
323
324
325
326
327 mutex_lock(&task->mm->context.lock);
328 ldt = task->mm->context.ldt;
329 if (unlikely(idx >= ldt->nr_entries))
330 base = 0;
331 else
332 base = get_desc_base(ldt->entries + idx);
333 mutex_unlock(&task->mm->context.lock);
334#else
335 base = 0;
336#endif
337 }
338
339 return base;
340}
341
342unsigned long x86_fsbase_read_task(struct task_struct *task)
343{
344 unsigned long fsbase;
345
346 if (task == current)
347 fsbase = x86_fsbase_read_cpu();
348 else if (task->thread.fsindex == 0)
349 fsbase = task->thread.fsbase;
350 else
351 fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);
352
353 return fsbase;
354}
355
356unsigned long x86_gsbase_read_task(struct task_struct *task)
357{
358 unsigned long gsbase;
359
360 if (task == current)
361 gsbase = x86_gsbase_read_cpu_inactive();
362 else if (task->thread.gsindex == 0)
363 gsbase = task->thread.gsbase;
364 else
365 gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);
366
367 return gsbase;
368}
369
/*
 * Set the saved FS base of a task that is not currently running.  The
 * live CPU register is deliberately untouched.
 */
void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
{
	WARN_ON_ONCE(task == current);

	task->thread.fsbase = fsbase;
}
376
/*
 * Set the saved GS base of a task that is not currently running.  The
 * live CPU register is deliberately untouched.
 */
void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
{
	WARN_ON_ONCE(task == current);

	task->thread.gsbase = gsbase;
}
383
/*
 * Arch-specific part of fork/clone: set up the new task's thread state.
 *
 * For kernel threads (PF_KTHREAD), @sp is the function to run and @arg
 * its argument; for user threads, @sp (if nonzero) becomes the child's
 * user stack pointer.  @tls carries the new TLS pointer/descriptor when
 * CLONE_SETTLS is set.
 *
 * Returns 0 on success or a negative errno (-ENOMEM if the I/O bitmap
 * cannot be duplicated, or the error from the TLS setup).
 */
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	childregs = task_pt_regs(p);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;

	/* First schedule-in of the child will "return" to ret_from_fork */
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

	/*
	 * Inherit segment state from the parent.  A nonzero selector means
	 * the base comes from the descriptor tables, so the saved base is 0
	 * (matching save_base_legacy()).
	 */
	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->bx = sp;		/* function to call */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;	/* child sees 0 from fork()/clone() */
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	/* Undo the I/O bitmap copy if TLS setup failed */
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}
458
/*
 * Reset register and segment state so the current task starts executing
 * a freshly exec'ed image at @new_ip with stack @new_sp, using the given
 * code, stack and data segment selectors.
 */
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	WARN_ON_ONCE(regs != current_pt_regs());

	if (static_cpu_has(X86_BUG_NULL_SEG)) {
		/* Loading zero below won't clear the base. */
		loadsegment(fs, __USER_DS);
		load_gs_index(__USER_DS);
	}

	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);

	regs->ip = new_ip;
	regs->sp = new_sp;
	regs->cs = _cs;
	regs->ss = _ss;
	regs->flags = X86_EFLAGS_IF;
	force_iret();
}
484
/* Start a 64-bit user thread: 64-bit code segment, DS/ES cleared to 0. */
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}
EXPORT_SYMBOL_GPL(start_thread);
492
#ifdef CONFIG_COMPAT
/*
 * Start a compat user thread: x32 tasks use the 64-bit code segment,
 * ia32 tasks the 32-bit one; both get __USER_DS for stack and data.
 */
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif
502
503
504
505
506
507
508
509
510
511
512
/*
 * switch_to(x, y) should switch tasks from x to y.
 *
 * Kprobes are not supported here: set the probe on schedule() instead.
 * The function graph tracer is not supported here either.
 *
 * The statement order below is load-bearing; see the comments on the
 * individual steps before reordering anything.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();

	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
		     this_cpu_read(irq_count) != -1);

	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_prepare(prev_fpu, cpu);

	/*
	 * We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	save_fsgs(prev_p);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.  This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them.
	 */
	arch_end_context_switch(next_p);

	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT.
	 *
	 * We therefore need to write new values to the segment registers
	 * on every context switch unless both the new and old values are
	 * zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as those
	 * are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	x86_fsgsbase_load(prev, next);

	/*
	 * Switch the per-CPU current task and top-of-stack pointers.
	 */
	this_cpu_write(current_task, next_p);
	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));

	switch_fpu_finish(next_fpu);

	/* Reload sp0. */
	update_task_stack(next_p);

	switch_to_extra(prev_p, next_p);

#ifdef CONFIG_XEN_PV
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL.  We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector
		 * but does not update the cached descriptor.  As a result,
		 * if we do SYSRET while SS is NULL, we'll end up in user
		 * mode with SS apparently equal to __USER_DS but actually
		 * unusable.
		 *
		 * The straightforward workaround would be to fix it up
		 * just before SYSRET, but that would slow down the system
		 * call fast paths.  Instead, we ensure that SS is never
		 * NULL in system call context by replacing NULL SS
		 * selectors at every context switch.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes.  Out of caution, we force SS to __KERNEL_DS even
		 * if it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	/* Load the per-task resource control (RDT/CAT) state. */
	resctrl_sched_in();

	return prev_p;
}
629
/*
 * Configure the current task as a native 64-bit process (called at
 * exec time).
 */
void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);
	/* Pretend that this comes from a 64bit execve */
	task_pt_regs(current)->orig_ax = __NR_execve;
	current_thread_info()->status &= ~TS_COMPAT;

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/*
	 * TBD: overwrites user setup.  Should have two bits.
	 * But 64-bit processes have always behaved this way,
	 * so it's not too bad.  The main problem is just that
	 * 32-bit children are affected again.
	 */
	current->personality &= ~READ_IMPLIES_EXEC;
}
652
/* Configure the current task as an x32 process (32-bit pointers, 64-bit regs). */
static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;

	/*
	 * in_compat_syscall() uses the presence of the x32 syscall bit
	 * flag to determine compat status.  The x86 mmap() code relies
	 * on the syscall bitness, so set the x32 syscall bit right here
	 * to make in_compat_syscall() work during this process's exec.
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current_thread_info()->status &= ~TS_COMPAT;
#endif
}
673
/* Configure the current task as a legacy ia32 (32-bit compat) process. */
static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_IA32;
	current->personality |= force_personality32;
	/* Prepare the first "return" to user space */
	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current_thread_info()->status |= TS_COMPAT;
#endif
}
687
688void set_personality_ia32(bool x32)
689{
690
691 set_thread_flag(TIF_ADDR32);
692
693 if (x32)
694 __set_personality_x32();
695 else
696 __set_personality_ia32();
697}
698EXPORT_SYMBOL_GPL(set_personality_ia32);
699
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * Map @image at @addr (once per process).  On success the image size is
 * returned so the caller knows the mapped extent; on failure the error
 * from map_vdso_once() is propagated.
 */
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int err = map_vdso_once(image, addr);

	return err ? (long)err : (long)image->size;
}
#endif
712
/*
 * 64-bit-only arch_prctl options: get/set the FS and GS bases of @task
 * and map vDSO images (checkpoint/restore).  Returns 0 on success,
 * -EPERM for an out-of-range base, -EINVAL for an unknown option, or
 * the error from the underlying helper.
 */
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
	int ret = 0;

	switch (option) {
	case ARCH_SET_GS: {
		if (unlikely(arg2 >= TASK_SIZE_MAX))
			return -EPERM;

		preempt_disable();
		/*
		 * ARCH_SET_GS has always overwritten the index
		 * and the base.  Zero is the most sensible value
		 * to put in the index, and is the only value that
		 * makes any sense if FSGSBASE is unavailable.
		 */
		if (task == current) {
			loadseg(GS, 0);
			x86_gsbase_write_cpu_inactive(arg2);

			/*
			 * On non-FSGSBASE systems, save_base_legacy()
			 * expects that we also fill in thread.gsbase.
			 */
			task->thread.gsbase = arg2;

		} else {
			task->thread.gsindex = 0;
			x86_gsbase_write_task(task, arg2);
		}
		preempt_enable();
		break;
	}
	case ARCH_SET_FS: {
		/*
		 * Not strictly needed for %fs, but do it for symmetry
		 * with %gs.
		 */
		if (unlikely(arg2 >= TASK_SIZE_MAX))
			return -EPERM;

		preempt_disable();
		/*
		 * Set the selector to 0 for the same reason
		 * as %gs above.
		 */
		if (task == current) {
			loadseg(FS, 0);
			x86_fsbase_write_cpu(arg2);

			/*
			 * On non-FSGSBASE systems, save_base_legacy()
			 * expects that we also fill in thread.fsbase.
			 */
			task->thread.fsbase = arg2;
		} else {
			task->thread.fsindex = 0;
			x86_fsbase_write_task(task, arg2);
		}
		preempt_enable();
		break;
	}
	case ARCH_GET_FS: {
		unsigned long base = x86_fsbase_read_task(task);

		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base = x86_gsbase_read_task(task);

		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, arg2);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, arg2);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
808
809SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
810{
811 long ret;
812
813 ret = do_arch_prctl_64(current, option, arg2);
814 if (ret == -EINVAL)
815 ret = do_arch_prctl_common(current, option, arg2);
816
817 return ret;
818}
819
#ifdef CONFIG_IA32_EMULATION
/* Compat arch_prctl: only the common (non-64-bit) options are available. */
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	return do_arch_prctl_common(current, option, arg2);
}
#endif
826
/* Return the saved user stack pointer of @task. */
unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}
831