/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
#include <asm/intel_rdt_sched.h>
#include <asm/unistd.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
#endif

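/*
 * Per-CPU scratch slot: the 64-bit SYSCALL entry path stashes the user RSP
 * here while it switches to the kernel stack.
 */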
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
		regs->sp, regs->flags);
	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
		pr_cont("\n");

	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = __read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
		       d0, d1, d2);
		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
		       d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

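/*
 * Release architecture-specific state of a dead task.  On x86-64 this is
 * only a sanity check: a dying process must no longer own an LDT.
 */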
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		if (dead_task->mm->context.ldt) {
			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt->entries,
				dead_task->mm->context.ldt->nr_entries);
			BUG();
		}
#endif
	}
}

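/* Which of the two legacy segment registers a save/load helper operates on. */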
enum which_selector {
	FS,
	GS
};

/*
 * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
 * not available.  The goal is to be reasonably fast on non-FSGSBASE systems.
 * It's forcibly inlined because it'll generate better code and this function
 * is hot.
 */
static __always_inline void save_base_legacy(struct task_struct *prev_p,
					     unsigned short selector,
					     enum which_selector which)
{
	if (likely(selector == 0)) {
		/*
		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
		 * be the pre-existing saved base or it could be zero.  On AMD
		 * (with X86_BUG_NULL_SEG), the segment base could be almost
		 * anything.
		 *
		 * This branch is very hot (it's hit twice on almost every
		 * context switch between 64-bit programs), and avoiding
		 * the RDMSR helps a lot, so we just assume that whatever
		 * value is already saved is correct.  This matches historical
		 * Linux behavior, so it won't break existing applications.
		 *
		 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
		 * report that the base is zero, it needs to actually be zero:
		 * see the corresponding logic in load_seg_legacy.
		 */
	} else {
		/*
		 * If the selector is 1, 2, or 3, then the base is zero on
		 * !X86_BUG_NULL_SEG CPUs and could be anything on
		 * X86_BUG_NULL_SEG CPUs.  In the latter case, Linux
		 * has never attempted to preserve the base across context
		 * switches.
		 *
		 * If selector > 3, then it refers to a real segment, and
		 * saving the base isn't necessary.
		 */
		if (which == FS)
			prev_p->thread.fsbase = 0;
		else
			prev_p->thread.gsbase = 0;
	}
}

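/* Snapshot the outgoing task's FS/GS selectors and (legacy) bases. */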
static __always_inline void save_fsgs(struct task_struct *task)
{
	savesegment(fs, task->thread.fsindex);
	savesegment(gs, task->thread.gsindex);
	save_base_legacy(task, task->thread.fsindex, FS);
	save_base_legacy(task, task->thread.gsindex, GS);
}

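/*
 * Load a selector into FS or GS.  GS needs load_gs_index(), which is aware
 * that the kernel's own GS base is live and must be preserved via SWAPGS.
 */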
static __always_inline void loadseg(enum which_selector which,
				    unsigned short sel)
{
	if (which == FS)
		loadsegment(fs, sel);
	else
		load_gs_index(sel);
}

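/*
 * Restore the incoming task's FS or GS, skipping redundant selector loads
 * and WRMSRs where the saved state proves they are unnecessary.
 */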
static __always_inline void load_seg_legacy(unsigned short prev_index,
					    unsigned long prev_base,
					    unsigned short next_index,
					    unsigned long next_base,
					    enum which_selector which)
{
	if (likely(next_index <= 3)) {
		/*
		 * The next task is using 64-bit TLS, is not using this
		 * segment at all, or is having fun with arcane CPU features.
		 */
		if (next_base == 0) {
			/*
			 * Nasty case: on AMD CPUs, we need to forcibly zero
			 * the base.
			 */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				loadseg(which, __USER_DS);
				loadseg(which, next_index);
			} else {
				/*
				 * We could try to exhaustively detect cases
				 * under which we can skip the segment load,
				 * but there's really only one case that matters
				 * for performance: if both the previous and
				 * the next states are fully zeroed, we can skip
				 * the load.
				 *
				 * (This assumes that prev_base == 0 has no
				 * false positives.  This is the case on
				 * Intel-style CPUs.)
				 */
				if (likely(prev_index | next_index | prev_base))
					loadseg(which, next_index);
			}
		} else {
			if (prev_index != next_index)
				loadseg(which, next_index);
			wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
			       next_base);
		}
	} else {
		/*
		 * The next task is using a real segment.  Loading the selector
		 * is sufficient.
		 */
		loadseg(which, next_index);
	}
}

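/*
 * Set up thread state for a newly forked or cloned task: build its kernel
 * fork frame, inherit segment state from the parent, duplicate the I/O
 * bitmap if one is present, and honor CLONE_SETTLS.
 */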
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->bx = sp;		/* function */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

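/*
 * Reset segment registers and the user-visible pt_regs so that execve()
 * starts the new program with a clean register state.
 */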
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	WARN_ON_ONCE(regs != current_pt_regs());

	if (static_cpu_has(X86_BUG_NULL_SEG)) {
		/* Loading zero below won't clear the base. */
		loadsegment(fs, __USER_DS);
		load_gs_index(__USER_DS);
	}

	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);

	regs->ip = new_ip;
	regs->sp = new_sp;
	regs->cs = _cs;
	regs->ss = _ss;
	regs->flags = X86_EFLAGS_IF;
	force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported too.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);

	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
		     this_cpu_read(irq_count) != -1);

	switch_fpu_prepare(prev_fpu, cpu);

	/*
	 * We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	save_fsgs(prev_p);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.  This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them, and it must be
	 * done before fpu__restore(), so the TS bit is up to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT.  The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_seg_legacy(prev->fsindex, prev->fsbase,
			next->fsindex, next->fsbase, FS);
	load_seg_legacy(prev->gsindex, prev->gsbase,
			next->gsindex, next->gsbase, GS);

	switch_fpu_finish(next_fpu, cpu);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);

	/* Reload esp0 and ss1.  This changes current_thread_info(). */
	load_sp0(tss, next);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN_PV
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL.  We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor.  As a result, if we
		 * do SYSRET to 32-bit user code and then switch to kernel
		 * mode on an interrupt or exception, the cached SS descriptor
		 * is stale, and a later return to userspace can run with
		 * wrong SS attributes, which is known to break 32-bit
		 * programs.
		 *
		 * Defensively reload SS with a known-good kernel selector on
		 * every context switch so that the cached descriptor stays
		 * sane.  Reading the selector first keeps the common case
		 * (SS already equals __KERNEL_DS) free of the much more
		 * expensive segment load.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	/* Load the Intel cache allocation PQR MSR. */
	intel_rdt_sched_in();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);
	/* Pretend that this comes from a 64bit execve */
	task_pt_regs(current)->orig_ax = __NR_execve;

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;
	/*
	 * in_compat_syscall() uses the presence of the x32 syscall bit
	 * flag to determine compat status.  The x86 mmap() code relies on
	 * the syscall bitness so set x32 syscall bit right here to make
	 * in_compat_syscall() work during exec().
	 *
	 * Pretend to come from an x32 execve.
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current->thread.status &= ~TS_COMPAT;
#endif
}

static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_IA32;
	current->personality |= force_personality32;
	/* Prepare the first "return" to user space */
	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current->thread.status |= TS_COMPAT;
#endif
}

void set_personality_ia32(bool x32)
{
	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	if (x32)
		__set_personality_x32();
	else
		__set_personality_ia32();
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

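/*
 * Checkpoint/restore support: map a given vDSO image at a fixed address so
 * a restored process sees the same vDSO it was checkpointed with.
 */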
#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

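/*
 * 64-bit arch_prctl() backend: get/set the FS and GS bases and handle vDSO
 * mapping requests.  'task' may be a not-yet-running child (see the
 * CLONE_SETTLS path in copy_thread_tls()), in which case only the saved
 * thread state is updated.
 */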
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (option) {
	case ARCH_SET_GS:
		if (arg2 >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.gsindex = 0;
		task->thread.gsbase = arg2;
		if (doit) {
			load_gs_index(0);
			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (arg2 >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.fsindex = 0;
		task->thread.fsbase = arg2;
		if (doit) {
			/* set the selector to 0 to not confuse __switch_to */
			loadsegment(fs, 0);
			ret = wrmsrl_safe(MSR_FS_BASE, arg2);
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;

		if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fsbase;
		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;

		if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gsbase;
		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, arg2);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, arg2);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

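/*
 * arch_prctl(2) entry point: try the 64-bit-only options first, then fall
 * back to the options shared with 32-bit (e.g. ARCH_GET/SET_CPUID).
 */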
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	long ret;

	ret = do_arch_prctl_64(current, option, arg2);
	if (ret == -EINVAL)
		ret = do_arch_prctl_common(current, option, arg2);

	return ret;
}

#ifdef CONFIG_IA32_EMULATION
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	return do_arch_prctl_common(current, option, arg2);
}
#endif

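/* Report the user stack pointer saved in the task's pt_regs. */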
unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}