1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/sched/task_stack.h>
13#include <linux/mm.h>
14#include <linux/smp.h>
15#include <linux/errno.h>
16#include <linux/ptrace.h>
17#include <linux/tracehook.h>
18#include <linux/audit.h>
19#include <linux/seccomp.h>
20#include <linux/signal.h>
21#include <linux/export.h>
22#include <linux/context_tracking.h>
23#include <linux/user-return-notifier.h>
24#include <linux/nospec.h>
25#include <linux/uprobes.h>
26#include <linux/livepatch.h>
27#include <linux/syscalls.h>
28#include <linux/uaccess.h>
29
30#include <asm/desc.h>
31#include <asm/traps.h>
32#include <asm/vdso.h>
33#include <asm/cpufeature.h>
34#include <asm/fpu/api.h>
35#include <asm/nospec-branch.h>
36
37#define CREATE_TRACE_POINTS
38#include <trace/events/syscalls.h>
39
#ifndef CONFIG_CONTEXT_TRACKING
/* Context tracking is compiled out: entering from user mode is a no-op. */
static inline void enter_from_user_mode(void)
{
}
#else
/*
 * enter_from_user_mode - note the transition from user to kernel context.
 *
 * Warns through CT_WARN_ON() when the context-tracking state is anything
 * other than CONTEXT_USER, then calls user_exit_irqoff().
 *
 * NOTE(review): the "_irqoff" helper suggests callers must have IRQs
 * disabled here - confirm against the entry asm.
 */
__visible inline void enter_from_user_mode(void)
{
	CT_WARN_ON(ct_state() != CONTEXT_USER);
	user_exit_irqoff();
}
#endif /* CONFIG_CONTEXT_TRACKING */
50
51static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
52{
53#ifdef CONFIG_X86_64
54 if (arch == AUDIT_ARCH_X86_64) {
55 audit_syscall_entry(regs->orig_ax, regs->di,
56 regs->si, regs->dx, regs->r10);
57 } else
58#endif
59 {
60 audit_syscall_entry(regs->orig_ax, regs->bx,
61 regs->cx, regs->dx, regs->si);
62 }
63}
64
65
66
67
68
69static long syscall_trace_enter(struct pt_regs *regs)
70{
71 u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
72
73 struct thread_info *ti = current_thread_info();
74 unsigned long ret = 0;
75 bool emulated = false;
76 u32 work;
77
78 if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
79 BUG_ON(regs != task_pt_regs(current));
80
81 work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
82
83 if (unlikely(work & _TIF_SYSCALL_EMU))
84 emulated = true;
85
86 if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
87 tracehook_report_syscall_entry(regs))
88 return -1L;
89
90 if (emulated)
91 return -1L;
92
93#ifdef CONFIG_SECCOMP
94
95
96
97 if (work & _TIF_SECCOMP) {
98 struct seccomp_data sd;
99
100 sd.arch = arch;
101 sd.nr = regs->orig_ax;
102 sd.instruction_pointer = regs->ip;
103#ifdef CONFIG_X86_64
104 if (arch == AUDIT_ARCH_X86_64) {
105 sd.args[0] = regs->di;
106 sd.args[1] = regs->si;
107 sd.args[2] = regs->dx;
108 sd.args[3] = regs->r10;
109 sd.args[4] = regs->r8;
110 sd.args[5] = regs->r9;
111 } else
112#endif
113 {
114 sd.args[0] = regs->bx;
115 sd.args[1] = regs->cx;
116 sd.args[2] = regs->dx;
117 sd.args[3] = regs->si;
118 sd.args[4] = regs->di;
119 sd.args[5] = regs->bp;
120 }
121
122 ret = __secure_computing(&sd);
123 if (ret == -1)
124 return ret;
125 }
126#endif
127
128 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
129 trace_sys_enter(regs, regs->orig_ax);
130
131 do_audit_syscall_entry(regs, arch);
132
133 return ret ?: regs->orig_ax;
134}
135
136#define EXIT_TO_USERMODE_LOOP_FLAGS \
137 (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
138 _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING)
139
140static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
141{
142
143
144
145
146
147
148
149 while (true) {
150
151 local_irq_enable();
152
153 if (cached_flags & _TIF_NEED_RESCHED)
154 schedule();
155
156 if (cached_flags & _TIF_UPROBE)
157 uprobe_notify_resume(regs);
158
159 if (cached_flags & _TIF_PATCH_PENDING)
160 klp_update_patch_state(current);
161
162
163 if (cached_flags & _TIF_SIGPENDING)
164 do_signal(regs);
165
166 if (cached_flags & _TIF_NOTIFY_RESUME) {
167 clear_thread_flag(TIF_NOTIFY_RESUME);
168 tracehook_notify_resume(regs);
169 rseq_handle_notify_resume(NULL, regs);
170 }
171
172 if (cached_flags & _TIF_USER_RETURN_NOTIFY)
173 fire_user_return_notifiers();
174
175
176 local_irq_disable();
177
178 cached_flags = READ_ONCE(current_thread_info()->flags);
179
180 if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
181 break;
182 }
183}
184
185
186__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
187{
188 struct thread_info *ti = current_thread_info();
189 u32 cached_flags;
190
191 addr_limit_user_check();
192
193 lockdep_assert_irqs_disabled();
194 lockdep_sys_exit();
195
196 cached_flags = READ_ONCE(ti->flags);
197
198 if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
199 exit_to_usermode_loop(regs, cached_flags);
200
201
202 cached_flags = READ_ONCE(ti->flags);
203
204 fpregs_assert_state_consistent();
205 if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
206 switch_fpu_return();
207
208#ifdef CONFIG_COMPAT
209
210
211
212
213
214
215
216
217
218
219
220 ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
221#endif
222
223 user_enter_irqoff();
224
225 mds_user_clear_cpu_buffers();
226}
227
228#define SYSCALL_EXIT_WORK_FLAGS \
229 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
230 _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
231
232static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
233{
234 bool step;
235
236 audit_syscall_exit(regs);
237
238 if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
239 trace_sys_exit(regs, regs->ax);
240
241
242
243
244
245
246
247 step = unlikely(
248 (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
249 == _TIF_SINGLESTEP);
250 if (step || cached_flags & _TIF_SYSCALL_TRACE)
251 tracehook_report_syscall_exit(regs, step);
252}
253
254
255
256
257
258__visible inline void syscall_return_slowpath(struct pt_regs *regs)
259{
260 struct thread_info *ti = current_thread_info();
261 u32 cached_flags = READ_ONCE(ti->flags);
262
263 CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
264
265 if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
266 WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
267 local_irq_enable();
268
269 rseq_syscall(regs);
270
271
272
273
274
275 if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
276 syscall_slow_exit_work(regs, cached_flags);
277
278 local_irq_disable();
279 prepare_exit_to_usermode(regs);
280}
281
282#ifdef CONFIG_X86_64
283__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
284{
285 struct thread_info *ti;
286
287 enter_from_user_mode();
288 local_irq_enable();
289 ti = current_thread_info();
290 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
291 nr = syscall_trace_enter(regs);
292
293
294
295
296
297
298 nr &= __SYSCALL_MASK;
299 if (likely(nr < NR_syscalls)) {
300 nr = array_index_nospec(nr, NR_syscalls);
301 regs->ax = sys_call_table[nr](regs);
302 }
303
304 syscall_return_slowpath(regs);
305}
306#endif
307
308#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
309
310
311
312
313
314
315static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
316{
317 struct thread_info *ti = current_thread_info();
318 unsigned int nr = (unsigned int)regs->orig_ax;
319
320#ifdef CONFIG_IA32_EMULATION
321 ti->status |= TS_COMPAT;
322#endif
323
324 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
325
326
327
328
329
330
331 nr = syscall_trace_enter(regs);
332 }
333
334 if (likely(nr < IA32_NR_syscalls)) {
335 nr = array_index_nospec(nr, IA32_NR_syscalls);
336#ifdef CONFIG_IA32_EMULATION
337 regs->ax = ia32_sys_call_table[nr](regs);
338#else
339
340
341
342
343
344
345 regs->ax = ia32_sys_call_table[nr](
346 (unsigned int)regs->bx, (unsigned int)regs->cx,
347 (unsigned int)regs->dx, (unsigned int)regs->si,
348 (unsigned int)regs->di, (unsigned int)regs->bp);
349#endif
350 }
351
352 syscall_return_slowpath(regs);
353}
354
355
356__visible void do_int80_syscall_32(struct pt_regs *regs)
357{
358 enter_from_user_mode();
359 local_irq_enable();
360 do_syscall_32_irqs_on(regs);
361}
362
363
364__visible long do_fast_syscall_32(struct pt_regs *regs)
365{
366
367
368
369
370
371 unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
372 vdso_image_32.sym_int80_landing_pad;
373
374
375
376
377
378
379 regs->ip = landing_pad;
380
381 enter_from_user_mode();
382
383 local_irq_enable();
384
385
386 if (
387#ifdef CONFIG_X86_64
388
389
390
391
392 __get_user(*(u32 *)®s->bp,
393 (u32 __user __force *)(unsigned long)(u32)regs->sp)
394#else
395 get_user(*(u32 *)®s->bp,
396 (u32 __user __force *)(unsigned long)(u32)regs->sp)
397#endif
398 ) {
399
400
401 local_irq_disable();
402 regs->ax = -EFAULT;
403 prepare_exit_to_usermode(regs);
404 return 0;
405 }
406
407
408 do_syscall_32_irqs_on(regs);
409
410#ifdef CONFIG_X86_64
411
412
413
414
415
416
417
418
419
420 return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
421 regs->ip == landing_pad &&
422 (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
423#else
424
425
426
427
428
429
430
431
432
433
434 return static_cpu_has(X86_FEATURE_SEP) &&
435 regs->cs == __USER_CS && regs->ss == __USER_DS &&
436 regs->ip == landing_pad &&
437 (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
438#endif
439}
440#endif
441