1
2
3#include <linux/context_tracking.h>
4#include <linux/entry-common.h>
5#include <linux/highmem.h>
6#include <linux/livepatch.h>
7#include <linux/audit.h>
8#include <linux/tick.h>
9
10#include "common.h"
11
12#define CREATE_TRACE_POINTS
13#include <trace/events/syscalls.h>
14
15
/*
 * Common bookkeeping performed on every entry from user mode.
 *
 * Runs the architecture register check, informs lockdep that hard
 * interrupts are off, warns when context tracking does not report
 * CONTEXT_USER and then leaves the user context via user_exit_irqoff().
 * The hardirq-off trace call is issued last, inside an
 * instrumentation_begin()/instrumentation_end() section.
 *
 * @regs: register state passed to arch_check_user_regs()
 */
static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
{
	arch_check_user_regs(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	/* Context tracking is expected to still report user context here. */
	CT_WARN_ON(ct_state() != CONTEXT_USER);
	user_exit_irqoff();

	/*
	 * NOTE(review): presumably tracing may only run once the user
	 * context has been left above - confirm against the noinstr rules.
	 */
	instrumentation_begin();
	trace_hardirqs_off_finish();
	instrumentation_end();
}
28
/*
 * Non-inlined, noinstr wrapper around __enter_from_user_mode().
 *
 * @regs: register state forwarded unchanged
 */
void noinstr enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}
33
34static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
35{
36 if (unlikely(audit_context())) {
37 unsigned long args[6];
38
39 syscall_get_arguments(current, regs, args);
40 audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
41 }
42}
43
44static long syscall_trace_enter(struct pt_regs *regs, long syscall,
45 unsigned long work)
46{
47 long ret = 0;
48
49
50
51
52
53
54 if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
55 if (syscall_user_dispatch(regs))
56 return -1L;
57 }
58
59
60 if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
61 ret = arch_syscall_enter_tracehook(regs);
62 if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
63 return -1L;
64 }
65
66
67 if (work & SYSCALL_WORK_SECCOMP) {
68 ret = __secure_computing(NULL);
69 if (ret == -1L)
70 return ret;
71 }
72
73
74 syscall = syscall_get_nr(current, regs);
75
76 if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
77 trace_sys_enter(regs, syscall);
78
79 syscall_enter_audit(regs, syscall);
80
81 return ret ? : syscall;
82}
83
84static __always_inline long
85__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
86{
87 unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
88
89 if (work & SYSCALL_WORK_ENTER)
90 syscall = syscall_trace_enter(regs, syscall, work);
91
92 return syscall;
93}
94
/*
 * Non-inlined wrapper around __syscall_enter_from_user_work().
 *
 * Unlike syscall_enter_from_user_mode(), this performs neither the
 * user-mode entry bookkeeping nor local_irq_enable().
 *
 * @regs:    register state of the syscall
 * @syscall: syscall number supplied by the caller
 *
 * Returns whatever __syscall_enter_from_user_work() returns.
 */
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	return __syscall_enter_from_user_work(regs, syscall);
}
99
/*
 * Combined syscall entry: user-mode entry bookkeeping, interrupt enable
 * and syscall-entry work.
 *
 * @regs:    register state of the syscall
 * @syscall: syscall number supplied by the caller
 *
 * Returns the value produced by __syscall_enter_from_user_work().
 */
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long ret;

	__enter_from_user_mode(regs);

	/* Interrupts are enabled before the syscall-entry work runs. */
	instrumentation_begin();
	local_irq_enable();
	ret = __syscall_enter_from_user_work(regs, syscall);
	instrumentation_end();

	return ret;
}
113
/*
 * Perform the user-mode entry bookkeeping and enable interrupts, without
 * running any syscall-entry work.
 *
 * @regs: register state forwarded to __enter_from_user_mode()
 */
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
	instrumentation_begin();
	local_irq_enable();
	instrumentation_end();
}
121
122
/*
 * Final bookkeeping before returning to user mode.
 *
 * The tracing and lockdep "prepare" steps run inside an instrumentation
 * section. Afterwards the user context is entered via
 * user_enter_irqoff(), the architecture hook runs, and only then is
 * lockdep told that hard interrupts are on.
 */
static __always_inline void __exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	instrumentation_end();

	/* No instrumentation section is opened from here on. */
	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}
134
/* Non-inlined, noinstr wrapper around __exit_to_user_mode(). */
void noinstr exit_to_user_mode(void)
{
	__exit_to_user_mode();
}
139
140
/*
 * Weak default that does nothing.
 * NOTE(review): presumably overridden by architecture code - confirm.
 */
void __weak arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { }
142
143static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work)
144{
145 if (ti_work & _TIF_NOTIFY_SIGNAL)
146 tracehook_notify_signal();
147
148 arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
149}
150
/*
 * Process pending exit-to-user work until none is left.
 *
 * Each iteration handles the work indicated by the current @ti_work
 * snapshot, then re-reads the thread-info flags; the loop ends once no
 * EXIT_TO_USER_MODE_WORK bit is set in the fresh snapshot.
 *
 * @regs:    register state passed to the individual work handlers
 * @ti_work: initial snapshot of the thread-info flags
 *
 * Returns the last flags snapshot read.
 */
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
					    unsigned long ti_work)
{
	while (ti_work & EXIT_TO_USER_MODE_WORK) {

		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & _TIF_NEED_RESCHED)
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			handle_signal_work(regs, ti_work);

		if (ti_work & _TIF_NOTIFY_RESUME)
			tracehook_notify_resume(regs);

		/* Architecture hook, invoked on every iteration. */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Counterpart of local_irq_enable_exit_to_user() above; the
		 * flags are re-read only after this call.
		 */
		local_irq_disable_exit_to_user();

		/*
		 * NOTE(review): presumably picks up anything the work above
		 * deferred for the tick code - confirm.
		 */
		tick_nohz_user_enter_prepare();

		ti_work = READ_ONCE(current_thread_info()->flags);
	}

	/* Hand the final snapshot back to the caller. */
	return ti_work;
}
196
/*
 * Prepare a return to user mode: run pending exit work and the final
 * sanity checks.
 *
 * lockdep asserts that interrupts are disabled both on entry and after
 * the work loop has run.
 *
 * @regs: register state passed to the work loop and the architecture hook
 */
static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);

	lockdep_assert_irqs_disabled();

	/*
	 * Called before the work check below.
	 * NOTE(review): the flags were already sampled above, so anything
	 * this call sets is not seen in ti_work here - confirm intent.
	 */
	tick_nohz_user_enter_prepare();

	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Final checks before the return to user mode. */
	addr_limit_user_check();
	kmap_assert_nomap();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}
217
218
219
220
221
222
223static inline bool report_single_step(unsigned long work)
224{
225 if (work & SYSCALL_WORK_SYSCALL_EMU)
226 return false;
227
228 return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
229}
230
231static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
232{
233 bool step;
234
235
236
237
238
239
240
241 if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
242 if (unlikely(current->syscall_dispatch.on_dispatch)) {
243 current->syscall_dispatch.on_dispatch = false;
244 return;
245 }
246 }
247
248 audit_syscall_exit(regs);
249
250 if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
251 trace_sys_exit(regs, syscall_get_return_value(current, regs));
252
253 step = report_single_step(work);
254 if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
255 arch_syscall_exit_tracehook(regs, step);
256}
257
258
259
260
261
262static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
263{
264 unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
265 unsigned long nr = syscall_get_nr(current, regs);
266
267 CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
268
269 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
270 if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
271 local_irq_enable();
272 }
273
274 rseq_syscall(regs);
275
276
277
278
279
280
281 if (unlikely(work & SYSCALL_WORK_EXIT))
282 syscall_exit_work(regs, work);
283}
284
/*
 * Run the syscall-exit preparation followed by the generic
 * exit-to-user preparation.
 *
 * @regs: register state of the syscall
 */
static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	syscall_exit_to_user_mode_prepare(regs);
	/* exit_to_user_mode_prepare() asserts that interrupts are disabled. */
	local_irq_disable_exit_to_user();
	exit_to_user_mode_prepare(regs);
}
291
/*
 * Non-inlined wrapper around __syscall_exit_to_user_mode_work().
 *
 * Unlike syscall_exit_to_user_mode(), it does not perform the final
 * __exit_to_user_mode() step.
 *
 * @regs: register state of the syscall
 */
void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	__syscall_exit_to_user_mode_work(regs);
}
296
/*
 * Complete syscall exit: run the exit work inside an instrumentation
 * section, then perform the final return-to-user bookkeeping.
 *
 * @regs: register state of the syscall
 */
__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	__syscall_exit_to_user_mode_work(regs);
	instrumentation_end();
	__exit_to_user_mode();
}
304
/*
 * User-mode entry bookkeeping for the interrupt/exception path; a
 * noinstr wrapper around __enter_from_user_mode().
 *
 * @regs: register state forwarded unchanged
 */
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}
309
/*
 * Return to user mode from the interrupt/exception path: run the
 * exit-to-user preparation inside an instrumentation section, then the
 * final return-to-user bookkeeping.
 *
 * @regs: register state passed to exit_to_user_mode_prepare()
 */
noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	__exit_to_user_mode();
}
317
/*
 * Entry bookkeeping for interrupts and exceptions.
 *
 * @regs: register state at the time of the interrupt/exception
 *
 * Returns a state object whose exit_rcu member records whether
 * rcu_irq_enter() was called here. irqentry_exit() reads that member to
 * decide whether to call rcu_irq_exit().
 */
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	/* Entry from user mode uses the user-mode path; exit_rcu stays false. */
	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * Kernel-mode entry while the idle task is current, unless
	 * CONFIG_TINY_RCU is enabled: call rcu_irq_enter() and record that
	 * in exit_rcu.
	 * NOTE(review): presumably needed because RCU may not be watching
	 * while idle - confirm against the RCU documentation.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		/* rcu_irq_enter() is called outside the instrumentation section. */
		lockdep_hardirqs_off(CALLER_ADDR0);
		rcu_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * Any other kernel-mode entry: rcu_irq_enter() is not called, only
	 * rcu_irq_enter_check_tick(), and exit_rcu stays false.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}
382
383void irqentry_exit_cond_resched(void)
384{
385 if (!preempt_count()) {
386
387 rcu_irq_exit_check_preempt();
388 if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
389 WARN_ON_ONCE(!on_thread_stack());
390 if (need_resched())
391 preempt_schedule_irq();
392 }
393}
#ifdef CONFIG_PREEMPT_DYNAMIC
/*
 * Static call named irqentry_exit_cond_resched, defined with the
 * function of the same name as its target; irqentry_exit() invokes it
 * when CONFIG_PREEMPT_DYNAMIC is set.
 */
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
#endif
397
/*
 * Exit bookkeeping for interrupts and exceptions; counterpart of
 * irqentry_enter().
 *
 * @regs:  register state of the interrupted context
 * @state: value returned by the matching irqentry_enter() call
 */
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Returning to user mode takes the dedicated user-mode exit path. */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * Returning to kernel context whose saved registers do not
		 * have interrupts disabled. If irqentry_enter() called
		 * rcu_irq_enter(), undo that here and return without the
		 * reschedule step below.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tracing and lockdep "prepare" steps come before rcu_irq_exit(). */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare(CALLER_ADDR0);
			instrumentation_end();
			rcu_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		/* With CONFIG_PREEMPTION, give the scheduler a chance to run. */
		if (IS_ENABLED(CONFIG_PREEMPTION)) {
#ifdef CONFIG_PREEMPT_DYNAMIC
			static_call(irqentry_exit_cond_resched)();
#else
			irqentry_exit_cond_resched();
#endif
		}
		/* NOTE(review): presumably also updates lockdep state - confirm */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * Saved registers have interrupts disabled: only the RCU
		 * exit is performed, and only if irqentry_enter() called
		 * rcu_irq_enter().
		 */
		if (state.exit_rcu)
			rcu_irq_exit();
	}
}
442
/*
 * Entry bookkeeping for NMIs.
 *
 * @regs: register state at NMI entry (not referenced in this function)
 *
 * Returns a state object whose lockdep member holds the value of
 * lockdep_hardirqs_enabled() sampled on entry; irqentry_nmi_exit() uses
 * it to decide whether to switch lockdep's hardirq state back on.
 */
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	/* Sample lockdep's hardirq state before it is changed below. */
	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	rcu_nmi_enter();

	/* The tracing hooks run only after rcu_nmi_enter(). */
	instrumentation_begin();
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}
461
/*
 * Exit bookkeeping for NMIs; counterpart of irqentry_nmi_enter().
 *
 * @regs:      register state at NMI entry (not referenced in this function)
 * @irq_state: value returned by the matching irqentry_nmi_enter() call
 */
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	/* Only announce hardirqs-on if lockdep had them enabled on entry. */
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	}
	instrumentation_end();

	/* The calls below run outside any instrumentation section. */
	rcu_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}
478