linux/kernel/context_tracking.c
<<
>>
Prefs
   1/*
   2 * Context tracking: Probe on high level context boundaries such as kernel
   3 * and userspace. This includes syscalls and exceptions entry/exit.
   4 *
   5 * This is used by RCU to remove its dependency on the timer tick while a CPU
   6 * runs in userspace.
   7 *
   8 *  Started by Frederic Weisbecker:
   9 *
  10 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
  11 *
  12 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
  13 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
  14 *
  15 */
  16
  17#include <linux/context_tracking.h>
  18#include <linux/rcupdate.h>
  19#include <linux/sched.h>
  20#include <linux/hardirq.h>
  21#include <linux/export.h>
  22#include <linux/kprobes.h>
  23
  24#define CREATE_TRACE_POINTS
  25#include <trace/events/context_tracking.h>
  26
  27DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
  28EXPORT_SYMBOL_GPL(context_tracking_enabled);
  29
  30DEFINE_PER_CPU(struct context_tracking, context_tracking);
  31EXPORT_SYMBOL_GPL(context_tracking);
  32
  33static bool context_tracking_recursion_enter(void)
  34{
  35        int recursion;
  36
  37        recursion = __this_cpu_inc_return(context_tracking.recursion);
  38        if (recursion == 1)
  39                return true;
  40
  41        WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
  42        __this_cpu_dec(context_tracking.recursion);
  43
  44        return false;
  45}
  46
  47static void context_tracking_recursion_exit(void)
  48{
  49        __this_cpu_dec(context_tracking.recursion);
  50}
  51
  52/**
  53 * context_tracking_enter - Inform the context tracking that the CPU is going
  54 *                          enter user or guest space mode.
  55 *
  56 * This function must be called right before we switch from the kernel
  57 * to user or guest space, when it's guaranteed the remaining kernel
  58 * instructions to execute won't use any RCU read side critical section
  59 * because this function sets RCU in extended quiescent state.
  60 */
  61void __context_tracking_enter(enum ctx_state state)
  62{
  63        /* Kernel threads aren't supposed to go to userspace */
  64        WARN_ON_ONCE(!current->mm);
  65
  66        if (!context_tracking_recursion_enter())
  67                return;
  68
  69        if ( __this_cpu_read(context_tracking.state) != state) {
  70                if (__this_cpu_read(context_tracking.active)) {
  71                        /*
  72                         * At this stage, only low level arch entry code remains and
  73                         * then we'll run in userspace. We can assume there won't be
  74                         * any RCU read-side critical section until the next call to
  75                         * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
  76                         * on the tick.
  77                         */
  78                        if (state == CONTEXT_USER) {
  79                                trace_user_enter(0);
  80                                vtime_user_enter(current);
  81                        }
  82                        rcu_user_enter();
  83                }
  84                /*
  85                 * Even if context tracking is disabled on this CPU, because it's outside
  86                 * the full dynticks mask for example, we still have to keep track of the
  87                 * context transitions and states to prevent inconsistency on those of
  88                 * other CPUs.
  89                 * If a task triggers an exception in userspace, sleep on the exception
  90                 * handler and then migrate to another CPU, that new CPU must know where
  91                 * the exception returns by the time we call exception_exit().
  92                 * This information can only be provided by the previous CPU when it called
  93                 * exception_enter().
  94                 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
  95                 * is false because we know that CPU is not tickless.
  96                 */
  97                __this_cpu_write(context_tracking.state, state);
  98        }
  99        context_tracking_recursion_exit();
 100}
 101NOKPROBE_SYMBOL(__context_tracking_enter);
 102EXPORT_SYMBOL_GPL(__context_tracking_enter);
 103
 104void context_tracking_enter(enum ctx_state state)
 105{
 106        unsigned long flags;
 107
 108        /*
 109         * Some contexts may involve an exception occuring in an irq,
 110         * leading to that nesting:
 111         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
 112         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
 113         * helpers are enough to protect RCU uses inside the exception. So
 114         * just return immediately if we detect we are in an IRQ.
 115         */
 116        if (in_interrupt())
 117                return;
 118
 119        local_irq_save(flags);
 120        __context_tracking_enter(state);
 121        local_irq_restore(flags);
 122}
 123NOKPROBE_SYMBOL(context_tracking_enter);
 124EXPORT_SYMBOL_GPL(context_tracking_enter);
 125
 126void context_tracking_user_enter(void)
 127{
 128        user_enter();
 129}
 130NOKPROBE_SYMBOL(context_tracking_user_enter);
 131
 132/**
 133 * context_tracking_exit - Inform the context tracking that the CPU is
 134 *                         exiting user or guest mode and entering the kernel.
 135 *
 136 * This function must be called after we entered the kernel from user or
 137 * guest space before any use of RCU read side critical section. This
 138 * potentially include any high level kernel code like syscalls, exceptions,
 139 * signal handling, etc...
 140 *
 141 * This call supports re-entrancy. This way it can be called from any exception
 142 * handler without needing to know if we came from userspace or not.
 143 */
 144void __context_tracking_exit(enum ctx_state state)
 145{
 146        if (!context_tracking_recursion_enter())
 147                return;
 148
 149        if (__this_cpu_read(context_tracking.state) == state) {
 150                if (__this_cpu_read(context_tracking.active)) {
 151                        /*
 152                         * We are going to run code that may use RCU. Inform
 153                         * RCU core about that (ie: we may need the tick again).
 154                         */
 155                        rcu_user_exit();
 156                        if (state == CONTEXT_USER) {
 157                                vtime_user_exit(current);
 158                                trace_user_exit(0);
 159                        }
 160                }
 161                __this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
 162        }
 163        context_tracking_recursion_exit();
 164}
 165NOKPROBE_SYMBOL(__context_tracking_exit);
 166EXPORT_SYMBOL_GPL(__context_tracking_exit);
 167
 168void context_tracking_exit(enum ctx_state state)
 169{
 170        unsigned long flags;
 171
 172        if (in_interrupt())
 173                return;
 174
 175        local_irq_save(flags);
 176        __context_tracking_exit(state);
 177        local_irq_restore(flags);
 178}
 179NOKPROBE_SYMBOL(context_tracking_exit);
 180EXPORT_SYMBOL_GPL(context_tracking_exit);
 181
 182void context_tracking_user_exit(void)
 183{
 184        user_exit();
 185}
 186NOKPROBE_SYMBOL(context_tracking_user_exit);
 187
 188void __init context_tracking_cpu_set(int cpu)
 189{
 190        static __initdata bool initialized = false;
 191
 192        if (!per_cpu(context_tracking.active, cpu)) {
 193                per_cpu(context_tracking.active, cpu) = true;
 194                static_branch_inc(&context_tracking_enabled);
 195        }
 196
 197        if (initialized)
 198                return;
 199
 200        /*
 201         * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork
 202         * This assumes that init is the only task at this early boot stage.
 203         */
 204        set_tsk_thread_flag(&init_task, TIF_NOHZ);
 205        WARN_ON_ONCE(!tasklist_empty());
 206
 207        initialized = true;
 208}
 209
 210#ifdef CONFIG_CONTEXT_TRACKING_FORCE
 211void __init context_tracking_init(void)
 212{
 213        int cpu;
 214
 215        for_each_possible_cpu(cpu)
 216                context_tracking_cpu_set(cpu);
 217}
 218#endif
 219