/* linux/arch/x86/kernel/process.c */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
#include <linux/user-return-notifier.h>
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <linux/stackprotector.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>

/*
 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
 * no more per-task TSS's. The TSS size is kept cacheline-aligned
 * so they are allowed to end up in the .data..cacheline_aligned
 * section. Since TSS's are completely CPU-local, we want them
 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
 */
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;

#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
        atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
#endif
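
/*
 * Slab cache backing the per-task extended register state (union
 * thread_xstate): FPU, SSE and, when available, XSAVE-managed state.
 */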
struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);

/*
 * this gets called so that we can store lazy state into memory and copy the
 * current task into the new thread.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
        int ret;

        *dst = *src;
        if (fpu_allocated(&src->thread.fpu)) {
                memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
                ret = fpu_alloc(&dst->thread.fpu);
                if (ret)
                        return ret;
                fpu_copy(dst, src);
        }
        return 0;
}
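
/* Release a task's lazily allocated FPU/xstate buffer, if any. */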
void free_thread_xstate(struct task_struct *tsk)
{
        fpu_free(&tsk->thread.fpu);
}
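
/* Called when a task_struct is being freed; drop its extended FPU state. */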
void arch_release_task_struct(struct task_struct *tsk)
{
        free_thread_xstate(tsk);
}
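
/*
 * Called once during boot to create the xstate slab cache and to set up
 * the compacted xstate area offsets.
 */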
void arch_task_cache_init(void)
{
        task_xstate_cachep =
                kmem_cache_create("task_xstate", xstate_size,
                                  __alignof__(union thread_xstate),
                                  SLAB_PANIC | SLAB_NOTRACK, NULL);
        setup_xstate_comp();
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;
        unsigned long *bp = t->io_bitmap_ptr;

        if (bp) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
                kfree(bp);
        }

        drop_fpu(me);
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        flush_ptrace_hw_breakpoint(tsk);
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        drop_init_fpu(tsk);
        /*
         * Free the FPU state for non xsave platforms. They get reallocated
         * lazily at the first use.
         */
        if (!use_eager_fpu())
                free_thread_xstate(tsk);
}

static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}
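
/*
 * get_tsc_mode() and set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl()
 * calls: a task may ask for RDTSC to raise SIGSEGV instead of returning the
 * time stamp counter.
 */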
int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}
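
/*
 * Called from __switch_to() when prev and next differ in one of the rarely
 * used "extra" TIF flags: it updates single-step-on-branches (BTF), the
 * TSC-disable bit in CR4, the I/O permission bitmap in the TSS and the
 * user-return notifiers.
 */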
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                      struct tss_struct *tss)
{
        struct thread_struct *prev, *next;

        prev = &prev_p->thread;
        next = &next_p->thread;

        if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
            test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
                unsigned long debugctl = get_debugctlmsr();

                debugctl &= ~DEBUGCTLMSR_BTF;
                if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
                        debugctl |= DEBUGCTLMSR_BTF;

                update_debugctlmsr(debugctl);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }
        propagate_user_return_notify(prev_p, next_p);
}

/*
 * Idle related variables and functions
 */
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
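
/*
 * The idle routine currently in use. It stays NULL until
 * select_idle_routine(), xen_set_default_idle() or the "idle=" boot
 * option installs one.
 */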
static void (*x86_idle)(void);

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
        BUG();
}
#endif

#ifdef CONFIG_X86_64
void enter_idle(void)
{
        this_cpu_write(is_idle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}
#endif
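
/*
 * Hooks called from the generic idle loop: touch the NMI watchdog and run
 * the x86-64 idle notifiers around each idle period; arch_cpu_idle_dead()
 * is the final stop for a CPU that has been taken offline.
 */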
void arch_cpu_idle_enter(void)
{
        local_touch_nmi();
        enter_idle();
}

void arch_cpu_idle_exit(void)
{
        __exit_idle();
}

void arch_cpu_idle_dead(void)
{
        play_dead();
}

/*
 * Called from the generic idle code.
 */
void arch_cpu_idle(void)
{
        x86_idle();
}

/*
 * We use this if we don't have any better idle routine..
 */
void default_idle(void)
{
        trace_cpu_idle_rcuidle(1, smp_processor_id());
        safe_halt();
        trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif

#ifdef CONFIG_XEN
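/*
 * Xen uses this to force the HLT-based default_idle() routine; the return
 * value tells the caller whether an idle routine had already been chosen.
 */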
bool xen_set_default_idle(void)
{
        bool ret = !!x86_idle;

        x86_idle = default_idle;

        return ret;
}
#endif
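
/*
 * Park the calling CPU: mark it offline, disable its local APIC and halt
 * forever. Used on the shutdown, reboot and panic-stop paths.
 */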
void stop_this_cpu(void *dummy)
{
        local_irq_disable();
        /*
         * Remove this CPU:
         */
        set_cpu_online(smp_processor_id(), false);
        disable_local_APIC();

        for (;;)
                halt();
}

bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);
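
/* CPUs that have already been switched to the broadcast timer (see below). */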
static cpumask_var_t amd_e400_c1e_mask;

void amd_e400_remove_cpu(int cpu)
{
        if (amd_e400_c1e_mask != NULL)
                cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
}

/*
 * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
 * pending message MSR. If we detect C1E, then we handle it the same
 * way as C3 power states (local apic timer and TSC stop)
 */
static void amd_e400_idle(void)
{
        if (!amd_e400_c1e_detected) {
                u32 lo, hi;

                rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);

                if (lo & K8_INTP_C1E_ACTIVE_MASK) {
                        amd_e400_c1e_detected = true;
                        if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                                mark_tsc_unstable("TSC halt in AMD C1E");
                        pr_info("System has AMD C1E enabled\n");
                }
        }

        if (amd_e400_c1e_detected) {
                int cpu = smp_processor_id();

                if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
                        cpumask_set_cpu(cpu, amd_e400_c1e_mask);
                        /*
                         * Force broadcast so ACPI can not interfere.
                         */
                        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
                                           &cpu);
                        pr_info("Switch to broadcast mode on CPU%d\n", cpu);
                }
                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

                default_idle();

                /*
                 * The switch back from broadcast mode needs to be
                 * called with interrupts disabled.
                 */
                local_irq_disable();
                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
                local_irq_enable();
        } else
                default_idle();
}
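
/*
 * Pick the idle routine for this CPU: keep the current choice if one was
 * already made or if "idle=poll" is in effect, use the E400-aware routine
 * on affected AMD parts, and fall back to HLT via default_idle() otherwise.
 */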
void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
        if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
                pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
#endif
        if (x86_idle || boot_option_idle_override == IDLE_POLL)
                return;

        if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
                /* E400: APIC timer interrupt does not wake up CPU from C1e */
                pr_info("using AMD E400 aware idle routine\n");
                x86_idle = amd_e400_idle;
        } else
                x86_idle = default_idle;
}

void __init init_amd_e400_c1e_mask(void)
{
        /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
        if (x86_idle == amd_e400_idle)
                zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}

static int __init idle_setup(char *str)
{
        if (!str)
                return -EINVAL;

        if (!strcmp(str, "poll")) {
                pr_info("using polling idle threads\n");
                boot_option_idle_override = IDLE_POLL;
                cpu_idle_poll_ctrl(true);
        } else if (!strcmp(str, "halt")) {
                /*
                 * When the boot option of idle=halt is added, halt is
                 * forced to be used for CPU idle. In such case CPU C2/C3
                 * won't be used again.
                 * To continue to load the CPU idle driver, don't touch
                 * the boot_option_idle_override.
                 */
                x86_idle = default_idle;
                boot_option_idle_override = IDLE_HALT;
        } else if (!strcmp(str, "nomwait")) {
                /*
                 * If the boot option of "idle=nomwait" is added,
                 * it means that mwait will be disabled for CPU C2/C3
                 * states. In such case it won't touch the variable
                 * of boot_option_idle_override.
                 */
                boot_option_idle_override = IDLE_NOMWAIT;
        } else
                return -1;

        return 0;
}
early_param("idle", idle_setup);
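
/*
 * Randomize the initial user stack pointer by up to 8 KB (unless address
 * space randomization is disabled) and keep it 16-byte aligned.
 */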
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}
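
/*
 * Place the heap start at a random offset within a 32 MB window above
 * the current mm->brk.
 */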
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}