linux/arch/powerpc/kernel/process.c
   1/*
   2 *  Derived from "arch/i386/kernel/process.c"
   3 *    Copyright (C) 1995  Linus Torvalds
   4 *
   5 *  Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
   6 *  Paul Mackerras (paulus@cs.anu.edu.au)
   7 *
   8 *  PowerPC version
   9 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  10 *
  11 *  This program is free software; you can redistribute it and/or
  12 *  modify it under the terms of the GNU General Public License
  13 *  as published by the Free Software Foundation; either version
  14 *  2 of the License, or (at your option) any later version.
  15 */
  16
  17#include <linux/errno.h>
  18#include <linux/sched.h>
  19#include <linux/kernel.h>
  20#include <linux/mm.h>
  21#include <linux/smp.h>
  22#include <linux/stddef.h>
  23#include <linux/unistd.h>
  24#include <linux/ptrace.h>
  25#include <linux/slab.h>
  26#include <linux/user.h>
  27#include <linux/elf.h>
  28#include <linux/prctl.h>
  29#include <linux/init_task.h>
  30#include <linux/export.h>
  31#include <linux/kallsyms.h>
  32#include <linux/mqueue.h>
  33#include <linux/hardirq.h>
  34#include <linux/utsname.h>
  35#include <linux/ftrace.h>
  36#include <linux/kernel_stat.h>
  37#include <linux/personality.h>
  38#include <linux/random.h>
  39#include <linux/hw_breakpoint.h>
  40
  41#include <asm/pgtable.h>
  42#include <asm/uaccess.h>
  43#include <asm/io.h>
  44#include <asm/processor.h>
  45#include <asm/mmu.h>
  46#include <asm/prom.h>
  47#include <asm/machdep.h>
  48#include <asm/time.h>
  49#include <asm/runlatch.h>
  50#include <asm/syscalls.h>
  51#include <asm/switch_to.h>
  52#include <asm/tm.h>
  53#include <asm/debug.h>
  54#ifdef CONFIG_PPC64
  55#include <asm/firmware.h>
  56#endif
  57#include <asm/code-patching.h>
  58#include <asm/livepatch.h>
  59
  60#include <linux/kprobes.h>
  61#include <linux/kdebug.h>
  62
  63/* Transactional Memory debug */
  64#ifdef TM_DEBUG_SW
  65#define TM_DEBUG(x...) printk(KERN_INFO x)
  66#else
  67#define TM_DEBUG(x...) do { } while(0)
  68#endif
  69
  70extern unsigned long _get_SP(void);
  71
  72#ifndef CONFIG_SMP
  73struct task_struct *last_task_used_math = NULL;
  74struct task_struct *last_task_used_altivec = NULL;
  75struct task_struct *last_task_used_vsx = NULL;
  76struct task_struct *last_task_used_spe = NULL;
  77#endif
  78
  79#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  80void giveup_fpu_maybe_transactional(struct task_struct *tsk)
  81{
  82        /*
  83         * If we are saving the current thread's registers, and the
  84         * thread is in a transactional state, set the TIF_RESTORE_TM
  85         * bit so that we know to restore the registers before
  86         * returning to userspace.
  87         */
  88        if (tsk == current && tsk->thread.regs &&
  89            MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
  90            !test_thread_flag(TIF_RESTORE_TM)) {
  91                tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
  92                set_thread_flag(TIF_RESTORE_TM);
  93        }
  94
  95        giveup_fpu(tsk);
  96}
  97
  98void giveup_altivec_maybe_transactional(struct task_struct *tsk)
  99{
 100        /*
 101         * If we are saving the current thread's registers, and the
 102         * thread is in a transactional state, set the TIF_RESTORE_TM
 103         * bit so that we know to restore the registers before
 104         * returning to userspace.
 105         */
 106        if (tsk == current && tsk->thread.regs &&
 107            MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
 108            !test_thread_flag(TIF_RESTORE_TM)) {
 109                tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
 110                set_thread_flag(TIF_RESTORE_TM);
 111        }
 112
 113        giveup_altivec(tsk);
 114}
 115
 116#else
 117#define giveup_fpu_maybe_transactional(tsk)     giveup_fpu(tsk)
 118#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk)
 119#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 120
 121#ifdef CONFIG_PPC_FPU
 122/*
 123 * Make sure the floating-point register state in the
 124 * thread_struct is up to date for task tsk.
 125 */
 126void flush_fp_to_thread(struct task_struct *tsk)
 127{
 128        if (tsk->thread.regs) {
 129                /*
 130                 * We need to disable preemption here because if we didn't,
 131                 * another process could get scheduled after the regs->msr
 132                 * test but before we have finished saving the FP registers
 133                 * to the thread_struct.  That process could take over the
 134                 * FPU, and then when we get scheduled again we would store
 135                 * bogus values for the remaining FP registers.
 136                 */
 137                preempt_disable();
 138                if (tsk->thread.regs->msr & MSR_FP) {
 139#ifdef CONFIG_SMP
 140                        /*
 141                         * This should only ever be called for current or
 142                         * for a stopped child process.  Since we save away
 143                         * the FP register state on context switch on SMP,
 144                         * there is something wrong if a stopped child appears
 145                         * to still have its FP state in the CPU registers.
 146                         */
 147                        BUG_ON(tsk != current);
 148#endif
 149                        giveup_fpu_maybe_transactional(tsk);
 150                }
 151                preempt_enable();
 152        }
 153}
 154EXPORT_SYMBOL_GPL(flush_fp_to_thread);
 155#endif /* CONFIG_PPC_FPU */
 156
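    /*
     * Give the kernel temporary use of the FPU: flush any live user FP
     * state first (current's on SMP, last_task_used_math's on UP) so
     * nothing is lost when the kernel clobbers the FP registers.
     */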
 157void enable_kernel_fp(void)
 158{
 159        WARN_ON(preemptible());
 160
 161#ifdef CONFIG_SMP
 162        if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
 163                giveup_fpu_maybe_transactional(current);
 164        else
 165                giveup_fpu(NULL);       /* just enables FP for kernel */
 166#else
 167        giveup_fpu_maybe_transactional(last_task_used_math);
 168#endif /* CONFIG_SMP */
 169}
 170EXPORT_SYMBOL(enable_kernel_fp);
 171
 172#ifdef CONFIG_ALTIVEC
 173void enable_kernel_altivec(void)
 174{
 175        WARN_ON(preemptible());
 176
 177#ifdef CONFIG_SMP
 178        if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
 179                giveup_altivec_maybe_transactional(current);
 180        else
 181                giveup_altivec_notask();
 182#else
 183        giveup_altivec_maybe_transactional(last_task_used_altivec);
 184#endif /* CONFIG_SMP */
 185}
 186EXPORT_SYMBOL(enable_kernel_altivec);
 187
 188/*
 189 * Make sure the VMX/Altivec register state in the
 190 * thread_struct is up to date for task tsk.
 191 */
 192void flush_altivec_to_thread(struct task_struct *tsk)
 193{
 194        if (tsk->thread.regs) {
 195                preempt_disable();
 196                if (tsk->thread.regs->msr & MSR_VEC) {
 197#ifdef CONFIG_SMP
 198                        BUG_ON(tsk != current);
 199#endif
 200                        giveup_altivec_maybe_transactional(tsk);
 201                }
 202                preempt_enable();
 203        }
 204}
 205EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
 206#endif /* CONFIG_ALTIVEC */
 207
 208#ifdef CONFIG_VSX
 209void enable_kernel_vsx(void)
 210{
 211        WARN_ON(preemptible());
 212
 213#ifdef CONFIG_SMP
 214        if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
 215                giveup_vsx(current);
 216        else
 217                giveup_vsx(NULL);       /* just enable vsx for kernel - force */
 218#else
 219        giveup_vsx(last_task_used_vsx);
 220#endif /* CONFIG_SMP */
 221}
 222EXPORT_SYMBOL(enable_kernel_vsx);
 223
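    /*
     * VSX state overlaps the FP and VMX register sets, so giving up VSX
     * means flushing the FP and Altivec state as well before the
     * VSX-specific __giveup_vsx().
     */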
 224void giveup_vsx(struct task_struct *tsk)
 225{
 226        giveup_fpu_maybe_transactional(tsk);
 227        giveup_altivec_maybe_transactional(tsk);
 228        __giveup_vsx(tsk);
 229}
 230
 231void flush_vsx_to_thread(struct task_struct *tsk)
 232{
 233        if (tsk->thread.regs) {
 234                preempt_disable();
 235                if (tsk->thread.regs->msr & MSR_VSX) {
 236#ifdef CONFIG_SMP
 237                        BUG_ON(tsk != current);
 238#endif
 239                        giveup_vsx(tsk);
 240                }
 241                preempt_enable();
 242        }
 243}
 244EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
 245#endif /* CONFIG_VSX */
 246
 247#ifdef CONFIG_SPE
 248
 249void enable_kernel_spe(void)
 250{
 251        WARN_ON(preemptible());
 252
 253#ifdef CONFIG_SMP
 254        if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
 255                giveup_spe(current);
 256        else
 257                giveup_spe(NULL);       /* just enable SPE for kernel - force */
 258#else
 259        giveup_spe(last_task_used_spe);
 260#endif /* CONFIG_SMP */
 261}
 262EXPORT_SYMBOL(enable_kernel_spe);
 263
 264void flush_spe_to_thread(struct task_struct *tsk)
 265{
 266        if (tsk->thread.regs) {
 267                preempt_disable();
 268                if (tsk->thread.regs->msr & MSR_SPE) {
 269#ifdef CONFIG_SMP
 270                        BUG_ON(tsk != current);
 271#endif
 272                        tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
 273                        giveup_spe(tsk);
 274                }
 275                preempt_enable();
 276        }
 277}
 278#endif /* CONFIG_SPE */
 279
 280#ifndef CONFIG_SMP
 281/*
 282 * If we are doing lazy switching of CPU state (FP, altivec or SPE),
 283 * and the current task has some state, discard it.
 284 */
 285void discard_lazy_cpu_state(void)
 286{
 287        preempt_disable();
 288        if (last_task_used_math == current)
 289                last_task_used_math = NULL;
 290#ifdef CONFIG_ALTIVEC
 291        if (last_task_used_altivec == current)
 292                last_task_used_altivec = NULL;
 293#endif /* CONFIG_ALTIVEC */
 294#ifdef CONFIG_VSX
 295        if (last_task_used_vsx == current)
 296                last_task_used_vsx = NULL;
 297#endif /* CONFIG_VSX */
 298#ifdef CONFIG_SPE
 299        if (last_task_used_spe == current)
 300                last_task_used_spe = NULL;
 301#endif
 302        preempt_enable();
 303}
 304#endif /* CONFIG_SMP */
 305
 306#ifdef CONFIG_PPC_ADV_DEBUG_REGS
 307void do_send_trap(struct pt_regs *regs, unsigned long address,
 308                  unsigned long error_code, int signal_code, int breakpt)
 309{
 310        siginfo_t info;
 311
 312        current->thread.trap_nr = signal_code;
 313        if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
 314                        11, SIGSEGV) == NOTIFY_STOP)
 315                return;
 316
 317        /* Deliver the signal to userspace */
 318        info.si_signo = SIGTRAP;
 319        info.si_errno = breakpt;        /* breakpoint or watchpoint id */
 320        info.si_code = signal_code;
 321        info.si_addr = (void __user *)address;
 322        force_sig_info(SIGTRAP, &info, current);
 323}
 324#else   /* !CONFIG_PPC_ADV_DEBUG_REGS */
 325void do_break (struct pt_regs *regs, unsigned long address,
 326                    unsigned long error_code)
 327{
 328        siginfo_t info;
 329
 330        current->thread.trap_nr = TRAP_HWBKPT;
 331        if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
 332                        11, SIGSEGV) == NOTIFY_STOP)
 333                return;
 334
 335        if (debugger_break_match(regs))
 336                return;
 337
 338        /* Clear the breakpoint */
 339        hw_breakpoint_disable();
 340
 341        /* Deliver the signal to userspace */
 342        info.si_signo = SIGTRAP;
 343        info.si_errno = 0;
 344        info.si_code = TRAP_HWBKPT;
 345        info.si_addr = (void __user *)address;
 346        force_sig_info(SIGTRAP, &info, current);
 347}
 348#endif  /* CONFIG_PPC_ADV_DEBUG_REGS */
 349
 350static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
 351
 352#ifdef CONFIG_PPC_ADV_DEBUG_REGS
 353/*
 354 * Set the debug registers back to their default "safe" values.
 355 */
 356static void set_debug_reg_defaults(struct thread_struct *thread)
 357{
 358        thread->debug.iac1 = thread->debug.iac2 = 0;
 359#if CONFIG_PPC_ADV_DEBUG_IACS > 2
 360        thread->debug.iac3 = thread->debug.iac4 = 0;
 361#endif
 362        thread->debug.dac1 = thread->debug.dac2 = 0;
 363#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
 364        thread->debug.dvc1 = thread->debug.dvc2 = 0;
 365#endif
 366        thread->debug.dbcr0 = 0;
 367#ifdef CONFIG_BOOKE
 368        /*
 369         * Force User/Supervisor bits to 0b11 (user-only MSR[PR]=1)
 370         */
 371        thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
 372                        DBCR1_IAC3US | DBCR1_IAC4US;
 373        /*
 374         * Force Data Address Compare User/Supervisor bits to be User-only
 375         * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
 376         */
 377        thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
 378#else
 379        thread->debug.dbcr1 = 0;
 380#endif
 381}
 382
 383static void prime_debug_regs(struct thread_struct *thread)
 384{
 385        /*
 386         * We could have inherited MSR_DE from userspace, since
 387         * it doesn't get cleared on exception entry.  Make sure
 388         * MSR_DE is clear before we enable any debug events.
 389         */
 390        mtmsr(mfmsr() & ~MSR_DE);
 391
 392        mtspr(SPRN_IAC1, thread->debug.iac1);
 393        mtspr(SPRN_IAC2, thread->debug.iac2);
 394#if CONFIG_PPC_ADV_DEBUG_IACS > 2
 395        mtspr(SPRN_IAC3, thread->debug.iac3);
 396        mtspr(SPRN_IAC4, thread->debug.iac4);
 397#endif
 398        mtspr(SPRN_DAC1, thread->debug.dac1);
 399        mtspr(SPRN_DAC2, thread->debug.dac2);
 400#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
 401        mtspr(SPRN_DVC1, thread->debug.dvc1);
 402        mtspr(SPRN_DVC2, thread->debug.dvc2);
 403#endif
 404        mtspr(SPRN_DBCR0, thread->debug.dbcr0);
 405        mtspr(SPRN_DBCR1, thread->debug.dbcr1);
 406#ifdef CONFIG_BOOKE
 407        mtspr(SPRN_DBCR2, thread->debug.dbcr2);
 408#endif
 409}
 410/*
 411 * If either the old or the new thread is using the debug
 412 * registers, set the debug registers from the values
 413 * stored in the new thread.
 414 */
 415void switch_booke_debug_regs(struct thread_struct *new_thread)
 416{
 417        if ((current->thread.debug.dbcr0 & DBCR0_IDM)
 418                || (new_thread->debug.dbcr0 & DBCR0_IDM))
 419                        prime_debug_regs(new_thread);
 420}
 421EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 422#else   /* !CONFIG_PPC_ADV_DEBUG_REGS */
 423#ifndef CONFIG_HAVE_HW_BREAKPOINT
 424static void set_debug_reg_defaults(struct thread_struct *thread)
 425{
 426        thread->hw_brk.address = 0;
 427        thread->hw_brk.type = 0;
 428        set_breakpoint(&thread->hw_brk);
 429}
 430#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
 431#endif  /* CONFIG_PPC_ADV_DEBUG_REGS */
 432
 433#ifdef CONFIG_PPC_ADV_DEBUG_REGS
 434static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
 435{
 436        mtspr(SPRN_DAC1, dabr);
 437#ifdef CONFIG_PPC_47x
 438        isync();
 439#endif
 440        return 0;
 441}
 442#elif defined(CONFIG_PPC_BOOK3S)
 443static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
 444{
 445        mtspr(SPRN_DABR, dabr);
 446        if (cpu_has_feature(CPU_FTR_DABRX))
 447                mtspr(SPRN_DABRX, dabrx);
 448        return 0;
 449}
 450#else
 451static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
 452{
 453        return -EINVAL;
 454}
 455#endif
 456
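    /*
     * Translate a generic arch_hw_breakpoint into DABR/DABRX values and
     * install them, preferring a platform hook (ppc_md.set_dabr) over
     * the direct register write in __set_dabr().
     */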
 457static inline int set_dabr(struct arch_hw_breakpoint *brk)
 458{
 459        unsigned long dabr, dabrx;
 460
 461        dabr = brk->address | (brk->type & HW_BRK_TYPE_DABR);
 462        dabrx = ((brk->type >> 3) & 0x7);
 463
 464        if (ppc_md.set_dabr)
 465                return ppc_md.set_dabr(dabr, dabrx);
 466
 467        return __set_dabr(dabr, dabrx);
 468}
 469
 470static inline int set_dawr(struct arch_hw_breakpoint *brk)
 471{
 472        unsigned long dawr, dawrx, mrd;
 473
 474        dawr = brk->address;
 475
 476        dawrx  = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE))
 477                                   << (63 - 58); /* read/write bits */
 478        dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2)
 479                                   << (63 - 59); /* translate */
 480        dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL))
 481                                   >> 3; /* PRIV bits */
 482        /* The DAWR length is stored in field MDR, bits 48:53.  It matches a
 483           range in doublewords (64 bits), biased by -1, e.g. 0b000000=1DW and
 484           0b111111=64DW.
 485           brk->len is in bytes.
 486           This aligns up to doubleword size, shifts and applies the bias.
 487        */
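            /*
             * For example, an 8-byte breakpoint gives
             * mrd = ((8 + 7) >> 3) - 1 = 0 (a 1-doubleword match range),
             * and a 16-byte breakpoint gives mrd = 1 (2 doublewords).
             */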
 488        mrd = ((brk->len + 7) >> 3) - 1;
 489        dawrx |= (mrd & 0x3f) << (63 - 53);
 490
 491        if (ppc_md.set_dawr)
 492                return ppc_md.set_dawr(dawr, dawrx);
 493        mtspr(SPRN_DAWR, dawr);
 494        mtspr(SPRN_DAWRX, dawrx);
 495        return 0;
 496}
 497
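    /*
     * Install a breakpoint on this CPU: remember it in the per-cpu
     * current_brk and program either the DAWR (if the CPU has it) or the
     * DABR.  set_breakpoint() below is the preemption-safe wrapper.
     */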
 498void __set_breakpoint(struct arch_hw_breakpoint *brk)
 499{
 500        __get_cpu_var(current_brk) = *brk;
 501
 502        if (cpu_has_feature(CPU_FTR_DAWR))
 503                set_dawr(brk);
 504        else
 505                set_dabr(brk);
 506}
 507
 508void set_breakpoint(struct arch_hw_breakpoint *brk)
 509{
 510        preempt_disable();
 511        __set_breakpoint(brk);
 512        preempt_enable();
 513}
 514
 515#ifdef CONFIG_PPC64
 516DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
 517#endif
 518
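    /*
     * Two breakpoints match if address, type and length are all equal;
     * used at context switch to skip reprogramming an identical
     * breakpoint.
     */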
 519static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
 520                              struct arch_hw_breakpoint *b)
 521{
 522        if (a->address != b->address)
 523                return false;
 524        if (a->type != b->type)
 525                return false;
 526        if (a->len != b->len)
 527                return false;
 528        return true;
 529}
 530
 531#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 532static void tm_reclaim_thread(struct thread_struct *thr,
 533                              struct thread_info *ti, uint8_t cause)
 534{
 535        unsigned long msr_diff = 0;
 536
 537        /*
 538         * If FP/VSX registers have already been saved to the
 539         * thread_struct, move them to the transact_fp array.
 540         * We clear the TIF_RESTORE_TM bit since after the reclaim
 541         * the thread will no longer be transactional.
 542         */
 543        if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) {
 544                msr_diff = thr->ckpt_regs.msr & ~thr->regs->msr;
 545                if (msr_diff & MSR_FP)
 546                        memcpy(&thr->transact_fp, &thr->fp_state,
 547                               sizeof(struct thread_fp_state));
 548                if (msr_diff & MSR_VEC)
 549                        memcpy(&thr->transact_vr, &thr->vr_state,
 550                               sizeof(struct thread_vr_state));
 551                clear_ti_thread_flag(ti, TIF_RESTORE_TM);
 552                msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
 553        }
 554
 555        /*
 556         * Use the current MSR TM suspended bit to track if we have
 557         * checkpointed state outstanding.
 558         * On signal delivery, we'd normally reclaim the checkpointed
 559         * state to obtain the stack pointer (see: get_tm_stackpointer()).
 560         * This will then directly return to userspace without going
 561         * through __switch_to(). However, if the stack frame is bad,
 562         * we need to exit this thread which calls __switch_to() which
 563         * will again attempt to reclaim the already saved tm state.
 564         * Hence we need to check that we've not already reclaimed
 565         * this state.
 566         * We do this using the current MSR, rather than tracking it in
 567         * some specific thread_struct bit, as it has the additional
 568         * benefit of checking for a potential TM bad thing exception.
 569         */
 570        if (!MSR_TM_SUSPENDED(mfmsr()))
 571                return;
 572
 573        tm_reclaim(thr, thr->regs->msr, cause);
 574
 575        /* Having done the reclaim, we now have the checkpointed
 576         * FP/VSX values in the registers.  These might be valid
 577         * even if we have previously called enable_kernel_fp() or
 578         * flush_fp_to_thread(), so update thr->regs->msr to
 579         * indicate their current validity.
 580         */
 581        thr->regs->msr |= msr_diff;
 582}
 583
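    /*
     * Reclaim the transactional state of the current thread, recording
     * "cause" as the TM failure cause (e.g. on signal delivery or exec).
     */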
 584void tm_reclaim_current(uint8_t cause)
 585{
 586        tm_enable();
 587        tm_reclaim_thread(&current->thread, current_thread_info(), cause);
 588}
 589
 590static inline void tm_reclaim_task(struct task_struct *tsk)
 591{
 592        /* We have to work out if we're switching from/to a task that's in the
 593         * middle of a transaction.
 594         *
 595         * In switching we need to maintain a 2nd register state as
 596         * oldtask->thread.ckpt_regs.  We tm_reclaim(oldproc); this saves the
 597         * checkpointed (tbegin) state in ckpt_regs and saves the transactional
 598         * (current) FPRs into oldtask->thread.transact_fpr[].
 599         *
 600         * We also context switch (save) TFHAR/TEXASR/TFIAR in here.
 601         */
 602        struct thread_struct *thr = &tsk->thread;
 603
 604        if (!thr->regs)
 605                return;
 606
 607        if (!MSR_TM_ACTIVE(thr->regs->msr))
 608                goto out_and_saveregs;
 609
 610        /* Stash the original thread MSR, as giveup_fpu et al will
 611         * modify it.  We hold onto it to see whether the task used
 612         * FP & vector regs.  If the TIF_RESTORE_TM flag is set,
 613         * ckpt_regs.msr is already set.
 614         */
 615        if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM))
 616                thr->ckpt_regs.msr = thr->regs->msr;
 617
 618        TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
 619                 "ccr=%lx, msr=%lx, trap=%lx)\n",
 620                 tsk->pid, thr->regs->nip,
 621                 thr->regs->ccr, thr->regs->msr,
 622                 thr->regs->trap);
 623
 624        tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED);
 625
 626        TM_DEBUG("--- tm_reclaim on pid %d complete\n",
 627                 tsk->pid);
 628
 629out_and_saveregs:
 630        /* Always save the regs here, even if a transaction's not active.
 631         * This context-switches a thread's TM info SPRs.  We do it here to
 632         * be consistent with the restore path (in recheckpoint) which
 633         * cannot happen later in _switch().
 634         */
 635        tm_save_sprs(thr);
 636}
 637
 638extern void __tm_recheckpoint(struct thread_struct *thread,
 639                              unsigned long orig_msr);
 640
 641void tm_recheckpoint(struct thread_struct *thread,
 642                     unsigned long orig_msr)
 643{
 644        unsigned long flags;
 645
 646        /* We really can't be interrupted here as the TEXASR registers can't
 647         * change and later in the trecheckpoint code, we have a userspace R1.
 648         * So let's hard disable over this region.
 649         */
 650        local_irq_save(flags);
 651        hard_irq_disable();
 652
 653        /* The TM SPRs are restored here, so that TEXASR.FS can be set
 654         * before the trecheckpoint and no explosion occurs.
 655         */
 656        tm_restore_sprs(thread);
 657
 658        __tm_recheckpoint(thread, orig_msr);
 659
 660        local_irq_restore(flags);
 661}
 662
 663static inline void tm_recheckpoint_new_task(struct task_struct *new)
 664{
 665        unsigned long msr;
 666
 667        if (!cpu_has_feature(CPU_FTR_TM))
 668                return;
 669
 670        /* Recheckpoint the registers of the thread we're about to switch to.
 671         *
 672         * If the task was using FP, we non-lazily reload both the original and
 673         * the speculative FP register states.  This is because the kernel
 674         * doesn't see if/when a TM rollback occurs, so if we take an FP
 675         * unavailable exception later, we are unable to determine which set
 676         * of FP regs needs to be restored.
 677         */
 678        if (!new->thread.regs)
 679                return;
 680
 681        if (!MSR_TM_ACTIVE(new->thread.regs->msr)){
 682                tm_restore_sprs(&new->thread);
 683                return;
 684        }
 685        msr = new->thread.ckpt_regs.msr;
 686        /* Recheckpoint to restore original checkpointed register state. */
 687        TM_DEBUG("*** tm_recheckpoint of pid %d "
 688                 "(new->msr 0x%lx, new->origmsr 0x%lx)\n",
 689                 new->pid, new->thread.regs->msr, msr);
 690
 691        /* This loads the checkpointed FP/VEC state, if used */
 692        tm_recheckpoint(&new->thread, msr);
 693
 694        /* This loads the speculative FP/VEC state, if used */
 695        if (msr & MSR_FP) {
 696                do_load_up_transact_fpu(&new->thread);
 697                new->thread.regs->msr |=
 698                        (MSR_FP | new->thread.fpexc_mode);
 699        }
 700#ifdef CONFIG_ALTIVEC
 701        if (msr & MSR_VEC) {
 702                do_load_up_transact_altivec(&new->thread);
 703                new->thread.regs->msr |= MSR_VEC;
 704        }
 705#endif
 706        /* We may as well turn on VSX too since all the state is restored now */
 707        if (msr & MSR_VSX)
 708                new->thread.regs->msr |= MSR_VSX;
 709
 710        TM_DEBUG("*** tm_recheckpoint of pid %d complete "
 711                 "(kernel msr 0x%lx)\n",
 712                 new->pid, mfmsr());
 713}
 714
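    /*
     * Called on context switch: if this CPU supports TM, enable it and
     * reclaim any transactional state of the outgoing task.
     */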
 715static inline void __switch_to_tm(struct task_struct *prev)
 716{
 717        if (cpu_has_feature(CPU_FTR_TM)) {
 718                tm_enable();
 719                tm_reclaim_task(prev);
 720        }
 721}
 722
 723/*
 724 * This is called if we are on the way out to userspace and the
 725 * TIF_RESTORE_TM flag is set.  It checks if we need to reload
 726 * FP and/or vector state and does so if necessary.
 727 * If userspace is inside a transaction (whether active or
 728 * suspended) and FP/VMX/VSX instructions have ever been enabled
 729 * inside that transaction, then we have to keep them enabled
 730 * and keep the FP/VMX/VSX state loaded for as long as the transaction
 731 * continues.  The reason is that if we didn't, and subsequently
 732 * got an FP/VMX/VSX unavailable interrupt inside a transaction,
 733 * we don't know whether it's the same transaction, and thus we
 734 * don't know which of the checkpointed state and the transactional
 735 * state to use.
 736 */
 737void restore_tm_state(struct pt_regs *regs)
 738{
 739        unsigned long msr_diff;
 740
 741        clear_thread_flag(TIF_RESTORE_TM);
 742        if (!MSR_TM_ACTIVE(regs->msr))
 743                return;
 744
 745        msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
 746        msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
 747        if (msr_diff & MSR_FP) {
 748                fp_enable();
 749                load_fp_state(&current->thread.fp_state);
 750                regs->msr |= current->thread.fpexc_mode;
 751        }
 752        if (msr_diff & MSR_VEC) {
 753                vec_enable();
 754                load_vr_state(&current->thread.vr_state);
 755        }
 756        regs->msr |= msr_diff;
 757}
 758
 759#else
 760#define tm_recheckpoint_new_task(new)
 761#define __switch_to_tm(prev)
 762#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 763
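    /*
     * Switch from task "prev" to task "new": save the outgoing task's
     * FP/VMX/VSX/SPE and TM state, switch the debug registers and
     * hardware breakpoint, then call the low-level _switch() to swap
     * stacks and non-volatile registers.
     */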
 764struct task_struct *__switch_to(struct task_struct *prev,
 765        struct task_struct *new)
 766{
 767        struct thread_struct *new_thread, *old_thread;
 768        struct task_struct *last;
 769#ifdef CONFIG_PPC_BOOK3S_64
 770        struct ppc64_tlb_batch *batch;
 771#endif
 772
 773        WARN_ON(!irqs_disabled());
 774
 775        /* Back up the TAR and DSCR across context switches.
 776         * Note that the TAR is not available for use in the kernel.  (To
 777         * provide this, the TAR should be backed up/restored on exception
 778         * entry/exit instead, and be in pt_regs.  FIXME, this should be in
 779         * pt_regs anyway (for debug).)
 780         * Save the TAR and DSCR here before we do treclaim/trecheckpoint as
 781         * these will change them.
 782         */
 783        save_early_sprs(&prev->thread);
 784
 785        __switch_to_tm(prev);
 786
 787#ifdef CONFIG_SMP
 788        /* avoid complexity of lazy save/restore of fpu
 789         * by just saving it every time we switch out if
 790         * this task used the fpu during the last quantum.
 791         *
 792         * If it tries to use the fpu again, it'll trap and
 793         * reload its fp regs.  So we don't have to do a restore
 794         * every switch, just a save.
 795         *  -- Cort
 796         */
 797        if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
 798                giveup_fpu(prev);
 799#ifdef CONFIG_ALTIVEC
 800        /*
 801         * If the previous thread used altivec in the last quantum
 802         * (thus changing altivec regs) then save them.
 803         * We used to check the VRSAVE register but not all apps
 804         * set it, so we don't rely on it now (and in fact we need
 805         * to save & restore VSCR even if VRSAVE == 0).  -- paulus
 806         *
 807         * On SMP we always save/restore altivec regs just to avoid the
 808         * complexity of changing processors.
 809         *  -- Cort
 810         */
 811        if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
 812                giveup_altivec(prev);
 813#endif /* CONFIG_ALTIVEC */
 814#ifdef CONFIG_VSX
 815        if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX))
 816                /* VMX and FPU registers are already saved here */
 817                __giveup_vsx(prev);
 818#endif /* CONFIG_VSX */
 819#ifdef CONFIG_SPE
 820        /*
 821         * If the previous thread used spe in the last quantum
 822         * (thus changing spe regs) then save them.
 823         *
 824         * On SMP we always save/restore spe regs just to avoid the
 825         * complexity of changing processors.
 826         */
 827        if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE)))
 828                giveup_spe(prev);
 829#endif /* CONFIG_SPE */
 830
 831#else  /* CONFIG_SMP */
 832#ifdef CONFIG_ALTIVEC
 833        /* Avoid the trap.  On SMP this never happens since
 834         * we don't set last_task_used_altivec -- Cort
 835         */
 836        if (new->thread.regs && last_task_used_altivec == new)
 837                new->thread.regs->msr |= MSR_VEC;
 838#endif /* CONFIG_ALTIVEC */
 839#ifdef CONFIG_VSX
 840        if (new->thread.regs && last_task_used_vsx == new)
 841                new->thread.regs->msr |= MSR_VSX;
 842#endif /* CONFIG_VSX */
 843#ifdef CONFIG_SPE
 844        /* Avoid the trap.  On SMP this never happens since
 845         * we don't set last_task_used_spe
 846         */
 847        if (new->thread.regs && last_task_used_spe == new)
 848                new->thread.regs->msr |= MSR_SPE;
 849#endif /* CONFIG_SPE */
 850
 851#endif /* CONFIG_SMP */
 852
 853#ifdef CONFIG_PPC_ADV_DEBUG_REGS
 854        switch_booke_debug_regs(&new->thread);
 855#else
 856/*
 857 * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
 858 * schedule DABR
 859 */
 860#ifndef CONFIG_HAVE_HW_BREAKPOINT
 861        if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
 862                __set_breakpoint(&new->thread.hw_brk);
 863#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 864#endif
 865
 866
 867        new_thread = &new->thread;
 868        old_thread = &current->thread;
 869
 870#ifdef CONFIG_PPC64
 871        /*
 872         * Collect processor utilization data per process
 873         */
 874        if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 875                struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
 876                long unsigned start_tb, current_tb;
 877                start_tb = old_thread->start_tb;
 878                cu->current_tb = current_tb = mfspr(SPRN_PURR);
 879                old_thread->accum_tb += (current_tb - start_tb);
 880                new_thread->start_tb = current_tb;
 881        }
 882#endif /* CONFIG_PPC64 */
 883
 884#ifdef CONFIG_PPC_BOOK3S_64
 885        batch = &__get_cpu_var(ppc64_tlb_batch);
 886        if (batch->active) {
 887                current_thread_info()->local_flags |= _TLF_LAZY_MMU;
 888                if (batch->index)
 889                        __flush_tlb_pending(batch);
 890                batch->active = 0;
 891        }
 892#endif /* CONFIG_PPC_BOOK3S_64 */
 893
 894        /*
 895         * We can't take a PMU exception inside _switch() since there is a
 896         * window where the kernel stack SLB and the kernel stack are out
 897         * of sync. Hard disable here.
 898         */
 899        hard_irq_disable();
 900
 901        tm_recheckpoint_new_task(new);
 902
 903        last = _switch(old_thread, new_thread);
 904
 905#ifdef CONFIG_PPC_BOOK3S_64
 906        if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
 907                current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
 908                batch = &__get_cpu_var(ppc64_tlb_batch);
 909                batch->active = 1;
 910        }
 911#endif /* CONFIG_PPC_BOOK3S_64 */
 912
 913        return last;
 914}
 915
 916static int instructions_to_print = 16;
 917
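    /*
     * Dump instructions_to_print instructions around regs->nip, with
     * three quarters of the window before the faulting instruction, and
     * mark the instruction at NIP with <angle brackets>.
     */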
 918static void show_instructions(struct pt_regs *regs)
 919{
 920        int i;
 921        unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
 922                        sizeof(int));
 923
 924        printk("Instruction dump:");
 925
 926        for (i = 0; i < instructions_to_print; i++) {
 927                int instr;
 928
 929                if (!(i % 8))
 930                        printk("\n");
 931
 932#if !defined(CONFIG_BOOKE)
 933                /* If executing with the IMMU off, adjust pc rather
 934                 * than print XXXXXXXX.
 935                 */
 936                if (!(regs->msr & MSR_IR))
 937                        pc = (unsigned long)phys_to_virt(pc);
 938#endif
 939
 940                /* We use __get_user here *only* to avoid an OOPS on a
 941                 * bad address because the pc *should* only be a
 942                 * kernel address.
 943                 */
 944                if (!__kernel_text_address(pc) ||
 945                     __get_user(instr, (unsigned int __user *)pc)) {
 946                        printk(KERN_CONT "XXXXXXXX ");
 947                } else {
 948                        if (regs->nip == pc)
 949                                printk(KERN_CONT "<%08x> ", instr);
 950                        else
 951                                printk(KERN_CONT "%08x ", instr);
 952                }
 953
 954                pc += sizeof(int);
 955        }
 956
 957        printk("\n");
 958}
 959
 960static struct regbit {
 961        unsigned long bit;
 962        const char *name;
 963} msr_bits[] = {
 964#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE)
 965        {MSR_SF,        "SF"},
 966        {MSR_HV,        "HV"},
 967#endif
 968        {MSR_VEC,       "VEC"},
 969        {MSR_VSX,       "VSX"},
 970#ifdef CONFIG_BOOKE
 971        {MSR_CE,        "CE"},
 972#endif
 973        {MSR_EE,        "EE"},
 974        {MSR_PR,        "PR"},
 975        {MSR_FP,        "FP"},
 976        {MSR_ME,        "ME"},
 977#ifdef CONFIG_BOOKE
 978        {MSR_DE,        "DE"},
 979#else
 980        {MSR_SE,        "SE"},
 981        {MSR_BE,        "BE"},
 982#endif
 983        {MSR_IR,        "IR"},
 984        {MSR_DR,        "DR"},
 985        {MSR_PMM,       "PMM"},
 986#ifndef CONFIG_BOOKE
 987        {MSR_RI,        "RI"},
 988        {MSR_LE,        "LE"},
 989#endif
 990        {0,             NULL}
 991};
 992
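    /* Print the symbolic names of the bits set in val, e.g. <SF,EE,PR,FP>. */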
 993static void printbits(unsigned long val, struct regbit *bits)
 994{
 995        const char *sep = "";
 996
 997        printk("<");
 998        for (; bits->bit; ++bits)
 999                if (val & bits->bit) {
1000                        printk("%s%s", sep, bits->name);
1001                        sep = ",";
1002                }
1003        printk(">");
1004}
1005
1006#ifdef CONFIG_PPC64
1007#define REG             "%016lx"
1008#define REGS_PER_LINE   4
1009#define LAST_VOLATILE   13
1010#else
1011#define REG             "%08lx"
1012#define REGS_PER_LINE   8
1013#define LAST_VOLATILE   12
1014#endif
1015
1016void show_regs(struct pt_regs * regs)
1017{
1018        int i, trap;
1019
1020        show_regs_print_info(KERN_DEFAULT);
1021
1022        printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
1023               regs->nip, regs->link, regs->ctr);
1024        printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
1025               regs, regs->trap, print_tainted(), init_utsname()->release);
1026        printk("MSR: "REG" ", regs->msr);
1027        printbits(regs->msr, msr_bits);
1028        printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
1029        trap = TRAP(regs);
1030        if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
1031                printk("CFAR: "REG" ", regs->orig_gpr3);
1032        if (trap == 0x200 || trap == 0x300 || trap == 0x600)
1033#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
1034                printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
1035#else
1036                printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
1037#endif
1038#ifdef CONFIG_PPC64
1039        printk("SOFTE: %ld ", regs->softe);
1040#endif
1041#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1042        if (MSR_TM_ACTIVE(regs->msr))
1043                printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
1044#endif
1045
1046        for (i = 0;  i < 32;  i++) {
1047                if ((i % REGS_PER_LINE) == 0)
1048                        printk("\nGPR%02d: ", i);
1049                printk(REG " ", regs->gpr[i]);
1050                if (i == LAST_VOLATILE && !FULL_REGS(regs))
1051                        break;
1052        }
1053        printk("\n");
1054#ifdef CONFIG_KALLSYMS
1055        /*
1056         * Look up the NIP late so we have the best chance of getting the
1057         * above info out without failing
1058         */
1059        printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
1060        printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
1061#endif
1062        show_stack(current, (unsigned long *) regs->gpr[1]);
1063        if (!user_mode(regs))
1064                show_instructions(regs);
1065}
1066
1067void exit_thread(void)
1068{
1069        discard_lazy_cpu_state();
1070}
1071
1072void flush_thread(void)
1073{
1074        discard_lazy_cpu_state();
1075
1076#ifdef CONFIG_HAVE_HW_BREAKPOINT
1077        flush_ptrace_hw_breakpoint(current);
1078#else /* CONFIG_HAVE_HW_BREAKPOINT */
1079        set_debug_reg_defaults(&current->thread);
1080#endif /* CONFIG_HAVE_HW_BREAKPOINT */
1081}
1082
1083void
1084release_thread(struct task_struct *t)
1085{
1086}
1087
1088/*
1089 * this gets called so that we can store coprocessor state into memory and
1090 * copy the current task into the new thread.
1091 */
1092int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
1093{
1094        flush_fp_to_thread(src);
1095        flush_altivec_to_thread(src);
1096        flush_vsx_to_thread(src);
1097        flush_spe_to_thread(src);
1098        /*
1099         * Flush TM state out so we can copy it.  __switch_to_tm() does this
1100         * flush but it removes the checkpointed state from the current CPU and
1101         * transitions the CPU out of TM mode.  Hence we need to call
1102         * tm_recheckpoint_new_task() (on the same task) to restore the
1103         * checkpointed state back and the TM mode.
1104         */
1105        __switch_to_tm(src);
1106        tm_recheckpoint_new_task(src);
1107
1108        *dst = *src;
1109
1110        clear_task_ebb(dst);
1111
1112        return 0;
1113}
1114
1115/*
1116 * Copy a thread.
1117 */
1118
1119int copy_thread(unsigned long clone_flags, unsigned long usp,
1120                unsigned long arg, struct task_struct *p)
1121{
1122        struct pt_regs *childregs, *kregs;
1123        extern void ret_from_fork(void);
1124        extern void ret_from_kernel_thread(void);
1125        void (*f)(void);
1126        unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
1127        struct thread_info *ti = task_thread_info(p);
1128
1129        klp_init_thread_info(ti);
1130
1131        /* Copy registers */
1132        sp -= sizeof(struct pt_regs);
1133        childregs = (struct pt_regs *) sp;
1134        if (unlikely(p->flags & PF_KTHREAD)) {
1135                memset(childregs, 0, sizeof(struct pt_regs));
1136                childregs->gpr[1] = sp + sizeof(struct pt_regs);
1137                /* function */
1138                if (usp)
1139                        childregs->gpr[14] = ppc_function_entry((void *)usp);
1140#ifdef CONFIG_PPC64
1141                clear_tsk_thread_flag(p, TIF_32BIT);
1142                childregs->softe = 1;
1143#endif
1144                childregs->gpr[15] = arg;
1145                p->thread.regs = NULL;  /* no user register state */
1146                ti->flags |= _TIF_RESTOREALL;
1147                f = ret_from_kernel_thread;
1148        } else {
1149                struct pt_regs *regs = current_pt_regs();
1150                CHECK_FULL_REGS(regs);
1151                *childregs = *regs;
1152                if (usp)
1153                        childregs->gpr[1] = usp;
1154                p->thread.regs = childregs;
1155                childregs->gpr[3] = 0;  /* Result from fork() */
1156                if (clone_flags & CLONE_SETTLS) {
1157#ifdef CONFIG_PPC64
1158                        if (!is_32bit_task())
1159                                childregs->gpr[13] = childregs->gpr[6];
1160                        else
1161#endif
1162                                childregs->gpr[2] = childregs->gpr[6];
1163                }
1164
1165                f = ret_from_fork;
1166        }
1167        sp -= STACK_FRAME_OVERHEAD;
1168
1169        /*
1170         * The way this works is that at some point in the future
1171         * some task will call _switch to switch to the new task.
1172         * That will pop off the stack frame created below and start
1173         * the new task running at ret_from_fork.  The new task will
1174         * do some housekeeping and then return from the fork or clone
1175         * system call, using the stack frame created above.
1176         */
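            /*
             * The resulting kernel stack thus holds (from higher to lower
             * addresses): a zeroed back-chain word, the pt_regs frame for
             * kregs (whose nip is set to f at the end of this function),
             * and a final STACK_FRAME_OVERHEAD that p->thread.ksp points
             * to.
             */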
1177        ((unsigned long *)sp)[0] = 0;
1178        sp -= sizeof(struct pt_regs);
1179        kregs = (struct pt_regs *) sp;
1180        sp -= STACK_FRAME_OVERHEAD;
1181        p->thread.ksp = sp;
1182#ifdef CONFIG_PPC32
1183        p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
1184                                _ALIGN_UP(sizeof(struct thread_info), 16);
1185#endif
1186#ifdef CONFIG_HAVE_HW_BREAKPOINT
1187        p->thread.ptrace_bps[0] = NULL;
1188#endif
1189
1190        p->thread.fp_save_area = NULL;
1191#ifdef CONFIG_ALTIVEC
1192        p->thread.vr_save_area = NULL;
1193#endif
1194
1195#ifdef CONFIG_PPC_STD_MMU_64
1196        if (mmu_has_feature(MMU_FTR_SLB)) {
1197                unsigned long sp_vsid;
1198                unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
1199
1200                if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1201                        sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
1202                                << SLB_VSID_SHIFT_1T;
1203                else
1204                        sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M)
1205                                << SLB_VSID_SHIFT;
1206                sp_vsid |= SLB_VSID_KERNEL | llp;
1207                p->thread.ksp_vsid = sp_vsid;
1208        }
1209#endif /* CONFIG_PPC_STD_MMU_64 */
1210#ifdef CONFIG_PPC64 
1211        if (cpu_has_feature(CPU_FTR_DSCR)) {
1212                p->thread.dscr_inherit = current->thread.dscr_inherit;
1213                p->thread.dscr = mfspr(SPRN_DSCR);
1214        }
1215        if (cpu_has_feature(CPU_FTR_HAS_PPR))
1216                p->thread.ppr = INIT_PPR;
1217#endif
1218        kregs->nip = ppc_function_entry(f);
1219        return 0;
1220}
1221
1222/*
1223 * Set up a thread for executing a new program
1224 */
1225void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
1226{
1227#ifdef CONFIG_PPC64
1228        unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
1229#endif
1230
1231        /*
1232         * If we exec out of a kernel thread then thread.regs will not be
1233         * set.  Do it now.
1234         */
1235        if (!current->thread.regs) {
1236                struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
1237                current->thread.regs = regs - 1;
1238        }
1239
1240#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1241        /*
1242         * Clear any transactional state, we're exec()ing. The cause is
1243         * not important as there will never be a recheckpoint so it's not
1244         * user visible.
1245         */
1246        if (MSR_TM_SUSPENDED(mfmsr()))
1247                tm_reclaim_current(0);
1248#endif
1249
1250        memset(regs->gpr, 0, sizeof(regs->gpr));
1251        regs->ctr = 0;
1252        regs->link = 0;
1253        regs->xer = 0;
1254        regs->ccr = 0;
1255        regs->gpr[1] = sp;
1256
1257        /*
1258         * We have just cleared all the nonvolatile GPRs, so make
1259         * FULL_REGS(regs) return true.  This is necessary to allow
1260         * ptrace to examine the thread immediately after exec.
1261         */
1262        regs->trap &= ~1UL;
1263
1264#ifdef CONFIG_PPC32
1265        regs->mq = 0;
1266        regs->nip = start;
1267        regs->msr = MSR_USER;
1268#else
1269        if (!is_32bit_task()) {
1270                unsigned long entry;
1271
1272                if (is_elf2_task()) {
1273                        /* Look ma, no function descriptors! */
1274                        entry = start;
1275
1276                        /*
1277                         * Ulrich says:
1278                         *   The latest iteration of the ABI requires that when
1279                         *   calling a function (at its global entry point),
1280                         *   the caller must ensure r12 holds the entry point
1281                         *   address (so that the function can quickly
1282                         *   establish addressability).
1283                         */
1284                        regs->gpr[12] = start;
1285                        /* Make sure that's restored on entry to userspace. */
1286                        set_thread_flag(TIF_RESTOREALL);
1287                } else {
1288                        unsigned long toc;
1289
1290                        /* start is a relocated pointer to the function
1291                         * descriptor for the elf _start routine.  The first
1292                         * entry in the function descriptor is the entry
1293                         * address of _start and the second entry is the TOC
1294                         * value we need to use.
1295                         */
1296                        __get_user(entry, (unsigned long __user *)start);
1297                        __get_user(toc, (unsigned long __user *)start+1);
1298
1299                        /* Check whether the e_entry function descriptor entries
1300                         * need to be relocated before we can use them.
1301                         */
1302                        if (load_addr != 0) {
1303                                entry += load_addr;
1304                                toc   += load_addr;
1305                        }
1306                        regs->gpr[2] = toc;
1307                }
1308                regs->nip = entry;
1309                regs->msr = MSR_USER64;
1310        } else {
1311                regs->nip = start;
1312                regs->gpr[2] = 0;
1313                regs->msr = MSR_USER32;
1314        }
1315#endif
1316        discard_lazy_cpu_state();
1317#ifdef CONFIG_VSX
1318        current->thread.used_vsr = 0;
1319#endif
1320        memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
1321        current->thread.fp_save_area = NULL;
1322#ifdef CONFIG_ALTIVEC
1323        memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
1324        current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
1325        current->thread.vr_save_area = NULL;
1326        current->thread.vrsave = 0;
1327        current->thread.used_vr = 0;
1328#endif /* CONFIG_ALTIVEC */
1329#ifdef CONFIG_SPE
1330        memset(current->thread.evr, 0, sizeof(current->thread.evr));
1331        current->thread.acc = 0;
1332        current->thread.spefscr = 0;
1333        current->thread.used_spe = 0;
1334#endif /* CONFIG_SPE */
1335#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1336        if (cpu_has_feature(CPU_FTR_TM))
1337                regs->msr |= MSR_TM;
1338        current->thread.tm_tfhar = 0;
1339        current->thread.tm_texasr = 0;
1340        current->thread.tm_tfiar = 0;
1341#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
1342}
1343
1344#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
1345                | PR_FP_EXC_RES | PR_FP_EXC_INV)
1346
1347int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
1348{
1349        struct pt_regs *regs = tsk->thread.regs;
1350
1351        /* This is a bit hairy.  If we are an SPE-enabled processor
1352         * (have embedded fp) we store the IEEE exception enable flags in
1353         * fpexc_mode.  fpexc_mode is also used for setting the FP exception
1354         * mode (async, precise, disabled) for 'Classic' FP. */
1355        if (val & PR_FP_EXC_SW_ENABLE) {
1356#ifdef CONFIG_SPE
1357                if (cpu_has_feature(CPU_FTR_SPE)) {
1358                        tsk->thread.fpexc_mode = val &
1359                                (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
1360                        return 0;
1361                } else {
1362                        return -EINVAL;
1363                }
1364#else
1365                return -EINVAL;
1366#endif
1367        }
1368
1369        /* On a CONFIG_SPE build this does not hurt us.  The bits that
1370         * __pack_fe01 uses do not overlap with the bits used for
1371         * PR_FP_EXC_SW_ENABLE.  Additionally, the MSR[FE0,FE1] bits
1372         * on CONFIG_SPE implementations are reserved so writing to
1373         * them does not change anything */
1374        if (val > PR_FP_EXC_PRECISE)
1375                return -EINVAL;
1376        tsk->thread.fpexc_mode = __pack_fe01(val);
1377        if (regs != NULL && (regs->msr & MSR_FP) != 0)
1378                regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
1379                        | tsk->thread.fpexc_mode;
1380        return 0;
1381}
1382
1383int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
1384{
1385        unsigned int val;
1386
1387        if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
1388#ifdef CONFIG_SPE
1389                if (cpu_has_feature(CPU_FTR_SPE))
1390                        val = tsk->thread.fpexc_mode;
1391                else
1392                        return -EINVAL;
1393#else
1394                return -EINVAL;
1395#endif
1396        else
1397                val = __unpack_fe01(tsk->thread.fpexc_mode);
1398        return put_user(val, (unsigned int __user *) adr);
1399}
1400
1401int set_endian(struct task_struct *tsk, unsigned int val)
1402{
1403        struct pt_regs *regs = tsk->thread.regs;
1404
1405        if ((val == PR_ENDIAN_LITTLE && !cpu_has_feature(CPU_FTR_REAL_LE)) ||
1406            (val == PR_ENDIAN_PPC_LITTLE && !cpu_has_feature(CPU_FTR_PPC_LE)))
1407                return -EINVAL;
1408
1409        if (regs == NULL)
1410                return -EINVAL;
1411
1412        if (val == PR_ENDIAN_BIG)
1413                regs->msr &= ~MSR_LE;
1414        else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
1415                regs->msr |= MSR_LE;
1416        else
1417                return -EINVAL;
1418
1419        return 0;
1420}
1421
1422int get_endian(struct task_struct *tsk, unsigned long adr)
1423{
1424        struct pt_regs *regs = tsk->thread.regs;
1425        unsigned int val;
1426
1427        if (!cpu_has_feature(CPU_FTR_PPC_LE) &&
1428            !cpu_has_feature(CPU_FTR_REAL_LE))
1429                return -EINVAL;
1430
1431        if (regs == NULL)
1432                return -EINVAL;
1433
1434        if (regs->msr & MSR_LE) {
1435                if (cpu_has_feature(CPU_FTR_REAL_LE))
1436                        val = PR_ENDIAN_LITTLE;
1437                else
1438                        val = PR_ENDIAN_PPC_LITTLE;
1439        } else
1440                val = PR_ENDIAN_BIG;
1441
1442        return put_user(val, (unsigned int __user *)adr);
1443}
1444
1445int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
1446{
1447        tsk->thread.align_ctl = val;
1448        return 0;
1449}
1450
1451int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
1452{
1453        return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
1454}
1455
1456static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
1457                                  unsigned long nbytes)
1458{
1459        unsigned long stack_page;
1460        unsigned long cpu = task_cpu(p);
1461
1462        /*
1463         * Avoid crashing if the stack has overflowed and corrupted
1464         * task_cpu(p), which is in the thread_info struct.
1465         */
1466        if (cpu < NR_CPUS && cpu_possible(cpu)) {
1467                stack_page = (unsigned long) hardirq_ctx[cpu];
1468                if (sp >= stack_page + sizeof(struct thread_struct)
1469                    && sp <= stack_page + THREAD_SIZE - nbytes)
1470                        return 1;
1471
1472                stack_page = (unsigned long) softirq_ctx[cpu];
1473                if (sp >= stack_page + sizeof(struct thread_struct)
1474                    && sp <= stack_page + THREAD_SIZE - nbytes)
1475                        return 1;
1476        }
1477        return 0;
1478}
1479
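    /*
     * Check that sp leaves room for at least nbytes within task p's
     * kernel stack or within an IRQ stack of the task's CPU.
     */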
1480int validate_sp(unsigned long sp, struct task_struct *p,
1481                       unsigned long nbytes)
1482{
1483        unsigned long stack_page = (unsigned long)task_stack_page(p);
1484
1485        if (sp >= stack_page + sizeof(struct thread_struct)
1486            && sp <= stack_page + THREAD_SIZE - nbytes)
1487                return 1;
1488
1489        return valid_irq_stack(sp, p, nbytes);
1490}
1491
1492EXPORT_SYMBOL(validate_sp);
1493
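    /*
     * Walk a sleeping task's kernel stack and return the first return
     * address outside the scheduler, i.e. roughly where the task is
     * waiting.
     */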
1494unsigned long get_wchan(struct task_struct *p)
1495{
1496        unsigned long ip, sp;
1497        int count = 0;
1498
1499        if (!p || p == current || p->state == TASK_RUNNING)
1500                return 0;
1501
1502        sp = p->thread.ksp;
1503        if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
1504                return 0;
1505
1506        do {
1507                sp = *(unsigned long *)sp;
1508                if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
1509                        return 0;
1510                if (count > 0) {
1511                        ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
1512                        if (!in_sched_functions(ip))
1513                                return ip;
1514                }
1515        } while (count++ < 16);
1516        return 0;
1517}
1518
1519static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
1520
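    /*
     * Print a backtrace starting from "stack" (or from the task's saved
     * kernel SP if stack is NULL), following the stack back-chain and
     * reporting any exception frames found via STACK_FRAME_REGS_MARKER.
     */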
1521void show_stack(struct task_struct *tsk, unsigned long *stack)
1522{
1523        unsigned long sp, ip, lr, newsp;
1524        int count = 0;
1525        int firstframe = 1;
1526#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1527        int curr_frame = current->curr_ret_stack;
1528        extern void return_to_handler(void);
1529        unsigned long rth = (unsigned long)return_to_handler;
1530        unsigned long mrth = -1;
1531#ifdef CONFIG_PPC64
1532        extern void mod_return_to_handler(void);
1533        rth = *(unsigned long *)rth;
1534        mrth = (unsigned long)mod_return_to_handler;
1535        mrth = *(unsigned long *)mrth;
1536#endif
1537#endif
1538
1539        sp = (unsigned long) stack;
1540        if (tsk == NULL)
1541                tsk = current;
1542        if (sp == 0) {
1543                if (tsk == current)
1544                        asm("mr %0,1" : "=r" (sp));
1545                else
1546                        sp = tsk->thread.ksp;
1547        }
1548
1549        lr = 0;
1550        printk("Call Trace:\n");
1551        do {
1552                if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
1553                        return;
1554
1555                stack = (unsigned long *) sp;
1556                newsp = stack[0];
1557                ip = stack[STACK_FRAME_LR_SAVE];
1558                if (!firstframe || ip != lr) {
1559                        printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
1560#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1561                        if ((ip == rth || ip == mrth) && curr_frame >= 0) {
1562                                printk(" (%pS)",
1563                                       (void *)current->ret_stack[curr_frame].ret);
1564                                curr_frame--;
1565                        }
1566#endif
1567                        if (firstframe)
1568                                printk(" (unreliable)");
1569                        printk("\n");
1570                }
1571                firstframe = 0;
1572
1573                /*
1574                 * See if this is an exception frame.
1575                 * We look for the "regshere" marker in the current frame.
1576                 */
1577                if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
1578                    && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
1579                        struct pt_regs *regs = (struct pt_regs *)
1580                                (sp + STACK_FRAME_OVERHEAD);
1581                        lr = regs->link;
1582                        printk("--- Exception: %lx at %pS\n    LR = %pS\n",
1583                               regs->trap, (void *)regs->nip, (void *)lr);
1584                        firstframe = 1;
1585                }
1586
1587                sp = newsp;
1588        } while (count++ < kstack_depth_to_print);
1589}
1590
1591#ifdef CONFIG_PPC64
1592/* Called with hard IRQs off */
1593void notrace __ppc64_runlatch_on(void)
1594{
1595        struct thread_info *ti = current_thread_info();
1596        unsigned long ctrl;
1597
1598        ctrl = mfspr(SPRN_CTRLF);
1599        ctrl |= CTRL_RUNLATCH;
1600        mtspr(SPRN_CTRLT, ctrl);
1601
1602        ti->local_flags |= _TLF_RUNLATCH;
1603}
1604
1605/* Called with hard IRQs off */
1606void notrace __ppc64_runlatch_off(void)
1607{
1608        struct thread_info *ti = current_thread_info();
1609        unsigned long ctrl;
1610
1611        ti->local_flags &= ~_TLF_RUNLATCH;
1612
1613        ctrl = mfspr(SPRN_CTRLF);
1614        ctrl &= ~CTRL_RUNLATCH;
1615        mtspr(SPRN_CTRLT, ctrl);
1616}
1617#endif /* CONFIG_PPC64 */
1618
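    /*
     * Randomise the initial user stack pointer by up to a page (unless
     * randomisation is disabled) while keeping 16-byte alignment.
     */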
1619unsigned long arch_align_stack(unsigned long sp)
1620{
1621        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
1622                sp -= get_random_int() & ~PAGE_MASK;
1623        return sp & ~0xf;
1624}
1625
1626static inline unsigned long brk_rnd(void)
1627{
1628        unsigned long rnd = 0;
1629
1630        /* 8MB for 32bit, 1GB for 64bit */
1631        if (is_32bit_task())
1632                rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
1633        else
1634                rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
1635
1636        return rnd << PAGE_SHIFT;
1637}
1638
1639unsigned long arch_randomize_brk(struct mm_struct *mm)
1640{
1641        unsigned long base = mm->brk;
1642        unsigned long ret;
1643
1644#ifdef CONFIG_PPC_STD_MMU_64
1645        /*
1646         * If we are using 1TB segments and we are allowed to randomise
1647         * the heap, we can put it above 1TB so it is backed by a 1TB
1648         * segment. Otherwise the heap will be in the bottom 1TB
1649         * which always uses 256MB segments and this may result in a
1650         * performance penalty.
1651         */
1652        if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
1653                base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
1654#endif
1655
1656        ret = PAGE_ALIGN(base + brk_rnd());
1657
1658        if (ret < mm->brk)
1659                return mm->brk;
1660
1661        return ret;
1662}
1663
1664unsigned long randomize_et_dyn(unsigned long base)
1665{
1666        unsigned long ret = PAGE_ALIGN(base + brk_rnd());
1667
1668        if (ret < base)
1669                return base;
1670
1671        return ret;
1672}
1673