linux/arch/powerpc/kernel/vector.S
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
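/*
 * Note: THREAD_TRANSACT_VRSTATE is the offset of the transactional
 * (checkpointed) copy of the vector state in thread_struct; its layout
 * is assumed to match the regular vr_state (32 VRs followed by the VSCR
 * image at VRSTATE_VSCR), which is why the same lvx/REST_32VRS sequence
 * can be reused here.
 */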
_GLOBAL(do_load_up_transact_altivec)
        mfmsr   r6
        oris    r5,r6,MSR_VEC@h
        MTMSRD(r5)
        isync

        li      r4,1
        stw     r4,THREAD_USED_VR(r3)

        li      r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
        lvx     v0,r10,r3
        mtvscr  v0
        addi    r10,r3,THREAD_TRANSACT_VRSTATE
        REST_32VRS(0,r4,r10)

        /* Disable VEC again. */
        MTMSRD(r6)
        isync

        blr
#endif

/*
 * Enable use of VMX/Altivec for the caller.
 */
_GLOBAL(vec_enable)
        mfmsr   r3
        oris    r3,r3,MSR_VEC@h
        MTMSRD(r3)
        isync
        blr

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
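/*
 * r3 points to the save area.  Both this routine and store_vr_state
 * below assume the layout of struct thread_vr_state: the 32 vector
 * registers followed by the VSCR image at offset VRSTATE_VSCR.
 */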
_GLOBAL(load_vr_state)
        li      r4,VRSTATE_VSCR
        lvx     v0,r4,r3
        mtvscr  v0
        REST_32VRS(0,r4,r3)
        blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
        SAVE_32VRS(0, r4, r3)
        mfvscr  v0
        li      r4, VRSTATE_VSCR
        stvx    v0, r4, r3
        blr

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
        mfmsr   r5                      /* grab the current MSR */
        oris    r5,r5,MSR_VEC@h
        MTMSRD(r5)                      /* enable use of AltiVec now */
        isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code. Note that we could rely on vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
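/*
 * Rough C-level sketch of the UP lazy-switch logic below (illustrative
 * only; 'prev' stands for last_task_used_altivec):
 *
 *      if (prev) {
 *              save VRs and VSCR into prev->thread.vr_state;
 *              prev's saved regs->msr &= ~MSR_VEC;  // so it faults to reload
 *      }
 *      load current's vr_state;  mark MSR_VEC in the saved MSR;
 *      last_task_used_altivec = current;
 */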
#ifndef CONFIG_SMP
        LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
        toreal(r3)
        PPC_LL  r4,ADDROFF(last_task_used_altivec)(r3)
        PPC_LCMPI       0,r4,0
        beq     1f

        /* Save VMX state to last_task_used_altivec's THREAD struct */
        toreal(r4)
        addi    r4,r4,THREAD
        addi    r6,r4,THREAD_VRSTATE
        SAVE_32VRS(0,r5,r6)
        mfvscr  v0
        li      r10,VRSTATE_VSCR
        stvx    v0,r10,r6
        /* Disable VMX for last_task_used_altivec */
        PPC_LL  r5,PT_REGS(r4)
        toreal(r5)
        PPC_LL  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
        lis     r10,MSR_VEC@h
        andc    r4,r4,r10
        PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

        /* Hack: if we get an altivec unavailable trap with VRSAVE
         * set to all zeros, we assume this is a broken application
         * that fails to set it properly, and thus we switch it to
         * all 1's
         */
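        /* By ABI convention VRSAVE is a bitmask of the vector registers
         * the application considers live.  If it stays at 0, any code
         * that honours it (preserving only the marked registers) could
         * discard live vector state, hence the all-1's default below.
         */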
        mfspr   r4,SPRN_VRSAVE
        cmpwi   0,r4,0
        bne+    1f
        li      r4,-1
        mtspr   SPRN_VRSAVE,r4
1:
        /* enable use of VMX after return */
#ifdef CONFIG_PPC32
        mfspr   r5,SPRN_SPRG_THREAD             /* current task's THREAD (phys) */
        oris    r9,r9,MSR_VEC@h
#else
        ld      r4,PACACURRENT(r13)
        addi    r5,r4,THREAD            /* Get THREAD */
        oris    r12,r12,MSR_VEC@h
        std     r12,_MSR(r1)
#endif
        addi    r6,r5,THREAD_VRSTATE
        li      r4,1
        li      r10,VRSTATE_VSCR
        stw     r4,THREAD_USED_VR(r5)
        lvx     v0,r10,r6
        mtvscr  v0
        REST_32VRS(0,r4,r6)
#ifndef CONFIG_SMP
        /* Update last_task_used_altivec to 'current' */
        subi    r4,r5,THREAD            /* Back to 'current' */
        fromreal(r4)
        PPC_STL r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
        /* restore registers and return */
        blr

_GLOBAL(giveup_altivec_notask)
        mfmsr   r3
        andis.  r4,r3,MSR_VEC@h
        bnelr                           /* Already enabled? */
        oris    r3,r3,MSR_VEC@h
        SYNC
        MTMSRD(r3)                      /* enable use of VMX now */
        isync
        blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
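/*
 * Note on the save area chosen below: if the thread's alternate save
 * pointer (THREAD_VRSAVEAREA) is non-NULL, the register image is written
 * there instead of the usual thread.vr_state; callers can use this to
 * redirect the save to another buffer.
 */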
_GLOBAL(giveup_altivec)
        mfmsr   r5
        oris    r5,r5,MSR_VEC@h
        SYNC
        MTMSRD(r5)                      /* enable use of VMX now */
        isync
        PPC_LCMPI       0,r3,0
        beqlr                           /* if no previous owner, done */
        addi    r3,r3,THREAD            /* want THREAD of task */
        PPC_LL  r7,THREAD_VRSAVEAREA(r3)
        PPC_LL  r5,PT_REGS(r3)
        PPC_LCMPI       0,r7,0
        bne     2f
        addi    r7,r3,THREAD_VRSTATE
2:      PPC_LCMPI       0,r5,0
        SAVE_32VRS(0,r4,r7)
        mfvscr  v0
        li      r4,VRSTATE_VSCR
        stvx    v0,r4,r7
        beq     1f
        PPC_LL  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
        lis     r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
        lis     r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
        lis     r3,MSR_VEC@h
#endif
        andc    r4,r4,r3                /* disable VEC (and VSX) for previous task */
        PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
        li      r5,0
        LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
        PPC_STL r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
        blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and altivec loads, but first check to see if they have
 * already been done.
 */
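/*
 * VSX has no architected register state of its own: VSRs 0-31 overlay
 * the FP registers and VSRs 32-63 overlay the vector registers, so
 * loading the FP and VMX images (plus setting MSR_VSX) is sufficient.
 */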
_GLOBAL(load_up_vsx)
/* Load the FP and vector registers if they haven't been loaded yet */
        andi.   r5,r12,MSR_FP
        beql+   load_up_fpu             /* skip if already loaded */
        andis.  r5,r12,MSR_VEC@h
        beql+   load_up_altivec         /* skip if already loaded */

#ifndef CONFIG_SMP
        ld      r3,last_task_used_vsx@got(r2)
        ld      r4,0(r3)
        cmpdi   0,r4,0
        beq     1f
        /* Disable VSX for last_task_used_vsx */
        addi    r4,r4,THREAD
        ld      r5,PT_REGS(r4)
        ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
        lis     r6,MSR_VSX@h
        andc    r6,r4,r6
        std     r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
        ld      r4,PACACURRENT(r13)
        addi    r4,r4,THREAD            /* Get THREAD */
        li      r6,1
        stw     r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
        /* enable use of VSX after return */
        oris    r12,r12,MSR_VSX@h
        std     r12,_MSR(r1)
#ifndef CONFIG_SMP
        /* Update last_task_used_vsx to 'current' */
        ld      r4,PACACURRENT(r13)
        std     r4,0(r3)
#endif /* CONFIG_SMP */
        b       fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
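/*
 * Clearing MSR_VSX alone is enough here: the VSX register contents are
 * covered by the FP and VMX save paths (see the overlap note above), so
 * no separate register image needs to be written.
 */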
_GLOBAL(__giveup_vsx)
        mfmsr   r5
        oris    r5,r5,MSR_VSX@h
        mtmsrd  r5                      /* enable use of VSX now */
        isync

        cmpdi   0,r3,0
        beqlr-                          /* if no previous owner, done */
        addi    r3,r3,THREAD            /* want THREAD of task */
        ld      r5,PT_REGS(r3)
        cmpdi   0,r5,0
        beq     1f
        ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
        lis     r3,MSR_VSX@h
        andc    r4,r4,r3                /* disable VSX for previous task */
        std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
        li      r5,0
        ld      r4,last_task_used_vsx@got(r2)
        std     r5,0(r4)
#endif /* CONFIG_SMP */
        blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
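/*
 * Common pattern for the emulation helpers below: r3 is the destination
 * vector, r4 (and r5/r6 where present) are source vectors.  Each vector
 * is treated as four 32-bit single-precision elements, processed one at
 * a time with scalar FP instructions in a CTR-driven loop of 4.
 */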
#ifdef CONFIG_PPC32
        .data
fpzero:
        .long   0
fpone:
        .long   0x3f800000      /* 1.0 in single-precision FP */
fphalf:
        .long   0x3f000000      /* 0.5 in single-precision FP */

#define LDCONST(fr, name)       \
        lis     r11,name@ha;    \
        lfs     fr,name@l(r11)
#else

        .section ".toc","aw"
fpzero:
        .tc     FD_0_0[TC],0
fpone:
        .tc     FD_3ff00000_0[TC],0x3ff0000000000000    /* 1.0 */
fphalf:
        .tc     FD_3fe00000_0[TC],0x3fe0000000000000    /* 0.5 */

#define LDCONST(fr, name)       \
        lfd     fr,name@toc(r2)
#endif

        .text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
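/*
 * Calling convention used by the helpers below: the caller does
 * "mflr r12; bl fpenable".  fpenable allocates a 64-byte frame, saves
 * fr0/fr1/fr31 and the caller's FPSCR (kept in fr31), and fpdisable
 * undoes all of that and returns through r12.  Callers needing extra
 * scratch FPRs (fr2-fr5) save them at 32(r1) and up in the same frame.
 */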
fpenable:
#ifdef CONFIG_PPC32
        stwu    r1,-64(r1)
#else
        stdu    r1,-64(r1)
#endif
        mfmsr   r10
        ori     r11,r10,MSR_FP
        mtmsr   r11
        isync
        stfd    fr0,24(r1)
        stfd    fr1,16(r1)
        stfd    fr31,8(r1)
        LDCONST(fr1, fpzero)
        mffs    fr31
        MTFSF_L(fr1)
        blr

fpdisable:
        mtlr    r12
        MTFSF_L(fr31)
        lfd     fr31,8(r1)
        lfd     fr1,16(r1)
        lfd     fr0,24(r1)
        mtmsr   r10
        isync
        addi    r1,r1,64
        blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
        mflr    r12
        bl      fpenable
        li      r0,4
        mtctr   r0
        li      r6,0
1:      lfsx    fr0,r4,r6
        lfsx    fr1,r5,r6
        fadds   fr0,fr0,fr1
        stfsx   fr0,r3,r6
        addi    r6,r6,4
        bdnz    1b
        b       fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
        mflr    r12
        bl      fpenable
        li      r0,4
        mtctr   r0
        li      r6,0
1:      lfsx    fr0,r4,r6
        lfsx    fr1,r5,r6
        fsubs   fr0,fr0,fr1
        stfsx   fr0,r3,r6
        addi    r6,r6,4
        bdnz    1b
        b       fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
        mflr    r12
        bl      fpenable
        stfd    fr2,32(r1)
        li      r0,4
        mtctr   r0
        li      r7,0
1:      lfsx    fr0,r4,r7
        lfsx    fr1,r5,r7
        lfsx    fr2,r6,r7
        fmadds  fr0,fr0,fr2,fr1
        stfsx   fr0,r3,r7
        addi    r7,r7,4
        bdnz    1b
        lfd     fr2,32(r1)
        b       fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
        mflr    r12
        bl      fpenable
        stfd    fr2,32(r1)
        li      r0,4
        mtctr   r0
        li      r7,0
1:      lfsx    fr0,r4,r7
        lfsx    fr1,r5,r7
        lfsx    fr2,r6,r7
        fnmsubs fr0,fr0,fr2,fr1
        stfsx   fr0,r3,r7
        addi    r7,r7,4
        bdnz    1b
        lfd     fr2,32(r1)
        b       fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
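/*
 * Note: the hardware vrefp instruction only guarantees an estimate with
 * bounded relative error; computing the full-precision quotient here is
 * believed to be an acceptable (strictly more accurate) emulation.
 */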
_GLOBAL(vrefp)
        mflr    r12
        bl      fpenable
        li      r0,4
        LDCONST(fr1, fpone)
        mtctr   r0
        li      r6,0
1:      lfsx    fr0,r4,r6
        fdivs   fr0,fr1,fr0
        stfsx   fr0,r3,r6
        addi    r6,r6,4
        bdnz    1b
        b       fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
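/*
 * Newton-Raphson refinement for y = 1/sqrt(s): applying Newton's method
 * to f(r) = 1/r^2 - s gives the update
 *
 *      r' = r + (r/2) * (1 - s*r*r)
 *
 * which is exactly the fmuls/fnmsubs/fmadds sequence in the loop below,
 * applied twice per element.
 */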
_GLOBAL(vrsqrtefp)
        mflr    r12
        bl      fpenable
        stfd    fr2,32(r1)
        stfd    fr3,40(r1)
        stfd    fr4,48(r1)
        stfd    fr5,56(r1)
        li      r0,4
        LDCONST(fr4, fpone)
        LDCONST(fr5, fphalf)
        mtctr   r0
        li      r6,0
1:      lfsx    fr0,r4,r6
        frsqrte fr1,fr0         /* r = frsqrte(s) */
        fmuls   fr3,fr1,fr0     /* r * s */
        fmuls   fr2,fr1,fr5     /* r * 0.5 */
        fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
        fmadds  fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
        fmuls   fr3,fr1,fr0     /* r * s */
        fmuls   fr2,fr1,fr5     /* r * 0.5 */
        fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
        fmadds  fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
        stfsx   fr1,r3,r6
        addi    r6,r6,4
        bdnz    1b
        lfd     fr5,56(r1)
        lfd     fr4,48(r1)
        lfd     fr3,40(r1)
        lfd     fr2,32(r1)
        b       fpdisable