linux/arch/ia64/kernel/fsys.S
<<
>>
Prefs
   1/*
   2 * This file contains the light-weight system call handlers (fsyscall-handlers).
   3 *
   4 * Copyright (C) 2003 Hewlett-Packard Co
   5 *      David Mosberger-Tang <davidm@hpl.hp.com>
   6 *
   7 * 25-Sep-03 davidm     Implement fsys_rt_sigprocmask().
   8 * 18-Feb-03 louisk     Implement fsys_gettimeofday().
   9 * 28-Feb-03 davidm     Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
  10 *                      probably broke it along the way... ;-)
  11 * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
  12 *                      it capable of using memory based clocks without falling back to C code.
  13 * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
  14 *
  15 */
  16
  17#include <asm/asmmacro.h>
  18#include <asm/errno.h>
  19#include <asm/asm-offsets.h>
  20#include <asm/percpu.h>
  21#include <asm/thread_info.h>
  22#include <asm/sal.h>
  23#include <asm/signal.h>
  24#include <asm/system.h>
  25#include <asm/unistd.h>
  26
  27#include "entry.h"
  28#include "paravirt_inst.h"
  29
  30/*
  31 * See Documentation/ia64/fsys.txt for details on fsyscalls.
  32 *
  33 * On entry to an fsyscall handler:
  34 *   r10        = 0 (i.e., defaults to "successful syscall return")
  35 *   r11        = saved ar.pfs (a user-level value)
  36 *   r15        = system call number
  37 *   r16        = "current" task pointer (in normal kernel-mode, this is in r13)
  38 *   r32-r39    = system call arguments
  39 *   b6         = return address (a user-level value)
  40 *   ar.pfs     = previous frame-state (a user-level value)
  41 *   PSR.be     = cleared to zero (i.e., little-endian byte order is in effect)
  42 *   all other registers may contain values passed in from user-mode
  43 *
  44 * On return from an fsyscall handler:
  45 *   r11        = saved ar.pfs (as passed into the fsyscall handler)
  46 *   r15        = system call number (as passed into the fsyscall handler)
  47 *   r32-r39    = system call arguments (as passed into the fsyscall handler)
  48 *   b6         = return address (as passed into the fsyscall handler)
  49 *   ar.pfs     = previous frame-state (as passed into the fsyscall handler)
  50 */
  51
  52ENTRY(fsys_ni_syscall)
  53        .prologue
  54        .altrp b6
  55        .body
  56        mov r8=ENOSYS
  57        mov r10=-1
  58        FSYS_RETURN
  59END(fsys_ni_syscall)
  60
   61ENTRY(fsys_getpid)
             /*
              * Light-weight getpid().
              *
              * In:   r16 = current task pointer
              * Out:  r8  = pid->numbers[pid->level].nr, where pid is the PID attached
              *             to current->group_leader
              *
              * Branches to fsys_fallback_syscall when any TIF_ALLWORK_MASK bit is set
              * in the thread-info flags.
              */
   62        .prologue
   63        .altrp b6
   64        .body
   65        add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
   66        ;;
   67        ld8 r17=[r17]                           // r17 = current->group_leader
   68        add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = address of the thread-info flags
   69        ;;
   70        ld4 r9=[r9]                             // r9 = thread-info flags
   71        add r17=IA64_TASK_TGIDLINK_OFFSET,r17
   72        ;;
   73        and r9=TIF_ALLWORK_MASK,r9              // r9 = pending-work bits only
   74        ld8 r17=[r17]                           // r17 = current->group_leader->pids[PIDTYPE_PID].pid
   75        ;;
   76        add r8=IA64_PID_LEVEL_OFFSET,r17
   77        ;;
   78        ld4 r8=[r8]                             // r8 = pid->level
   79        add r17=IA64_PID_UPID_OFFSET,r17        // r17 = &pid->numbers[0]
   80        ;;
   81        shl r8=r8,IA64_UPID_SHIFT               // r8 = byte offset of numbers[pid->level]
   82        ;;
   83        add r17=r17,r8                          // r17 = &pid->numbers[pid->level]
   84        ;;
   85        ld4 r8=[r17]                            // r8 = pid->numbers[pid->level].nr
   86        ;;
   87        mov r17=0                               // do not hand a kernel pointer back to user level
   88        ;;
   89        cmp.ne p8,p0=0,r9                       // p8 <- work pending?
   90(p8)    br.spnt.many fsys_fallback_syscall
   91        FSYS_RETURN
   92END(fsys_getpid)
  93
  94ENTRY(fsys_getppid)
  95        .prologue
  96        .altrp b6
  97        .body
  98        add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
  99        ;;
 100        ld8 r17=[r17]                           // r17 = current->group_leader
 101        add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 102        ;;
 103
 104        ld4 r9=[r9]
 105        add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
 106        ;;
 107        and r9=TIF_ALLWORK_MASK,r9
 108
 1091:      ld8 r18=[r17]                           // r18 = current->group_leader->real_parent
 110        ;;
 111        cmp.ne p8,p0=0,r9
 112        add r8=IA64_TASK_TGID_OFFSET,r18        // r8 = &current->group_leader->real_parent->tgid
 113        ;;
 114
 115        /*
 116         * The .acq is needed to ensure that the read of tgid has returned its data before
 117         * we re-check "real_parent".
 118         */
 119        ld4.acq r8=[r8]                         // r8 = current->group_leader->real_parent->tgid
 120#ifdef CONFIG_SMP
 121        /*
 122         * Re-read current->group_leader->real_parent.
 123         */
 124        ld8 r19=[r17]                           // r19 = current->group_leader->real_parent
 125(p8)    br.spnt.many fsys_fallback_syscall
 126        ;;
 127        cmp.ne p6,p0=r18,r19                    // did real_parent change?
 128        mov r19=0                       // i must not leak kernel bits...
 129(p6)    br.cond.spnt.few 1b                     // yes -> redo the read of tgid and the check
 130        ;;
 131        mov r17=0                       // i must not leak kernel bits...
 132        mov r18=0                       // i must not leak kernel bits...
 133#else
 134        mov r17=0                       // i must not leak kernel bits...
 135        mov r18=0                       // i must not leak kernel bits...
 136        mov r19=0                       // i must not leak kernel bits...
 137#endif
 138        FSYS_RETURN
 139END(fsys_getppid)
 140
  141ENTRY(fsys_set_tid_address)
             /*
              * Light-weight set_tid_address().
              *
              * In:   r16 = current task pointer
              *       r32 = value to record at IA64_TASK_CLEAR_CHILD_TID_OFFSET in the task
              * Out:  r8  = pid->numbers[pid->level].nr of current's own PID
              *
              * A NaT argument is recorded as -1.  The store is done before the
              * pending-work branch, so it also happens when the handler ends up in
              * fsys_fallback_syscall.
              */
  142        .prologue
  143        .altrp b6
  144        .body
  145        add r9=TI_FLAGS+IA64_TASK_SIZE,r16
  146        add r17=IA64_TASK_TGIDLINK_OFFSET,r16
  147        ;;
  148        ld4 r9=[r9]                             // r9 = thread-info flags
  149        tnat.z p6,p7=r32                // check argument register for being NaT
  150        ld8 r17=[r17]                           // r17 = current->pids[PIDTYPE_PID].pid
  151        ;;
  152        and r9=TIF_ALLWORK_MASK,r9              // r9 = pending-work bits only
  153        add r8=IA64_PID_LEVEL_OFFSET,r17
  154        add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16    // r18 = address of the field to update
  155        ;;
  156        ld4 r8=[r8]                             // r8 = pid->level
  157        add r17=IA64_PID_UPID_OFFSET,r17        // r17 = &pid->numbers[0]
  158        ;;
  159        shl r8=r8,IA64_UPID_SHIFT               // r8 = byte offset of numbers[pid->level]
  160        ;;
  161        add r17=r17,r8                          // r17 = &pid->numbers[pid->level]
  162        ;;
  163        ld4 r8=[r17]                            // r8 = pid->numbers[pid->level].nr
  164        ;;
  165        cmp.ne p8,p0=0,r9                       // p8 <- work pending?
  166        mov r17=-1                              // value stored when the argument is NaT
  167        ;;
  168(p6)    st8 [r18]=r32                           // argument is a real value: record it
  169(p7)    st8 [r18]=r17                           // argument is NaT: record -1 instead
  170(p8)    br.spnt.many fsys_fallback_syscall
  171        ;;
  172        mov r17=0                       // i must not leak kernel bits...
  173        mov r18=0                       // i must not leak kernel bits...
  174        FSYS_RETURN
  175END(fsys_set_tid_address)
 176
  177#if IA64_GTOD_LOCK_OFFSET !=0
  178#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
  179#endif
  180#if IA64_ITC_JITTER_OFFSET !=0
  181#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
  182#endif
     /*
      * The two checks above exist because the time code below dereferences the
      * base addresses of fsyscall_gtod_data and itc_jitter_data directly to read
      * the lock sequence and the jitter value, i.e. it relies on offset 0.
      */
     /* Clock ids accepted by fsys_clock_gettime. */
  183#define CLOCK_REALTIME 0
  184#define CLOCK_MONOTONIC 1
     /*
      * Time processing flags passed to .gettime in r30.  They are copied into the
      * predicate registers with "mov pr = r30,0xc000", so bit 14 lands in p14 and
      * bit 15 in p15.
      */
  185#define CLOCK_DIVIDE_BY_1000 0x4000
  186#define CLOCK_ADD_MONOTONIC 0x8000
 187
  188ENTRY(fsys_gettimeofday)
             /*
              * Light-weight gettimeofday().
              *
              * In:   r16 = current task pointer
              *       r32 = where to store the result (two 8-byte words)
              *       r33 = second argument; only checked for NaT, never dereferenced here
              * Out:  r8  = 0 and r10 = 0 on success;
              *       r8  = EINVAL or EFAULT and r10 = -1 on failure
              *
              * The code from .gettime onwards is shared with fsys_clock_gettime, which
              * enters with r31 (result pointer) and r30 (processing flags) already set.
              * The labels .fail_einval and .fail_efault at the end are also used as
              * error exits by other handlers in this file.
              */
  189        .prologue
  190        .altrp b6
  191        .body
  192        mov r31 = r32                   // r31 = result pointer
  193        tnat.nz p6,p0 = r33             // guard against NaT argument
  194(p6)    br.cond.spnt.few .fail_einval
  195        mov r30 = CLOCK_DIVIDE_BY_1000  // flags: convert the nanosecond part by dividing by 1000
  196        ;;
  197.gettime:
  198        // Register map
  199        // Incoming r31 = pointer to address where to place result
  200        //          r30 = flags determining how time is processed
  201        // r2,r3 = temp r4-r7 preserved
  202        // r8 = result nanoseconds
  203        // r9 = result seconds
  204        // r10 = temporary storage for clock difference
  205        // r11 = preserved: saved ar.pfs
  206        // r12 = preserved: memory stack
  207        // r13 = preserved: thread pointer
  208        // r14 = address of mask / mask value
  209        // r15 = preserved: system call number
  210        // r16 = preserved: current task pointer
  211        // r17 = (not used)
  212        // r18 = (not used)
  213        // r19 = address of itc_lastcycle
  214        // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
  215        // r21 = address of mmio_ptr
  216        // r22 = address of wall_time or monotonic_time
  217        // r23 = address of shift / value
  218        // r24 = address mult factor / cycle_last value
  219        // r25 = itc_lastcycle value
  220        // r26 = address clocksource cycle_last
  221        // r27 = (not used)
  222        // r28 = sequence number at the beginning of critical section
  223        // r29 = address of itc_jitter
  224        // r30 = time processing flags / memory address
  225        // r31 = pointer to result
  226        // Predicates
  227        // p6,p7 short term use
  228        // p8 = timesource ar.itc
  229        // p9 = timesource mmio64
  230        // p10 = timesource mmio32 - not used
  231        // p11 = timesource not to be handled by asm code
  232        // p12 = memory time source ( = p9 | p10) - not used
  233        // p13 = do cmpxchg with itc_lastcycle
  234        // p14 = Divide by 1000
  235        // p15 = Add monotonic
  236        //
  237        // Note that instructions are optimized for McKinley. McKinley can
  238        // process two bundles simultaneously and therefore we continuously
  239        // try to feed the CPU two bundles and then a stop.
  240
  241        add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
  242        tnat.nz p6,p0 = r31             // guard against NaT argument
  243(p6)    br.cond.spnt.few .fail_einval
  244        movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
  245        ;;
  246        ld4 r2 = [r2]                   // process work pending flags
  247        movl r29 = itc_jitter_data      // itc_jitter
  248        add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20        // wall_time
  249        add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
  250        mov pr = r30,0xc000     // Set predicates according to function
  251        ;;
  252        and r2 = TIF_ALLWORK_MASK,r2
  253        add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
  254(p15)   add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20        // monotonic_time
  255        ;;
  256        add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20     // clksrc_cycle_last
  257        cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
  258(p6)    br.cond.spnt.many fsys_fallback_syscall
  259        ;;
  260        // Begin critical section
  261.time_redo:
  262        ld4.acq r28 = [r20]     // gtod_lock.sequence, Must take first
  263        ;;
  264        and r28 = ~1,r28        // And make sequence even to force retry if odd
  265        ;;
  266        ld8 r30 = [r21]         // clocksource->mmio_ptr
  267        add r24 = IA64_CLKSRC_MULT_OFFSET,r20
  268        ld4 r2 = [r29]          // itc_jitter value
  269        add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
  270        add r14 = IA64_CLKSRC_MASK_OFFSET,r20
  271        ;;
  272        ld4 r3 = [r24]          // clocksource mult value
  273        ld8 r14 = [r14]         // clocksource mask value
  274        cmp.eq p8,p9 = 0,r30    // use cpu timer if no mmio_ptr
  275        ;;
  276        setf.sig f7 = r3        // Setup for mult scaling of counter
  277(p8)    cmp.ne p13,p0 = r2,r0   // need itc_jitter compensation, set p13
  278        ld4 r23 = [r23]         // clocksource shift value
  279        ld8 r24 = [r26]         // get clksrc_cycle_last value
  280(p9)    cmp.eq p13,p0 = 0,r30   // if mmio_ptr, clear p13 jitter control
  281        ;;
  282        .pred.rel.mutex p8,p9
  283        MOV_FROM_ITC(p8, p6, r2, r10)   // CPU_TIMER. 36 clocks latency!!!
  284(p9)    ld8 r2 = [r30]          // MMIO_TIMER. Could also have latency issues..
  285(p13)   ld8 r25 = [r19]         // get itc_lastcycle value
  286        ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET     // tv_sec
  287        ;;
  288        ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET    // tv_nsec
  289(p13)   sub r3 = r25,r2         // Diff needed before comparison (thanks davidm)
  290        ;;
  291(p13)   cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
  292        sub r10 = r2,r24        // current_cycle - last_cycle
  293        ;;
  294(p6)    sub r10 = r25,r24       // time we got was less than last_cycle
  295(p7)    mov ar.ccv = r25        // more than last_cycle. Prep for cmpxchg
  296        ;;
  297(p7)    cmpxchg8.rel r3 = [r19],r2,ar.ccv
  298        ;;
  299(p7)    cmp.ne p7,p0 = r25,r3   // if cmpxchg not successful
  300        ;;
  301(p7)    sub r10 = r3,r24        // then use new last_cycle instead
  302        ;;
  303        and r10 = r10,r14       // Apply mask
  304        ;;
  305        setf.sig f8 = r10
  306        nop.i 123
  307        ;;
  308        // fault check takes 5 cycles and we have spare time
  309EX(.fail_efault, probe.w.fault r31, 3)
  310        xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
  311        ;;
  312        getf.sig r2 = f8
  313        mf
  314        ;;
  315        ld4 r10 = [r20]         // gtod_lock.sequence
  316        shr.u r2 = r2,r23       // shift by factor
  317        ;;
  318        add r8 = r8,r2          // Add xtime.nsecs
  319        cmp4.ne p7,p0 = r28,r10
  320(p7)    br.cond.dpnt.few .time_redo     // sequence number changed, redo
  321        // End critical section.
  322        // Now r8=tv->tv_nsec and r9=tv->tv_sec
  323        mov r10 = r0            // r10 was a scratch register above; 0 = successful return
  324        movl r2 = 1000000000    // nanoseconds per second
  325        add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31     // r23 = address of the second result word
  326(p14)   movl r3 = 2361183241434822607   // Prep for / 1000 hack
  327        ;;
  328.time_normalize:
  329        mov r21 = r8
  330        cmp.ge p6,p0 = r8,r2
  331(p14)   shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
  332        ;;
  333(p14)   setf.sig f8 = r20
  334(p6)    sub r8 = r8,r2
  335(p6)    add r9 = 1,r9           // two nops before the branch.
  336(p14)   setf.sig f7 = r3        // Chances for repeats are 1 in 10000 for gettod
  337(p6)    br.cond.dpnt.few .time_normalize
  338        ;;
  339        // Divided by 8 through shift. Now divide by 125
  340        // The compiler was able to do that with a multiply
  341        // and a shift and we do the same
  342EX(.fail_efault, probe.w.fault r23, 3)  // This also costs 5 cycles
  343(p14)   xmpy.hu f8 = f8, f7             // xmpy has 5 cycles latency so use it
  344        ;;
  345(p14)   getf.sig r2 = f8
  346        ;;
  347        mov r8 = r0                     // return value 0
  348(p14)   shr.u r21 = r2, 4
  349        ;;
  350EX(.fail_efault, st8 [r31] = r9)        // first result word: seconds
  351EX(.fail_efault, st8 [r23] = r21)       // second result word: nanoseconds, or that value / 1000 if p14
  352        FSYS_RETURN
  353.fail_einval:
  354        mov r8 = EINVAL
  355        mov r10 = -1
  356        FSYS_RETURN
  357.fail_efault:
  358        mov r8 = EFAULT
  359        mov r10 = -1
  360        FSYS_RETURN
  361END(fsys_gettimeofday)
 362
 363ENTRY(fsys_clock_gettime)
 364        .prologue
 365        .altrp b6
 366        .body
 367        cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
 368        // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
 369(p6)    br.spnt.few fsys_fallback_syscall
 370        mov r31 = r33
 371        shl r30 = r32,15
 372        br.many .gettime
 373END(fsys_clock_gettime)
 374
  375/*
  376 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
      *
      * In:   r16 = current task pointer
      *       r32 = how, r33 = set, r34 = oset, r35 = sigsetsize
      * Out:  r8 = 0 on success; otherwise the error exits .fail_einval / .fail_efault
      *       are taken, or the call is redone by fsys_fallback_syscall.
      *
      * Fails with EINVAL when how is above SIG_SETMASK, when sigsetsize is not
      * _NSIG_WORDS*8, or when how or sigsetsize is NaT; fails with EFAULT when set
      * or oset is NaT.  Falls back to the heavy-weight syscall when work is
      * pending, when the siglock cannot be taken at the first attempt (SMP), or
      * when a signal would be pending under the new mask.
      *
      * With set == NULL only the old mask is written to *oset (if oset != NULL).
  377 */
  378#if _NSIG_WORDS != 1
  379# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
  380#endif
  381ENTRY(fsys_rt_sigprocmask)
  382        .prologue
  383        .altrp b6
  384        .body
  385
  386        add r2=IA64_TASK_BLOCKED_OFFSET,r16     // r2 = &current->blocked
  387        add r9=TI_FLAGS+IA64_TASK_SIZE,r16
  388        cmp4.ltu p6,p0=SIG_SETMASK,r32          // p6 <- how > SIG_SETMASK (unsigned)
  389
  390        cmp.ne p15,p0=r0,r34                    // oset != NULL?
  391        tnat.nz p8,p0=r34                       // p8 <- oset is NaT
  392        add r31=IA64_TASK_SIGHAND_OFFSET,r16
  393        ;;
  394        ld8 r3=[r2]                             // read/prefetch current->blocked
  395        ld4 r9=[r9]
  396        tnat.nz.or p6,p0=r35                    // p6 |= sigsetsize is NaT
  397
  398        cmp.ne.or p6,p0=_NSIG_WORDS*8,r35       // p6 |= sigsetsize != _NSIG_WORDS*8
  399        tnat.nz.or p6,p0=r32                    // p6 |= how is NaT
  400(p6)    br.spnt.few .fail_einval                // fail with EINVAL
  401        ;;
  402#ifdef CONFIG_SMP
  403        ld8 r31=[r31]                           // r31 <- current->sighand
  404#endif
  405        and r9=TIF_ALLWORK_MASK,r9
  406        tnat.nz.or p8,p0=r33                    // p8 |= set is NaT
  407        ;;
  408        cmp.ne p7,p0=0,r9                       // p7 <- work pending?
  409        cmp.eq p6,p0=r0,r33                     // set == NULL?
  410        add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
  411(p8)    br.spnt.few .fail_efault                // fail with EFAULT
  412(p7)    br.spnt.many fsys_fallback_syscall      // got pending kernel work...
  413(p6)    br.dpnt.many .store_mask                // -> short-circuit to just reading the signal mask
  414
  415        /* Argh, we actually have to do some work and _update_ the signal mask: */
  416
  417EX(.fail_efault, probe.r.fault r33, 3)          // verify user has read-access to *set
  418EX(.fail_efault, ld8 r14=[r33])                 // r14 <- *set
  419        mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
  420        ;;
  421
  422        RSM_PSR_I(p0, r18, r19)                 // mask interrupt delivery
  423        andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
  424        mov r8=EINVAL                   // default to EINVAL
  425
  426#ifdef CONFIG_SMP
  427        // __ticket_spin_trylock(r31)
  428        ld4 r17=[r31]                   // r17 <- current lock word
  429        ;;
  430        mov.m ar.ccv=r17                // compare value for the cmpxchg below
  431        extr.u r9=r17,17,15
  432        adds r19=1,r17                  // r19 <- lock word + 1: value installed on success
  433        extr.u r18=r17,0,15
  434        ;;
  435        cmp.eq p6,p7=r9,r18             // p6 <- the two 15-bit fields match, p7 <- they differ
  436        ;;
  437(p6)    cmpxchg4.acq r9=[r31],r19,ar.ccv
  438(p6)    dep.z r20=r19,1,15              // next serving ticket for unlock
  439(p7)    br.cond.spnt.many .lock_contention
  440        ;;
  441        cmp4.eq p0,p7=r9,r17            // p7 <- lock word changed under us, cmpxchg failed
  442        adds r31=2,r31                  // r31 <- lock address + 2, the halfword the unlock stores to
  443(p7)    br.cond.spnt.many .lock_contention
  444        ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
  445        ;;
  446#else
  447        ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
  448#endif
  449        add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
  450        add r19=IA64_TASK_SIGNAL_OFFSET,r16
  451        cmp4.eq p6,p0=SIG_BLOCK,r32
  452        ;;
  453        ld8 r19=[r19]                   // r19 <- current->signal
  454        cmp4.eq p7,p0=SIG_UNBLOCK,r32
  455        cmp4.eq p8,p0=SIG_SETMASK,r32
  456        ;;
  457        ld8 r18=[r18]                   // r18 <- current->pending.signal
  458        .pred.rel.mutex p6,p7,p8
  459(p6)    or r14=r3,r14                   // SIG_BLOCK
  460(p7)    andcm r14=r3,r14                // SIG_UNBLOCK
  461
  462(p8)    mov r14=r14                     // SIG_SETMASK
  463(p6)    mov r8=0                        // clear error code
  464        // recalc_sigpending()
  465        add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
  466
  467        add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
  468        ;;
  469        ld4 r17=[r17]           // r17 <- current->signal->group_stop_count
  470(p7)    mov r8=0                // clear error code
  471
  472        ld8 r19=[r19]           // r19 <- current->signal->shared_pending
  473        ;;
  474        cmp4.gt p6,p7=r17,r0    // p6/p7 <- (current->signal->group_stop_count > 0)?
  475(p8)    mov r8=0                // clear error code
  476
  477        or r18=r18,r19          // r18 <- current->pending | current->signal->shared_pending
  478        ;;
  479        // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
  480        andcm r18=r18,r14
  481        add r9=TI_FLAGS+IA64_TASK_SIZE,r16
  482        ;;
  483
  484(p7)    cmp.ne.or.andcm p6,p7=r18,r0            // p6/p7 <- signal pending
  485        mov r19=0                                       // i must not leak kernel bits...
  486(p6)    br.cond.dpnt.many .sig_pending
  487        ;;
  488
  4891:      ld4 r17=[r9]                            // r17 <- current->thread_info->flags
  490        ;;
  491        mov ar.ccv=r17
  492        and r18=~_TIF_SIGPENDING,r17            // r18 <- r17 & ~(1 << TIF_SIGPENDING)
  493        ;;
  494
  495        st8 [r2]=r14                            // update current->blocked with new mask
  496        cmpxchg4.acq r8=[r9],r18,ar.ccv         // current->thread_info->flags <- r18
  497        ;;
  498        cmp.ne p6,p0=r17,r8                     // update failed?
  499(p6)    br.cond.spnt.few 1b                     // yes -> retry
  500
  501#ifdef CONFIG_SMP
  502        // __ticket_spin_unlock(r31)
  503        st2.rel [r31]=r20
  504        mov r20=0                                       // i must not leak kernel bits...
  505#endif
  506        SSM_PSR_I(p0, p9, r31)
  507        ;;
  508
  509        srlz.d                                  // ensure psr.i is set again
  510        mov r18=0                                       // i must not leak kernel bits...
  511
  512.store_mask:
  513EX(.fail_efault, (p15) probe.w.fault r34, 3)    // verify user has write-access to *oset
  514EX(.fail_efault, (p15) st8 [r34]=r3)            // *oset <- mask value read into r3 above
  515        mov r2=0                                        // i must not leak kernel bits...
  516        mov r3=0                                        // i must not leak kernel bits...
  517        mov r8=0                                // return 0
  518        mov r9=0                                        // i must not leak kernel bits...
  519        mov r14=0                                       // i must not leak kernel bits...
  520        mov r17=0                                       // i must not leak kernel bits...
  521        mov r31=0                                       // i must not leak kernel bits...
  522        FSYS_RETURN
  523
  524.sig_pending:
  525#ifdef CONFIG_SMP
  526        // __ticket_spin_unlock(r31)
  527        st2.rel [r31]=r20                       // release the lock
  528#endif
  529        SSM_PSR_I(p0, p9, r17)
  530        ;;
  531        srlz.d
  532        br.sptk.many fsys_fallback_syscall      // with signal pending, do the heavy-weight syscall
  533
  534#ifdef CONFIG_SMP
  535.lock_contention:
  536        /* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
  537        SSM_PSR_I(p0, p9, r17)
  538        ;;
  539        srlz.d
  540        br.sptk.many fsys_fallback_syscall
  541#endif
  542END(fsys_rt_sigprocmask)
 543
 544/*
 545 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
 546 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 547 */
 548ENTRY(fsys_getcpu)
 549        .prologue
 550        .altrp b6
 551        .body
 552        ;;
 553        add r2=TI_FLAGS+IA64_TASK_SIZE,r16
 554        tnat.nz p6,p0 = r32                     // guard against NaT argument
 555        add r3=TI_CPU+IA64_TASK_SIZE,r16
 556        ;;
 557        ld4 r3=[r3]                             // M r3 = thread_info->cpu
 558        ld4 r2=[r2]                             // M r2 = thread_info->flags
 559(p6)    br.cond.spnt.few .fail_einval           // B
 560        ;;
 561        tnat.nz p7,p0 = r33                     // I guard against NaT argument
 562(p7)    br.cond.spnt.few .fail_einval           // B
 563#ifdef CONFIG_NUMA
 564        movl r17=cpu_to_node_map
 565        ;;
 566EX(.fail_efault, probe.w.fault r32, 3)          // M This takes 5 cycles
 567EX(.fail_efault, probe.w.fault r33, 3)          // M This takes 5 cycles
 568        shladd r18=r3,1,r17
 569        ;;
 570        ld2 r20=[r18]                           // r20 = cpu_to_node_map[cpu]
 571        and r2 = TIF_ALLWORK_MASK,r2
 572        ;;
 573        cmp.ne p8,p0=0,r2
 574(p8)    br.spnt.many fsys_fallback_syscall
 575        ;;
 576        ;;
 577EX(.fail_efault, st4 [r32] = r3)
 578EX(.fail_efault, st2 [r33] = r20)
 579        mov r8=0
 580        ;;
 581#else
 582EX(.fail_efault, probe.w.fault r32, 3)          // M This takes 5 cycles
 583EX(.fail_efault, probe.w.fault r33, 3)          // M This takes 5 cycles
 584        and r2 = TIF_ALLWORK_MASK,r2
 585        ;;
 586        cmp.ne p8,p0=0,r2
 587(p8)    br.spnt.many fsys_fallback_syscall
 588        ;;
 589EX(.fail_efault, st4 [r32] = r3)
 590EX(.fail_efault, st2 [r33] = r0)
 591        mov r8=0
 592        ;;
 593#endif
 594        FSYS_RETURN
 595END(fsys_getcpu)
 596
  597ENTRY(fsys_fallback_syscall)
             /*
              * Common exit for light-weight handlers that cannot finish the call
              * themselves.  Looks up the heavy-weight entry point for syscall number
              * r15 in sys_call_table, masks interrupts, and captures psr, ar.rsc,
              * ar.fpsr and ar.pfs in the registers that the code following this
              * routine expects (r18, r21, r26, r27, r29).  There is no return at the
              * end: execution continues into the next routine in the file.
              */
  598        .prologue
  599        .altrp b6
  600        .body
  601        /*
  602         * We only get here from light-weight syscall handlers.  Thus, we already
  603         * know that r15 contains a valid syscall number.  No need to re-check.
  604         */
  605        adds r17=-1024,r15                      // r17 = index into sys_call_table (syscall number - 1024)
  606        movl r14=sys_call_table
  607        ;;
  608        RSM_PSR_I(p0, r26, r27)
  609        shladd r18=r17,3,r14                    // r18 = &sys_call_table[r17] (8-byte entries)
  610        ;;
  611        ld8 r18=[r18]                           // load normal (heavy-weight) syscall entry-point
  612        MOV_FROM_PSR(p0, r29, r26)              // read psr (12 cyc load latency)
  613        mov r27=ar.rsc
  614        mov r21=ar.fpsr
  615        mov r26=ar.pfs
  616END(fsys_fallback_syscall)
 617        /* FALL THROUGH */
  618GLOBAL_ENTRY(paravirt_fsys_bubble_down)
  619        .prologue
  620        .altrp b6
  621        .body
  622        /*
  623         * We get here for syscalls that don't have a lightweight
  624         * handler.  For those, we need to bubble down into the kernel
  625         * and that requires setting up a minimal pt_regs structure,
  626         * and initializing the CPU state more or less as if an
  627         * interruption had occurred.  To make syscall-restarts work,
  628         * we setup pt_regs such that cr_iip points to the second
  629         * instruction in syscall_via_break.  Decrementing the IP
  630         * hence will restart the syscall via break and not
  631         * decrementing IP will return us to the caller, as usual.
  632         * Note that we preserve the value of psr.pp rather than
  633         * initializing it from dcr.pp.  This makes it possible to
  634         * distinguish fsyscall execution from other privileged
  635         * execution.
  636         *
  637         * On entry:
  638         *      - normal fsyscall handler register usage, except
  639         *        that we also have:
  640         *      - r18: address of syscall entry point
  641         *      - r21: ar.fpsr
  642         *      - r26: ar.pfs
  643         *      - r27: ar.rsc
  644         *      - r29: psr
  645         *
  646         * We used to clear some PSR bits here but that requires slow
  647         * serialization.  Fortunately, that isn't really necessary.
  648         * The rationale is as follows: we used to clear bits
  649         * ~PSR_PRESERVED_BITS in PSR.L.  Since
  650         * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
  651         * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
  652         * However,
  653         *
  654         * PSR.BE : already is turned off in __kernel_syscall_via_epc()
  655         * PSR.AC : don't care (kernel normally turns PSR.AC on)
  656         * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
  657         *          invoked
  658         * PSR.DFL: always 0 (kernel never turns it on)
  659         * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
  660         *          initiative
  661         * PSR.DI : always 0 (kernel never turns it on)
  662         * PSR.SI : always 0 (kernel never turns it on)
  663         * PSR.DB : don't care --- kernel never enables kernel-level
  664         *          breakpoints
  665         * PSR.TB : must be 0 already; if it wasn't zero on entry to
  666         *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
  667         *          will trigger a taken branch; the taken-trap-handler then
  668         *          converts the syscall into a break-based system-call.
  669         */
  670        /*
  671         * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
  672         * The rest we have to synthesize.
  673         */
  674#       define PSR_ONE_BITS             ((3 << IA64_PSR_CPL0_BIT)       \
  675                                         | (0x1 << IA64_PSR_RI_BIT)     \
  676                                         | IA64_PSR_BN | IA64_PSR_I)
  677
  678        invala                                  // M0|1
  679        movl r14=ia64_ret_from_syscall          // X
  680
  681        nop.m 0
  682        movl r28=__kernel_syscall_via_break     // X    create cr.iip
  683        ;;
  684
  685        mov r2=r16                              // A    get task addr to addl-addressable register
  686        adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
  687        mov r31=pr                              // I0   save pr (2 cyc)
  688        ;;
  689        st1 [r16]=r0                            // M2|3 clear current->thread.on_ustack flag
  690        addl r22=IA64_RBS_OFFSET,r2             // A    compute base of RBS
  691        add r3=TI_FLAGS+IA64_TASK_SIZE,r2       // A
  692        ;;
  693        ld4 r3=[r3]                             // M0|1 r3 = current_thread_info()->flags
  694        lfetch.fault.excl.nt1 [r22]             // M0|1 prefetch register backing-store
  695        nop.i 0
  696        ;;
  697        mov ar.rsc=0                            // M2   set enforced lazy mode, pl 0, LE, loadrs=0
  698#ifdef CONFIG_VIRT_CPU_ACCOUNTING
  699        MOV_FROM_ITC(p0, p6, r30, r23)          // M    get cycle for accounting
  700#else
  701        nop.m 0
  702#endif
  703        nop.i 0
  704        ;;
  705        mov r23=ar.bspstore                     // M2 (12 cyc) save ar.bspstore
  706        mov.m r24=ar.rnat                       // M2 (5 cyc) read ar.rnat (dual-issues!)
  707        nop.i 0
  708        ;;
  709        mov ar.bspstore=r22                     // M2 (6 cyc) switch to kernel RBS
  710        movl r8=PSR_ONE_BITS                    // X
  711        ;;
  712        mov r25=ar.unat                         // M2 (5 cyc) save ar.unat
  713        mov r19=b6                              // I0   save b6 (2 cyc)
  714        mov r20=r1                              // A    save caller's gp in r20
  715        ;;
  716        or r29=r8,r29                           // A    construct cr.ipsr value to save
  717        mov b6=r18                              // I0   copy syscall entry-point to b6 (7 cyc)
  718        addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
  719
  720        mov r18=ar.bsp                          // M2   save (kernel) ar.bsp (12 cyc)
  721        cmp.ne pKStk,pUStk=r0,r0                // A    set pKStk <- 0, pUStk <- 1
  722        br.call.sptk.many b7=ia64_syscall_setup // B
  723        ;;
  724#ifdef CONFIG_VIRT_CPU_ACCOUNTING
  725        // mov.m r30=ar.itc is called in advance
             // r16 walks the AC_STAMP/AC_STIME fields, r17 the AC_LEAVE/AC_UTIME fields
  726        add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
  727        add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
  728        ;;
  729        ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP   // time at last check in kernel
  730        ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE   // time at leave kernel
  731        ;;
  732        ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME   // cumulated stime
  733        ld8 r21=[r17]                           // cumulated utime
  734        sub r22=r19,r18                         // stime before leave kernel
  735        ;;
  736        st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP   // update stamp
  737        sub r18=r30,r19                         // elapsed time in user mode
  738        ;;
  739        add r20=r20,r22                         // sum stime
  740        add r21=r21,r18                         // sum utime
  741        ;;
  742        st8 [r16]=r20                           // update stime
  743        st8 [r17]=r21                           // update utime
  744        ;;
  745#endif
  746        mov ar.rsc=0x3                          // M2   set eager mode, pl 0, LE, loadrs=0
  747        mov rp=r14                              // I0   set the real return addr
  748        and r3=_TIF_SYSCALL_TRACEAUDIT,r3       // A
  749        ;;
  750        SSM_PSR_I(p0, p6, r22)                  // M2   we're on kernel stacks now, reenable irqs
  751        cmp.eq p8,p0=r3,r0                      // A    p8 <- no _TIF_SYSCALL_TRACEAUDIT flag is set
  752(p10)   br.cond.spnt.many ia64_ret_from_syscall // B    return if bad call-frame or r15 is a NaT
  753
  754        nop.m 0
  755(p8)    br.call.sptk.many b6=b6                 // B    (ignore return address)
  756        br.cond.spnt ia64_trace_syscall         // B
  757END(paravirt_fsys_bubble_down)
 758
 759        .rodata
 760        .align 8                                // slots below are 8 bytes each (data8)
 761        .globl paravirt_fsyscall_table
 762
            // paravirt_fsyscall_table: table of fsyscall-handler addresses, one
            // data8 slot per system call.  The trailing "// 10xx" comments give
            // the syscall number of a slot; going by them, slot 0
            // (fsys_ni_syscall) corresponds to syscall number 1024 and slot N to
            // 1024+N.  A slot holding 0 names no handler here.
            // NOTE(review): zero slots presumably make the lookup code fall back
            // to paravirt_fsys_bubble_down -- confirm at the dispatch site, which
            // is not part of this table.
            //
            // The data8 emitted just before the label is not an indexed slot: it
            // ends up at paravirt_fsyscall_table-8 and holds the address of
            // paravirt_fsys_bubble_down.
 763        data8 paravirt_fsys_bubble_down
 764paravirt_fsyscall_table:
 765        data8 fsys_ni_syscall
 766        data8 0                         // exit                 // 1025
 767        data8 0                         // read
 768        data8 0                         // write
 769        data8 0                         // open
 770        data8 0                         // close
 771        data8 0                         // creat                // 1030
 772        data8 0                         // link
 773        data8 0                         // unlink
 774        data8 0                         // execve
 775        data8 0                         // chdir
 776        data8 0                         // fchdir               // 1035
 777        data8 0                         // utimes
 778        data8 0                         // mknod
 779        data8 0                         // chmod
 780        data8 0                         // chown
 781        data8 0                         // lseek                // 1040
 782        data8 fsys_getpid               // getpid
 783        data8 fsys_getppid              // getppid
 784        data8 0                         // mount
 785        data8 0                         // umount
 786        data8 0                         // setuid               // 1045
 787        data8 0                         // getuid
 788        data8 0                         // geteuid
 789        data8 0                         // ptrace
 790        data8 0                         // access
 791        data8 0                         // sync                 // 1050
 792        data8 0                         // fsync
 793        data8 0                         // fdatasync
 794        data8 0                         // kill
 795        data8 0                         // rename
 796        data8 0                         // mkdir                // 1055
 797        data8 0                         // rmdir
 798        data8 0                         // dup
 799        data8 0                         // pipe
 800        data8 0                         // times
 801        data8 0                         // brk                  // 1060
 802        data8 0                         // setgid
 803        data8 0                         // getgid
 804        data8 0                         // getegid
 805        data8 0                         // acct
 806        data8 0                         // ioctl                // 1065
 807        data8 0                         // fcntl
 808        data8 0                         // umask
 809        data8 0                         // chroot
 810        data8 0                         // ustat
 811        data8 0                         // dup2                 // 1070
 812        data8 0                         // setreuid
 813        data8 0                         // setregid
 814        data8 0                         // getresuid
 815        data8 0                         // setresuid
 816        data8 0                         // getresgid            // 1075
 817        data8 0                         // setresgid
 818        data8 0                         // getgroups
 819        data8 0                         // setgroups
 820        data8 0                         // getpgid
 821        data8 0                         // setpgid              // 1080
 822        data8 0                         // setsid
 823        data8 0                         // getsid
 824        data8 0                         // sethostname
 825        data8 0                         // setrlimit
 826        data8 0                         // getrlimit            // 1085
 827        data8 0                         // getrusage
 828        data8 fsys_gettimeofday         // gettimeofday
 829        data8 0                         // settimeofday
 830        data8 0                         // select
 831        data8 0                         // poll                 // 1090
 832        data8 0                         // symlink
 833        data8 0                         // readlink
 834        data8 0                         // uselib
 835        data8 0                         // swapon
 836        data8 0                         // swapoff              // 1095
 837        data8 0                         // reboot
 838        data8 0                         // truncate
 839        data8 0                         // ftruncate
 840        data8 0                         // fchmod
 841        data8 0                         // fchown               // 1100
 842        data8 0                         // getpriority
 843        data8 0                         // setpriority
 844        data8 0                         // statfs
 845        data8 0                         // fstatfs
 846        data8 0                         // gettid               // 1105
 847        data8 0                         // semget
 848        data8 0                         // semop
 849        data8 0                         // semctl
 850        data8 0                         // msgget
 851        data8 0                         // msgsnd               // 1110
 852        data8 0                         // msgrcv
 853        data8 0                         // msgctl
 854        data8 0                         // shmget
 855        data8 0                         // shmat
 856        data8 0                         // shmdt                // 1115
 857        data8 0                         // shmctl
 858        data8 0                         // syslog
 859        data8 0                         // setitimer
 860        data8 0                         // getitimer
 861        data8 0                                                 // 1120
 862        data8 0
 863        data8 0
 864        data8 0                         // vhangup
 865        data8 0                         // lchown
 866        data8 0                         // remap_file_pages     // 1125
 867        data8 0                         // wait4
 868        data8 0                         // sysinfo
 869        data8 0                         // clone
 870        data8 0                         // setdomainname
 871        data8 0                         // newuname             // 1130
 872        data8 0                         // adjtimex
 873        data8 0
 874        data8 0                         // init_module
 875        data8 0                         // delete_module
 876        data8 0                                                 // 1135
 877        data8 0
 878        data8 0                         // quotactl
 879        data8 0                         // bdflush
 880        data8 0                         // sysfs
 881        data8 0                         // personality          // 1140
 882        data8 0                         // afs_syscall
 883        data8 0                         // setfsuid
 884        data8 0                         // setfsgid
 885        data8 0                         // getdents
 886        data8 0                         // flock                // 1145
 887        data8 0                         // readv
 888        data8 0                         // writev
 889        data8 0                         // pread64
 890        data8 0                         // pwrite64
 891        data8 0                         // sysctl               // 1150
 892        data8 0                         // mmap
 893        data8 0                         // munmap
 894        data8 0                         // mlock
 895        data8 0                         // mlockall
 896        data8 0                         // mprotect             // 1155
 897        data8 0                         // mremap
 898        data8 0                         // msync
 899        data8 0                         // munlock
 900        data8 0                         // munlockall
 901        data8 0                         // sched_getparam       // 1160
 902        data8 0                         // sched_setparam
 903        data8 0                         // sched_getscheduler
 904        data8 0                         // sched_setscheduler
 905        data8 0                         // sched_yield
 906        data8 0                         // sched_get_priority_max       // 1165
 907        data8 0                         // sched_get_priority_min
 908        data8 0                         // sched_rr_get_interval
 909        data8 0                         // nanosleep
 910        data8 0                         // nfsservctl
 911        data8 0                         // prctl                // 1170
 912        data8 0                         // getpagesize
 913        data8 0                         // mmap2
 914        data8 0                         // pciconfig_read
 915        data8 0                         // pciconfig_write
 916        data8 0                         // perfmonctl           // 1175
 917        data8 0                         // sigaltstack
 918        data8 0                         // rt_sigaction
 919        data8 0                         // rt_sigpending
 920        data8 fsys_rt_sigprocmask       // rt_sigprocmask
 921        data8 0                         // rt_sigqueueinfo      // 1180
 922        data8 0                         // rt_sigreturn
 923        data8 0                         // rt_sigsuspend
 924        data8 0                         // rt_sigtimedwait
 925        data8 0                         // getcwd
 926        data8 0                         // capget               // 1185
 927        data8 0                         // capset
 928        data8 0                         // sendfile
 929        data8 0
 930        data8 0
 931        data8 0                         // socket               // 1190
 932        data8 0                         // bind
 933        data8 0                         // connect
 934        data8 0                         // listen
 935        data8 0                         // accept
 936        data8 0                         // getsockname          // 1195
 937        data8 0                         // getpeername
 938        data8 0                         // socketpair
 939        data8 0                         // send
 940        data8 0                         // sendto
 941        data8 0                         // recv                 // 1200
 942        data8 0                         // recvfrom
 943        data8 0                         // shutdown
 944        data8 0                         // setsockopt
 945        data8 0                         // getsockopt
 946        data8 0                         // sendmsg              // 1205
 947        data8 0                         // recvmsg
 948        data8 0                         // pivot_root
 949        data8 0                         // mincore
 950        data8 0                         // madvise
 951        data8 0                         // newstat              // 1210
 952        data8 0                         // newlstat
 953        data8 0                         // newfstat
 954        data8 0                         // clone2
 955        data8 0                         // getdents64
 956        data8 0                         // getunwind            // 1215
 957        data8 0                         // readahead
 958        data8 0                         // setxattr
 959        data8 0                         // lsetxattr
 960        data8 0                         // fsetxattr
 961        data8 0                         // getxattr             // 1220
 962        data8 0                         // lgetxattr
 963        data8 0                         // fgetxattr
 964        data8 0                         // listxattr
 965        data8 0                         // llistxattr
 966        data8 0                         // flistxattr           // 1225
 967        data8 0                         // removexattr
 968        data8 0                         // lremovexattr
 969        data8 0                         // fremovexattr
 970        data8 0                         // tkill
 971        data8 0                         // futex                // 1230
 972        data8 0                         // sched_setaffinity
 973        data8 0                         // sched_getaffinity
 974        data8 fsys_set_tid_address      // set_tid_address
 975        data8 0                         // fadvise64_64
 976        data8 0                         // tgkill               // 1235
 977        data8 0                         // exit_group
 978        data8 0                         // lookup_dcookie
 979        data8 0                         // io_setup
 980        data8 0                         // io_destroy
 981        data8 0                         // io_getevents         // 1240
 982        data8 0                         // io_submit
 983        data8 0                         // io_cancel
 984        data8 0                         // epoll_create
 985        data8 0                         // epoll_ctl
 986        data8 0                         // epoll_wait           // 1245
 987        data8 0                         // restart_syscall
 988        data8 0                         // semtimedop
 989        data8 0                         // timer_create
 990        data8 0                         // timer_settime
 991        data8 0                         // timer_gettime        // 1250
 992        data8 0                         // timer_getoverrun
 993        data8 0                         // timer_delete
 994        data8 0                         // clock_settime
 995        data8 fsys_clock_gettime        // clock_gettime
 996        data8 0                         // clock_getres         // 1255
 997        data8 0                         // clock_nanosleep
 998        data8 0                         // fstatfs64
 999        data8 0                         // statfs64
1000        data8 0                         // mbind
1001        data8 0                         // get_mempolicy        // 1260
1002        data8 0                         // set_mempolicy
1003        data8 0                         // mq_open
1004        data8 0                         // mq_unlink
1005        data8 0                         // mq_timedsend
1006        data8 0                         // mq_timedreceive      // 1265
1007        data8 0                         // mq_notify
1008        data8 0                         // mq_getsetattr
1009        data8 0                         // kexec_load
1010        data8 0                         // vserver
1011        data8 0                         // waitid               // 1270
1012        data8 0                         // add_key
1013        data8 0                         // request_key
1014        data8 0                         // keyctl
1015        data8 0                         // ioprio_set
1016        data8 0                         // ioprio_get           // 1275
1017        data8 0                         // move_pages
1018        data8 0                         // inotify_init
1019        data8 0                         // inotify_add_watch
1020        data8 0                         // inotify_rm_watch
1021        data8 0                         // migrate_pages        // 1280
1022        data8 0                         // openat
1023        data8 0                         // mkdirat
1024        data8 0                         // mknodat
1025        data8 0                         // fchownat
1026        data8 0                         // futimesat            // 1285
1027        data8 0                         // newfstatat
1028        data8 0                         // unlinkat
1029        data8 0                         // renameat
1030        data8 0                         // linkat
1031        data8 0                         // symlinkat            // 1290
1032        data8 0                         // readlinkat
1033        data8 0                         // fchmodat
1034        data8 0                         // faccessat
1035        data8 0
1036        data8 0                                                 // 1295
1037        data8 0                         // unshare
1038        data8 0                         // splice
1039        data8 0                         // set_robust_list
1040        data8 0                         // get_robust_list
1041        data8 0                         // sync_file_range      // 1300
1042        data8 0                         // tee
1043        data8 0                         // vmsplice
1044        data8 0
1045        data8 fsys_getcpu               // getcpu               // 1304
1046
1047        // fill in zeros for the remaining entries
            // ".zero" labels the first byte after the explicit slots above.  The
            // .space size is (table start + 8*NR_syscalls) - .zero, i.e. the
            // number of bytes still missing for the table to span NR_syscalls
            // 8-byte slots; every padded byte is 0, so the padded slots read as 0.
1048        .zero:
1049        .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
1050