qemu/include/user/safe-syscall.h
<<
>>
Prefs
   1/*
   2 * safe-syscall.h: prototypes for linux-user signal-race-safe syscalls
   3 *
   4 *  This program is free software; you can redistribute it and/or modify
   5 *  it under the terms of the GNU General Public License as published by
   6 *  the Free Software Foundation; either version 2 of the License, or
   7 *  (at your option) any later version.
   8 *
   9 *  This program is distributed in the hope that it will be useful,
  10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 *  GNU General Public License for more details.
  13 *
  14 *  You should have received a copy of the GNU General Public License
  15 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  16 */
  17
  18#ifndef LINUX_USER_SAFE_SYSCALL_H
  19#define LINUX_USER_SAFE_SYSCALL_H
  20
  21/**
  22 * safe_syscall:
  23 * @number: number of system call to make
  24 * @...: arguments to the system call
  25 *
  26 * Make a system call only if no guest signal is pending.
  27 * This has the same API as the libc syscall() function, except that it
  28 * may return -1 with errno == QEMU_ERESTARTSYS if a signal was pending.
  29 *
  30 * Returns: the system call result, or -1 with an error code in errno
  31 * (Errnos are host errnos; we rely on QEMU_ERESTARTSYS not clashing
  32 * with any of the host errno values.)
  33 */
  34
  35/*
  36 * A guide to using safe_syscall() to handle interactions between guest
  37 * syscalls and guest signals:
  38 *
  39 * Guest syscalls come in two flavours:
  40 *
  41 * (1) Non-interruptible syscalls
  42 *
  43 * These are guest syscalls that never get interrupted by signals and
  44 * so never return EINTR. They can be implemented straightforwardly in
  45 * QEMU: just make sure that if the implementation code has to make any
  46 * blocking calls that those calls are retried if they return EINTR.
  47 * It's also OK to implement these with safe_syscall, though it will be
  48 * a little less efficient if a signal is delivered at the 'wrong' moment.
  49 *
  50 * Some non-interruptible syscalls need to be handled using block_signals()
  51 * to block signals for the duration of the syscall. This mainly applies
  52 * to code which needs to modify the data structures used by the
  53 * host_signal_handler() function and the functions it calls, including
  54 * all syscalls which change the thread's signal mask.
  55 *
  56 * (2) Interruptible syscalls
  57 *
  58 * These are guest syscalls that can be interrupted by signals and
  59 * for which we need to either return EINTR or arrange for the guest
  60 * syscall to be restarted. This category includes both syscalls which
  61 * always restart (and in the kernel return -ERESTARTNOINTR), ones
  62 * which only restart if there is no handler (kernel returns -ERESTARTNOHAND
  63 * or -ERESTART_RESTARTBLOCK), and the most common kind which restart
  64 * if the handler was registered with SA_RESTART (kernel returns
  65 * -ERESTARTSYS). System calls which are only interruptible in some
  66 * situations (like 'open') also need to be handled this way.
  67 *
  68 * Here it is important that the host syscall is made
  69 * via this safe_syscall() function, and *not* via the host libc.
  70 * If the host libc is used then the implementation will appear to work
  71 * most of the time, but there will be a race condition where a
  72 * signal could arrive just before we make the host syscall inside libc,
  73 * and then the guest syscall will not correctly be interrupted.
  74 * Instead the implementation of the guest syscall can use the safe_syscall
  75 * function but otherwise just return the result or errno in the usual
  76 * way; the main loop code will take care of restarting the syscall
  77 * if appropriate.
  78 *
  79 * (If the implementation needs to make multiple host syscalls this is
  80 * OK; any which might really block must be via safe_syscall(); for those
  81 * which are only technically blocking (ie which we know in practice won't
  82 * stay in the host kernel indefinitely) it's OK to use libc if necessary.
  83 * You must be able to cope with backing out correctly if some safe_syscall
  84 * you make in the implementation fails with errno set to either
  85 * QEMU_ERESTARTSYS or EINTR though.)
  86 *
  87 * block_signals() cannot be used for interruptible syscalls.
  88 *
  89 *
  90 * How and why the safe_syscall implementation works:
  91 *
  92 * The basic setup is that we make the host syscall via a known
  93 * section of host native assembly. If a signal occurs, our signal
  94 * handler checks the interrupted host PC against the address of that
  95 * known section. If the PC is before or at the address of the syscall
  96 * instruction then we change the PC to point at a "return
  97 * -QEMU_ERESTARTSYS" code path instead, and then exit the signal handler
  98 * (causing the safe_syscall() call to immediately return that value).
  99 * Then in the main.c loop if we see this magic return value we adjust
 100 * the guest PC to wind it back to before the system call, and invoke
 101 * the guest signal handler as usual.
 102 *
 103 * This winding-back will happen in two cases:
 104 * (1) signal came in just before we took the host syscall (a race);
 105 *   in this case we'll take the guest signal and have another go
 106 *   at the syscall afterwards, and this is indistinguishable for the
 107 *   guest from the timing having been different such that the guest
 108 *   signal really did win the race
 109 * (2) signal came in while the host syscall was blocking, and the
 110 *   host kernel decided the syscall should be restarted;
 111 *   in this case we want to restart the guest syscall also, and so
 112 *   rewinding is the right thing. (Note that "restart" semantics mean
 113 *   "first call the signal handler, then reattempt the syscall".)
 114 * The other situation to consider is when a signal came in while the
 115 * host syscall was blocking, and the host kernel decided that the syscall
 116 * should not be restarted; in this case QEMU's host signal handler will
 117 * be invoked with the PC pointing just after the syscall instruction,
 118 * with registers indicating an EINTR return; the special code in the
 119 * handler will not kick in, and we will return EINTR to the guest as
 120 * we should.
 121 *
 122 * Notice that we can leave the host kernel to make the decision for
 123 * us about whether to do a restart of the syscall or not; we do not
 124 * need to check SA_RESTART flags in QEMU or distinguish the various
 125 * kinds of restartability.
 126 */
 127
 128/* The core part of this function is implemented in assembly */
    /*
     * safe_syscall_base:
     * @pending: pointer to the signal-pending flag; the safe_syscall() macro
     *           in this header passes the address of the signal_pending field
     *           of the current thread's TaskState
     * @number: number of the system call to make
     * @...: arguments to the system call
     *
     * Per the safe_syscall documentation earlier in this header, the result
     * is the system call result, or -1 with a host errno value in errno
     * (QEMU_ERESTARTSYS if a signal was pending).
     */
 129extern long safe_syscall_base(int *pending, long number, ...);
    /*
     * NOTE(review): presumably the common failure tail that stores @value in
     * errno and returns -1; its definition is not visible in this header, so
     * confirm against the implementation.
     */
 130extern long safe_syscall_set_errno_tail(int value);
 131
 132/* These are defined by the safe-syscall.inc.S file */
    /*
     * NOTE(review): these look like the labels delimiting the "known section
     * of host native assembly" described in the guide earlier in this header,
     * i.e. the addresses the host signal handler compares the interrupted PC
     * against. The exact boundary semantics (inclusive/exclusive, position
     * relative to the syscall instruction) are set by the .inc.S file --
     * confirm there.
     */
 133extern char safe_syscall_start[];
 134extern char safe_syscall_end[];
 135
    /*
     * safe_syscall(number, ...): expands to a call of safe_syscall_base() whose
     * first argument is the address of the signal_pending field of the
     * TaskState reached through thread_cpu->opaque, followed by the caller's
     * arguments unchanged.
     *
     * The argument list must start with the system call number, since it
     * fills the 'number' parameter of safe_syscall_base().
     *
     * This header declares neither TaskState nor thread_cpu; both must be
     * visible at the point where the macro is used.
     */
 136#define safe_syscall(...)                                                 \
 137    safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \
 138                      __VA_ARGS__)
 139
 140#endif
 141