linux/arch/um/os-Linux/aio.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
   3 * Licensed under the GPL
   4 */
   5
   6#include <unistd.h>
   7#include <sched.h>
   8#include <signal.h>
   9#include <errno.h>
  10#include <sys/time.h>
  11#include <asm/unistd.h>
  12#include "aio.h"
  13#include "init.h"
  14#include "kern_constants.h"
  15#include "kern_util.h"
  16#include "os.h"
  17#include "user.h"
  18
/*
 * One request for the fallback I/O thread.  submit_aio_24 fills one of
 * these in and writes it to aio_req_fd_w; not_aio_thread reads it back
 * from aio_req_fd_r and hands it to do_not_aio.
 */
struct aio_thread_req {
        enum aio_type type;             /* AIO_READ, AIO_WRITE or AIO_MMAP */
        int io_fd;                      /* descriptor the I/O is done on */
        unsigned long long offset;      /* position seeked to before the I/O */
        char *buf;                      /* data buffer (unused for AIO_MMAP) */
        int len;                        /* byte count given to read/write */
        struct aio_context *aio;        /* echoed in the reply; holds reply_fd */
};
  27
  28#if defined(HAVE_AIO_ABI)
  29#include <linux/aio_abi.h>
  30
  31/*
  32 * If we have the headers, we are going to build with AIO enabled.
  33 * If we don't have aio in libc, we define the necessary stubs here.
  34 */
  35
  36#if !defined(HAVE_AIO_LIBC)
  37
  38static long io_setup(int n, aio_context_t *ctxp)
  39{
  40        return syscall(__NR_io_setup, n, ctxp);
  41}
  42
  43static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
  44{
  45        return syscall(__NR_io_submit, ctx, nr, iocbpp);
  46}
  47
  48static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
  49                         struct io_event *events, struct timespec *timeout)
  50{
  51        return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
  52}
  53
  54#endif
  55
  56/*
  57 * The AIO_MMAP cases force the mmapped page into memory here
  58 * rather than in whatever place first touches the data.  I used
  59 * to do this by touching the page, but that's delicate because
  60 * gcc is prone to optimizing that away.  So, what's done here
  61 * is we read from the descriptor from which the page was
  62 * mapped.  The caller is required to pass an offset which is
  63 * inside the page that was mapped.  Thus, when the read
  64 * returns, we know that the page is in the page cache, and
  65 * that it now backs the mmapped area.
  66 */
  67
  68static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
  69                  int len, unsigned long long offset, struct aio_context *aio)
  70{
  71        struct iocb *iocbp = & ((struct iocb) {
  72                                    .aio_data       = (unsigned long) aio,
  73                                    .aio_fildes     = fd,
  74                                    .aio_buf        = (unsigned long) buf,
  75                                    .aio_nbytes     = len,
  76                                    .aio_offset     = offset
  77                             });
  78        char c;
  79
  80        switch (type) {
  81        case AIO_READ:
  82                iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
  83                break;
  84        case AIO_WRITE:
  85                iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
  86                break;
  87        case AIO_MMAP:
  88                iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
  89                iocbp->aio_buf = (unsigned long) &c;
  90                iocbp->aio_nbytes = sizeof(c);
  91                break;
  92        default:
  93                printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
  94                return -EINVAL;
  95        }
  96
  97        return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
  98}
  99
 100/* Initialized in an initcall and unchanged thereafter */
 101static aio_context_t ctx = 0;
 102
 103static int aio_thread(void *arg)
 104{
 105        struct aio_thread_reply reply;
 106        struct io_event event;
 107        int err, n, reply_fd;
 108
 109        signal(SIGWINCH, SIG_IGN);
 110
 111        while (1) {
 112                n = io_getevents(ctx, 1, 1, &event, NULL);
 113                if (n < 0) {
 114                        if (errno == EINTR)
 115                                continue;
 116                        printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
 117                               "errno = %d\n", errno);
 118                }
 119                else {
 120                        reply = ((struct aio_thread_reply)
 121                                { .data = (void *) (long) event.data,
 122                                                .err    = event.res });
 123                        reply_fd = ((struct aio_context *) reply.data)->reply_fd;
 124                        err = write(reply_fd, &reply, sizeof(reply));
 125                        if (err != sizeof(reply))
 126                                printk(UM_KERN_ERR "aio_thread - write failed, "
 127                                       "fd = %d, err = %d\n", reply_fd, errno);
 128                }
 129        }
 130        return 0;
 131}
 132
 133#endif
 134
 135static int do_not_aio(struct aio_thread_req *req)
 136{
 137        char c;
 138        unsigned long long actual;
 139        int n;
 140
 141        actual = lseek64(req->io_fd, req->offset, SEEK_SET);
 142        if (actual != req->offset)
 143                return -errno;
 144
 145        switch (req->type) {
 146        case AIO_READ:
 147                n = read(req->io_fd, req->buf, req->len);
 148                break;
 149        case AIO_WRITE:
 150                n = write(req->io_fd, req->buf, req->len);
 151                break;
 152        case AIO_MMAP:
 153                n = read(req->io_fd, &c, sizeof(c));
 154                break;
 155        default:
 156                printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
 157                       req->type);
 158                return -EINVAL;
 159        }
 160
 161        if (n < 0)
 162                return -errno;
 163        return 0;
 164}
 165
 166/* These are initialized in initcalls and not changed */
 167static int aio_req_fd_r = -1;
 168static int aio_req_fd_w = -1;
 169static int aio_pid = -1;
 170static unsigned long aio_stack;
 171
 172static int not_aio_thread(void *arg)
 173{
 174        struct aio_thread_req req;
 175        struct aio_thread_reply reply;
 176        int err;
 177
 178        signal(SIGWINCH, SIG_IGN);
 179        while (1) {
 180                err = read(aio_req_fd_r, &req, sizeof(req));
 181                if (err != sizeof(req)) {
 182                        if (err < 0)
 183                                printk(UM_KERN_ERR "not_aio_thread - "
 184                                       "read failed, fd = %d, err = %d\n",
 185                                       aio_req_fd_r,
 186                                       errno);
 187                        else {
 188                                printk(UM_KERN_ERR "not_aio_thread - short "
 189                                       "read, fd = %d, length = %d\n",
 190                                       aio_req_fd_r, err);
 191                        }
 192                        continue;
 193                }
 194                err = do_not_aio(&req);
 195                reply = ((struct aio_thread_reply) { .data      = req.aio,
 196                                                     .err       = err });
 197                err = write(req.aio->reply_fd, &reply, sizeof(reply));
 198                if (err != sizeof(reply))
 199                        printk(UM_KERN_ERR "not_aio_thread - write failed, "
 200                               "fd = %d, err = %d\n", req.aio->reply_fd, errno);
 201        }
 202
 203        return 0;
 204}
 205
 206static int init_aio_24(void)
 207{
 208        int fds[2], err;
 209
 210        err = os_pipe(fds, 1, 1);
 211        if (err)
 212                goto out;
 213
 214        aio_req_fd_w = fds[0];
 215        aio_req_fd_r = fds[1];
 216
 217        err = os_set_fd_block(aio_req_fd_w, 0);
 218        if (err)
 219                goto out_close_pipe;
 220
 221        err = run_helper_thread(not_aio_thread, NULL,
 222                                CLONE_FILES | CLONE_VM, &aio_stack);
 223        if (err < 0)
 224                goto out_close_pipe;
 225
 226        aio_pid = err;
 227        goto out;
 228
 229out_close_pipe:
 230        close(fds[0]);
 231        close(fds[1]);
 232        aio_req_fd_w = -1;
 233        aio_req_fd_r = -1;
 234out:
 235#ifndef HAVE_AIO_ABI
 236        printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
 237               "build\n");
 238#endif
 239        printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
 240               "I/O thread\n");
 241        return 0;
 242}
 243
 244#ifdef HAVE_AIO_ABI
 245#define DEFAULT_24_AIO 0
 246static int init_aio_26(void)
 247{
 248        int err;
 249
 250        if (io_setup(256, &ctx)) {
 251                err = -errno;
 252                printk(UM_KERN_ERR "aio_thread failed to initialize context, "
 253                       "err = %d\n", errno);
 254                return err;
 255        }
 256
 257        err = run_helper_thread(aio_thread, NULL,
 258                                CLONE_FILES | CLONE_VM, &aio_stack);
 259        if (err < 0)
 260                return err;
 261
 262        aio_pid = err;
 263
 264        printk(UM_KERN_INFO "Using 2.6 host AIO\n");
 265        return 0;
 266}
 267
 268static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
 269                         unsigned long long offset, struct aio_context *aio)
 270{
 271        struct aio_thread_reply reply;
 272        int err;
 273
 274        err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
 275        if (err) {
 276                reply = ((struct aio_thread_reply) { .data = aio,
 277                                         .err  = err });
 278                err = write(aio->reply_fd, &reply, sizeof(reply));
 279                if (err != sizeof(reply)) {
 280                        err = -errno;
 281                        printk(UM_KERN_ERR "submit_aio_26 - write failed, "
 282                               "fd = %d, err = %d\n", aio->reply_fd, -err);
 283                }
 284                else err = 0;
 285        }
 286
 287        return err;
 288}
 289
 290#else
 291#define DEFAULT_24_AIO 1
 292static int init_aio_26(void)
 293{
 294        return -ENOSYS;
 295}
 296
 297static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
 298                         unsigned long long offset, struct aio_context *aio)
 299{
 300        return -ENOSYS;
 301}
 302#endif
 303
 304/* Initialized in an initcall and unchanged thereafter */
 305static int aio_24 = DEFAULT_24_AIO;
 306
 307static int __init set_aio_24(char *name, int *add)
 308{
 309        aio_24 = 1;
 310        return 0;
 311}
 312
 313__uml_setup("aio=2.4", set_aio_24,
 314"aio=2.4\n"
 315"    This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
 316"    available.  2.4 AIO is a single thread that handles one request at a\n"
 317"    time, synchronously.  2.6 AIO is a thread which uses the 2.6 AIO \n"
 318"    interface to handle an arbitrary number of pending requests.  2.6 AIO \n"
 319"    is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
 320"    /usr/include/linux/aio_abi.h not available.  Many distributions don't\n"
 321"    include aio_abi.h, so you will need to copy it from a kernel tree to\n"
 322"    your /usr/include/linux in order to build an AIO-capable UML\n\n"
 323);
 324
 325static int init_aio(void)
 326{
 327        int err;
 328
 329        if (!aio_24) {
 330                err = init_aio_26();
 331                if (err && (errno == ENOSYS)) {
 332                        printk(UM_KERN_INFO "2.6 AIO not supported on the "
 333                               "host - reverting to 2.4 AIO\n");
 334                        aio_24 = 1;
 335                }
 336                else return err;
 337        }
 338
 339        if (aio_24)
 340                return init_aio_24();
 341
 342        return 0;
 343}
 344
 345/*
 346 * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
 347 * needs to be called when the kernel is running because it calls run_helper,
 348 * which needs get_free_page.  exit_aio is a __uml_exitcall because the generic
 349 * kernel does not run __exitcalls on shutdown, and can't because many of them
 350 * break when called outside of module unloading.
 351 */
 352__initcall(init_aio);
 353
 354static void exit_aio(void)
 355{
 356        if (aio_pid != -1) {
 357                os_kill_process(aio_pid, 1);
 358                free_stack(aio_stack, 0);
 359        }
 360}
 361
 362__uml_exitcall(exit_aio);
 363
 364static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
 365                         unsigned long long offset, struct aio_context *aio)
 366{
 367        struct aio_thread_req req = { .type             = type,
 368                                      .io_fd            = io_fd,
 369                                      .offset           = offset,
 370                                      .buf              = buf,
 371                                      .len              = len,
 372                                      .aio              = aio,
 373        };
 374        int err;
 375
 376        err = write(aio_req_fd_w, &req, sizeof(req));
 377        if (err == sizeof(req))
 378                err = 0;
 379        else err = -errno;
 380
 381        return err;
 382}
 383
 384int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
 385               unsigned long long offset, int reply_fd,
 386               struct aio_context *aio)
 387{
 388        aio->reply_fd = reply_fd;
 389        if (aio_24)
 390                return submit_aio_24(type, io_fd, buf, len, offset, aio);
 391        else
 392                return submit_aio_26(type, io_fd, buf, len, offset, aio);
 393}
 394