linux/arch/um/os-Linux/aio.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
   3 * Licensed under the GPL
   4 */
   5
   6#include <unistd.h>
   7#include <sched.h>
   8#include <signal.h>
   9#include <errno.h>
  10#include <sys/time.h>
  11#include <asm/unistd.h>
  12#include <aio.h>
  13#include <init.h>
  14#include <kern_util.h>
  15#include <os.h>
  16
  17struct aio_thread_req {
  18        enum aio_type type;
  19        int io_fd;
  20        unsigned long long offset;
  21        char *buf;
  22        int len;
  23        struct aio_context *aio;
  24};
  25
  26#if defined(HAVE_AIO_ABI)
  27#include <linux/aio_abi.h>
  28
  29/*
  30 * If we have the headers, we are going to build with AIO enabled.
  31 * If we don't have aio in libc, we define the necessary stubs here.
  32 */
  33
  34#if !defined(HAVE_AIO_LIBC)
  35
  36static long io_setup(int n, aio_context_t *ctxp)
  37{
  38        return syscall(__NR_io_setup, n, ctxp);
  39}
  40
  41static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
  42{
  43        return syscall(__NR_io_submit, ctx, nr, iocbpp);
  44}
  45
  46static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
  47                         struct io_event *events, struct timespec *timeout)
  48{
  49        return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
  50}
  51
  52#endif
  53
  54/*
  55 * The AIO_MMAP cases force the mmapped page into memory here
  56 * rather than in whatever place first touches the data.  I used
  57 * to do this by touching the page, but that's delicate because
  58 * gcc is prone to optimizing that away.  So, what's done here
  59 * is we read from the descriptor from which the page was
  60 * mapped.  The caller is required to pass an offset which is
  61 * inside the page that was mapped.  Thus, when the read
  62 * returns, we know that the page is in the page cache, and
  63 * that it now backs the mmapped area.
  64 */
  65
  66static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
  67                  int len, unsigned long long offset, struct aio_context *aio)
  68{
  69        struct iocb *iocbp = & ((struct iocb) {
  70                                    .aio_data       = (unsigned long) aio,
  71                                    .aio_fildes     = fd,
  72                                    .aio_buf        = (unsigned long) buf,
  73                                    .aio_nbytes     = len,
  74                                    .aio_offset     = offset
  75                             });
  76        char c;
  77
  78        switch (type) {
  79        case AIO_READ:
  80                iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
  81                break;
  82        case AIO_WRITE:
  83                iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
  84                break;
  85        case AIO_MMAP:
  86                iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
  87                iocbp->aio_buf = (unsigned long) &c;
  88                iocbp->aio_nbytes = sizeof(c);
  89                break;
  90        default:
  91                printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
  92                return -EINVAL;
  93        }
  94
  95        return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
  96}
  97
  98/* Initialized in an initcall and unchanged thereafter */
  99static aio_context_t ctx = 0;
 100
 101static int aio_thread(void *arg)
 102{
 103        struct aio_thread_reply reply;
 104        struct io_event event;
 105        int err, n, reply_fd;
 106
 107        os_fix_helper_signals();
 108        while (1) {
 109                n = io_getevents(ctx, 1, 1, &event, NULL);
 110                if (n < 0) {
 111                        if (errno == EINTR)
 112                                continue;
 113                        printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
 114                               "errno = %d\n", errno);
 115                }
 116                else {
 117                        reply = ((struct aio_thread_reply)
 118                                { .data = (void *) (long) event.data,
 119                                                .err    = event.res });
 120                        reply_fd = ((struct aio_context *) reply.data)->reply_fd;
 121                        err = write(reply_fd, &reply, sizeof(reply));
 122                        if (err != sizeof(reply))
 123                                printk(UM_KERN_ERR "aio_thread - write failed, "
 124                                       "fd = %d, err = %d\n", reply_fd, errno);
 125                }
 126        }
 127        return 0;
 128}
 129
 130#endif
 131
 132static int do_not_aio(struct aio_thread_req *req)
 133{
 134        char c;
 135        unsigned long long actual;
 136        int n;
 137
 138        actual = lseek64(req->io_fd, req->offset, SEEK_SET);
 139        if (actual != req->offset)
 140                return -errno;
 141
 142        switch (req->type) {
 143        case AIO_READ:
 144                n = read(req->io_fd, req->buf, req->len);
 145                break;
 146        case AIO_WRITE:
 147                n = write(req->io_fd, req->buf, req->len);
 148                break;
 149        case AIO_MMAP:
 150                n = read(req->io_fd, &c, sizeof(c));
 151                break;
 152        default:
 153                printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
 154                       req->type);
 155                return -EINVAL;
 156        }
 157
 158        if (n < 0)
 159                return -errno;
 160        return 0;
 161}
 162
 163/* These are initialized in initcalls and not changed */
 164static int aio_req_fd_r = -1;
 165static int aio_req_fd_w = -1;
 166static int aio_pid = -1;
 167static unsigned long aio_stack;
 168
 169static int not_aio_thread(void *arg)
 170{
 171        struct aio_thread_req req;
 172        struct aio_thread_reply reply;
 173        int err;
 174
 175        os_fix_helper_signals();
 176        while (1) {
 177                err = read(aio_req_fd_r, &req, sizeof(req));
 178                if (err != sizeof(req)) {
 179                        if (err < 0)
 180                                printk(UM_KERN_ERR "not_aio_thread - "
 181                                       "read failed, fd = %d, err = %d\n",
 182                                       aio_req_fd_r,
 183                                       errno);
 184                        else {
 185                                printk(UM_KERN_ERR "not_aio_thread - short "
 186                                       "read, fd = %d, length = %d\n",
 187                                       aio_req_fd_r, err);
 188                        }
 189                        continue;
 190                }
 191                err = do_not_aio(&req);
 192                reply = ((struct aio_thread_reply) { .data      = req.aio,
 193                                                     .err       = err });
 194                err = write(req.aio->reply_fd, &reply, sizeof(reply));
 195                if (err != sizeof(reply))
 196                        printk(UM_KERN_ERR "not_aio_thread - write failed, "
 197                               "fd = %d, err = %d\n", req.aio->reply_fd, errno);
 198        }
 199
 200        return 0;
 201}
 202
 203static int init_aio_24(void)
 204{
 205        int fds[2], err;
 206
 207        err = os_pipe(fds, 1, 1);
 208        if (err)
 209                goto out;
 210
 211        aio_req_fd_w = fds[0];
 212        aio_req_fd_r = fds[1];
 213
 214        err = os_set_fd_block(aio_req_fd_w, 0);
 215        if (err)
 216                goto out_close_pipe;
 217
 218        err = run_helper_thread(not_aio_thread, NULL,
 219                                CLONE_FILES | CLONE_VM, &aio_stack);
 220        if (err < 0)
 221                goto out_close_pipe;
 222
 223        aio_pid = err;
 224        goto out;
 225
 226out_close_pipe:
 227        close(fds[0]);
 228        close(fds[1]);
 229        aio_req_fd_w = -1;
 230        aio_req_fd_r = -1;
 231out:
 232#ifndef HAVE_AIO_ABI
 233        printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
 234               "build\n");
 235#endif
 236        printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
 237               "I/O thread\n");
 238        return 0;
 239}
 240
 241#ifdef HAVE_AIO_ABI
 242#define DEFAULT_24_AIO 0
 243static int init_aio_26(void)
 244{
 245        int err;
 246
 247        if (io_setup(256, &ctx)) {
 248                err = -errno;
 249                printk(UM_KERN_ERR "aio_thread failed to initialize context, "
 250                       "err = %d\n", errno);
 251                return err;
 252        }
 253
 254        err = run_helper_thread(aio_thread, NULL,
 255                                CLONE_FILES | CLONE_VM, &aio_stack);
 256        if (err < 0)
 257                return err;
 258
 259        aio_pid = err;
 260
 261        printk(UM_KERN_INFO "Using 2.6 host AIO\n");
 262        return 0;
 263}
 264
 265static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
 266                         unsigned long long offset, struct aio_context *aio)
 267{
 268        struct aio_thread_reply reply;
 269        int err;
 270
 271        err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
 272        if (err) {
 273                reply = ((struct aio_thread_reply) { .data = aio,
 274                                         .err  = err });
 275                err = write(aio->reply_fd, &reply, sizeof(reply));
 276                if (err != sizeof(reply)) {
 277                        err = -errno;
 278                        printk(UM_KERN_ERR "submit_aio_26 - write failed, "
 279                               "fd = %d, err = %d\n", aio->reply_fd, -err);
 280                }
 281                else err = 0;
 282        }
 283
 284        return err;
 285}
 286
 287#else
 288#define DEFAULT_24_AIO 1
 289static int init_aio_26(void)
 290{
 291        return -ENOSYS;
 292}
 293
 294static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
 295                         unsigned long long offset, struct aio_context *aio)
 296{
 297        return -ENOSYS;
 298}
 299#endif
 300
 301/* Initialized in an initcall and unchanged thereafter */
 302static int aio_24 = DEFAULT_24_AIO;
 303
 304static int __init set_aio_24(char *name, int *add)
 305{
 306        aio_24 = 1;
 307        return 0;
 308}
 309
 310__uml_setup("aio=2.4", set_aio_24,
 311"aio=2.4\n"
 312"    This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
 313"    available.  2.4 AIO is a single thread that handles one request at a\n"
 314"    time, synchronously.  2.6 AIO is a thread which uses the 2.6 AIO \n"
 315"    interface to handle an arbitrary number of pending requests.  2.6 AIO \n"
 316"    is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
 317"    /usr/include/linux/aio_abi.h not available.  Many distributions don't\n"
 318"    include aio_abi.h, so you will need to copy it from a kernel tree to\n"
 319"    your /usr/include/linux in order to build an AIO-capable UML\n\n"
 320);
 321
 322static int init_aio(void)
 323{
 324        int err;
 325
 326        if (!aio_24) {
 327                err = init_aio_26();
 328                if (err && (errno == ENOSYS)) {
 329                        printk(UM_KERN_INFO "2.6 AIO not supported on the "
 330                               "host - reverting to 2.4 AIO\n");
 331                        aio_24 = 1;
 332                }
 333                else return err;
 334        }
 335
 336        if (aio_24)
 337                return init_aio_24();
 338
 339        return 0;
 340}
 341
 342/*
 343 * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
 344 * needs to be called when the kernel is running because it calls run_helper,
 345 * which needs get_free_page.  exit_aio is a __uml_exitcall because the generic
 346 * kernel does not run __exitcalls on shutdown, and can't because many of them
 347 * break when called outside of module unloading.
 348 */
 349__initcall(init_aio);
 350
 351static void exit_aio(void)
 352{
 353        if (aio_pid != -1) {
 354                os_kill_process(aio_pid, 1);
 355                free_stack(aio_stack, 0);
 356        }
 357}
 358
 359__uml_exitcall(exit_aio);
 360
 361static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
 362                         unsigned long long offset, struct aio_context *aio)
 363{
 364        struct aio_thread_req req = { .type             = type,
 365                                      .io_fd            = io_fd,
 366                                      .offset           = offset,
 367                                      .buf              = buf,
 368                                      .len              = len,
 369                                      .aio              = aio,
 370        };
 371        int err;
 372
 373        err = write(aio_req_fd_w, &req, sizeof(req));
 374        if (err == sizeof(req))
 375                err = 0;
 376        else err = -errno;
 377
 378        return err;
 379}
 380
 381int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
 382               unsigned long long offset, int reply_fd,
 383               struct aio_context *aio)
 384{
 385        aio->reply_fd = reply_fd;
 386        if (aio_24)
 387                return submit_aio_24(type, io_fd, buf, len, offset, aio);
 388        else
 389                return submit_aio_26(type, io_fd, buf, len, offset, aio);
 390}
 391