linux/fs/eventfd.c
<<
>>
Prefs
   1/*
   2 *  fs/eventfd.c
   3 *
   4 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
   5 *
   6 */
   7
   8#include <linux/file.h>
   9#include <linux/poll.h>
  10#include <linux/init.h>
  11#include <linux/fs.h>
  12#include <linux/sched.h>
  13#include <linux/kernel.h>
  14#include <linux/list.h>
  15#include <linux/spinlock.h>
  16#include <linux/anon_inodes.h>
  17#include <linux/syscalls.h>
  18#include <linux/module.h>
  19#include <linux/kref.h>
  20#include <linux/eventfd.h>
  21
  22struct eventfd_ctx {
  23        struct kref kref;
  24        wait_queue_head_t wqh;
  25        /*
  26         * Every time that a write(2) is performed on an eventfd, the
  27         * value of the __u64 being written is added to "count" and a
  28         * wakeup is performed on "wqh". A read(2) will return the "count"
  29         * value to userspace, and will reset "count" to zero. The kernel
  30         * side eventfd_signal() also, adds to the "count" counter and
  31         * issue a wakeup.
  32         */
  33        __u64 count;
  34        unsigned int flags;
  35};
  36
  37/**
  38 * eventfd_signal - Adds @n to the eventfd counter.
  39 * @ctx: [in] Pointer to the eventfd context.
  40 * @n: [in] Value of the counter to be added to the eventfd internal counter.
  41 *          The value cannot be negative.
  42 *
  43 * This function is supposed to be called by the kernel in paths that do not
  44 * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
  45 * value, and we signal this as overflow condition by returining a POLLERR
  46 * to poll(2).
  47 *
  48 * Returns @n in case of success, a non-negative number lower than @n in case
  49 * of overflow, or the following error codes:
  50 *
  51 * -EINVAL    : The value of @n is negative.
  52 */
  53int eventfd_signal(struct eventfd_ctx *ctx, int n)
  54{
  55        unsigned long flags;
  56
  57        if (n < 0)
  58                return -EINVAL;
  59        spin_lock_irqsave(&ctx->wqh.lock, flags);
  60        if (ULLONG_MAX - ctx->count < n)
  61                n = (int) (ULLONG_MAX - ctx->count);
  62        ctx->count += n;
  63        if (waitqueue_active(&ctx->wqh))
  64                wake_up_locked_poll(&ctx->wqh, POLLIN);
  65        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
  66
  67        return n;
  68}
  69EXPORT_SYMBOL_GPL(eventfd_signal);
  70
  71static void eventfd_free_ctx(struct eventfd_ctx *ctx)
  72{
  73        kfree(ctx);
  74}
  75
  76static void eventfd_free(struct kref *kref)
  77{
  78        struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
  79
  80        eventfd_free_ctx(ctx);
  81}
  82
  83/**
  84 * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
  85 * @ctx: [in] Pointer to the eventfd context.
  86 *
  87 * Returns: In case of success, returns a pointer to the eventfd context.
  88 */
  89struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
  90{
  91        kref_get(&ctx->kref);
  92        return ctx;
  93}
  94EXPORT_SYMBOL_GPL(eventfd_ctx_get);
  95
  96/**
  97 * eventfd_ctx_put - Releases a reference to the internal eventfd context.
  98 * @ctx: [in] Pointer to eventfd context.
  99 *
 100 * The eventfd context reference must have been previously acquired either
 101 * with eventfd_ctx_get() or eventfd_ctx_fdget()).
 102 */
 103void eventfd_ctx_put(struct eventfd_ctx *ctx)
 104{
 105        kref_put(&ctx->kref, eventfd_free);
 106}
 107EXPORT_SYMBOL_GPL(eventfd_ctx_put);
 108
 109static int eventfd_release(struct inode *inode, struct file *file)
 110{
 111        struct eventfd_ctx *ctx = file->private_data;
 112
 113        wake_up_poll(&ctx->wqh, POLLHUP);
 114        eventfd_ctx_put(ctx);
 115        return 0;
 116}
 117
 118static unsigned int eventfd_poll(struct file *file, poll_table *wait)
 119{
 120        struct eventfd_ctx *ctx = file->private_data;
 121        unsigned int events = 0;
 122        unsigned long flags;
 123
 124        poll_wait(file, &ctx->wqh, wait);
 125
 126        spin_lock_irqsave(&ctx->wqh.lock, flags);
 127        if (ctx->count > 0)
 128                events |= POLLIN;
 129        if (ctx->count == ULLONG_MAX)
 130                events |= POLLERR;
 131        if (ULLONG_MAX - 1 > ctx->count)
 132                events |= POLLOUT;
 133        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 134
 135        return events;
 136}
 137
 138static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 139                            loff_t *ppos)
 140{
 141        struct eventfd_ctx *ctx = file->private_data;
 142        ssize_t res;
 143        __u64 ucnt = 0;
 144        DECLARE_WAITQUEUE(wait, current);
 145
 146        if (count < sizeof(ucnt))
 147                return -EINVAL;
 148        spin_lock_irq(&ctx->wqh.lock);
 149        res = -EAGAIN;
 150        if (ctx->count > 0)
 151                res = sizeof(ucnt);
 152        else if (!(file->f_flags & O_NONBLOCK)) {
 153                __add_wait_queue(&ctx->wqh, &wait);
 154                for (res = 0;;) {
 155                        set_current_state(TASK_INTERRUPTIBLE);
 156                        if (ctx->count > 0) {
 157                                res = sizeof(ucnt);
 158                                break;
 159                        }
 160                        if (signal_pending(current)) {
 161                                res = -ERESTARTSYS;
 162                                break;
 163                        }
 164                        spin_unlock_irq(&ctx->wqh.lock);
 165                        schedule();
 166                        spin_lock_irq(&ctx->wqh.lock);
 167                }
 168                __remove_wait_queue(&ctx->wqh, &wait);
 169                __set_current_state(TASK_RUNNING);
 170        }
 171        if (likely(res > 0)) {
 172                ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
 173                ctx->count -= ucnt;
 174                if (waitqueue_active(&ctx->wqh))
 175                        wake_up_locked_poll(&ctx->wqh, POLLOUT);
 176        }
 177        spin_unlock_irq(&ctx->wqh.lock);
 178        if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
 179                return -EFAULT;
 180
 181        return res;
 182}
 183
 184static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
 185                             loff_t *ppos)
 186{
 187        struct eventfd_ctx *ctx = file->private_data;
 188        ssize_t res;
 189        __u64 ucnt;
 190        DECLARE_WAITQUEUE(wait, current);
 191
 192        if (count < sizeof(ucnt))
 193                return -EINVAL;
 194        if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
 195                return -EFAULT;
 196        if (ucnt == ULLONG_MAX)
 197                return -EINVAL;
 198        spin_lock_irq(&ctx->wqh.lock);
 199        res = -EAGAIN;
 200        if (ULLONG_MAX - ctx->count > ucnt)
 201                res = sizeof(ucnt);
 202        else if (!(file->f_flags & O_NONBLOCK)) {
 203                __add_wait_queue(&ctx->wqh, &wait);
 204                for (res = 0;;) {
 205                        set_current_state(TASK_INTERRUPTIBLE);
 206                        if (ULLONG_MAX - ctx->count > ucnt) {
 207                                res = sizeof(ucnt);
 208                                break;
 209                        }
 210                        if (signal_pending(current)) {
 211                                res = -ERESTARTSYS;
 212                                break;
 213                        }
 214                        spin_unlock_irq(&ctx->wqh.lock);
 215                        schedule();
 216                        spin_lock_irq(&ctx->wqh.lock);
 217                }
 218                __remove_wait_queue(&ctx->wqh, &wait);
 219                __set_current_state(TASK_RUNNING);
 220        }
 221        if (likely(res > 0)) {
 222                ctx->count += ucnt;
 223                if (waitqueue_active(&ctx->wqh))
 224                        wake_up_locked_poll(&ctx->wqh, POLLIN);
 225        }
 226        spin_unlock_irq(&ctx->wqh.lock);
 227
 228        return res;
 229}
 230
 231static const struct file_operations eventfd_fops = {
 232        .release        = eventfd_release,
 233        .poll           = eventfd_poll,
 234        .read           = eventfd_read,
 235        .write          = eventfd_write,
 236};
 237
 238/**
 239 * eventfd_fget - Acquire a reference of an eventfd file descriptor.
 240 * @fd: [in] Eventfd file descriptor.
 241 *
 242 * Returns a pointer to the eventfd file structure in case of success, or the
 243 * following error pointer:
 244 *
 245 * -EBADF    : Invalid @fd file descriptor.
 246 * -EINVAL   : The @fd file descriptor is not an eventfd file.
 247 */
 248struct file *eventfd_fget(int fd)
 249{
 250        struct file *file;
 251
 252        file = fget(fd);
 253        if (!file)
 254                return ERR_PTR(-EBADF);
 255        if (file->f_op != &eventfd_fops) {
 256                fput(file);
 257                return ERR_PTR(-EINVAL);
 258        }
 259
 260        return file;
 261}
 262EXPORT_SYMBOL_GPL(eventfd_fget);
 263
 264/**
 265 * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
 266 * @fd: [in] Eventfd file descriptor.
 267 *
 268 * Returns a pointer to the internal eventfd context, otherwise the error
 269 * pointers returned by the following functions:
 270 *
 271 * eventfd_fget
 272 */
 273struct eventfd_ctx *eventfd_ctx_fdget(int fd)
 274{
 275        struct file *file;
 276        struct eventfd_ctx *ctx;
 277
 278        file = eventfd_fget(fd);
 279        if (IS_ERR(file))
 280                return (struct eventfd_ctx *) file;
 281        ctx = eventfd_ctx_get(file->private_data);
 282        fput(file);
 283
 284        return ctx;
 285}
 286EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
 287
 288/**
 289 * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
 290 * @file: [in] Eventfd file pointer.
 291 *
 292 * Returns a pointer to the internal eventfd context, otherwise the error
 293 * pointer:
 294 *
 295 * -EINVAL   : The @fd file descriptor is not an eventfd file.
 296 */
 297struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
 298{
 299        if (file->f_op != &eventfd_fops)
 300                return ERR_PTR(-EINVAL);
 301
 302        return eventfd_ctx_get(file->private_data);
 303}
 304EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
 305
 306/**
 307 * eventfd_file_create - Creates an eventfd file pointer.
 308 * @count: Initial eventfd counter value.
 309 * @flags: Flags for the eventfd file.
 310 *
 311 * This function creates an eventfd file pointer, w/out installing it into
 312 * the fd table. This is useful when the eventfd file is used during the
 313 * initialization of data structures that require extra setup after the eventfd
 314 * creation. So the eventfd creation is split into the file pointer creation
 315 * phase, and the file descriptor installation phase.
 316 * In this way races with userspace closing the newly installed file descriptor
 317 * can be avoided.
 318 * Returns an eventfd file pointer, or a proper error pointer.
 319 */
 320struct file *eventfd_file_create(unsigned int count, int flags)
 321{
 322        struct file *file;
 323        struct eventfd_ctx *ctx;
 324
 325        /* Check the EFD_* constants for consistency.  */
 326        BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
 327        BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
 328
 329        if (flags & ~EFD_FLAGS_SET)
 330                return ERR_PTR(-EINVAL);
 331
 332        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 333        if (!ctx)
 334                return ERR_PTR(-ENOMEM);
 335
 336        kref_init(&ctx->kref);
 337        init_waitqueue_head(&ctx->wqh);
 338        ctx->count = count;
 339        ctx->flags = flags;
 340
 341        file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
 342                                  flags & EFD_SHARED_FCNTL_FLAGS);
 343        if (IS_ERR(file))
 344                eventfd_free_ctx(ctx);
 345
 346        return file;
 347}
 348
 349SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 350{
 351        int fd, error;
 352        struct file *file;
 353
 354        error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS);
 355        if (error < 0)
 356                return error;
 357        fd = error;
 358
 359        file = eventfd_file_create(count, flags);
 360        if (IS_ERR(file)) {
 361                error = PTR_ERR(file);
 362                goto err_put_unused_fd;
 363        }
 364        fd_install(fd, file);
 365
 366        return fd;
 367
 368err_put_unused_fd:
 369        put_unused_fd(fd);
 370
 371        return error;
 372}
 373
 374SYSCALL_DEFINE1(eventfd, unsigned int, count)
 375{
 376        return sys_eventfd2(count, 0);
 377}
 378
 379