linux/fs/timerfd.c
<<
>>
Prefs
   1/*
   2 *  fs/timerfd.c
   3 *
   4 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
   5 *
   6 *
   7 *  Thanks to Thomas Gleixner for code reviews and useful comments.
   8 *
   9 */
  10
  11#include <linux/alarmtimer.h>
  12#include <linux/file.h>
  13#include <linux/poll.h>
  14#include <linux/init.h>
  15#include <linux/fs.h>
  16#include <linux/sched.h>
  17#include <linux/kernel.h>
  18#include <linux/slab.h>
  19#include <linux/list.h>
  20#include <linux/spinlock.h>
  21#include <linux/time.h>
  22#include <linux/hrtimer.h>
  23#include <linux/anon_inodes.h>
  24#include <linux/timerfd.h>
  25#include <linux/syscalls.h>
  26#include <linux/compat.h>
  27#include <linux/rcupdate.h>
  28
  29struct timerfd_ctx {
  30        union {
  31                struct hrtimer tmr;
  32                struct alarm alarm;
  33        } t;
  34        ktime_t tintv;
  35        ktime_t moffs;
  36        wait_queue_head_t wqh;
  37        u64 ticks;
  38        int clockid;
  39        short unsigned expired;
  40        short unsigned settime_flags;   /* to show in fdinfo */
  41        struct rcu_head rcu;
  42        struct list_head clist;
  43        bool might_cancel;
  44};
  45
  /*
   * cancel_list links (via timerfd_ctx.clist) every timerfd that must be
   * notified when the clock is set. Additions and removals are serialized
   * by cancel_lock; timerfd_clock_was_set() walks the list under RCU.
   */
static DEFINE_SPINLOCK(cancel_lock);
static LIST_HEAD(cancel_list);
  48
  49static inline bool isalarm(struct timerfd_ctx *ctx)
  50{
  51        return ctx->clockid == CLOCK_REALTIME_ALARM ||
  52                ctx->clockid == CLOCK_BOOTTIME_ALARM;
  53}
  54
  55/*
  56 * This gets called when the timer event triggers. We set the "expired"
  57 * flag, but we do not re-arm the timer (in case it's necessary,
  58 * tintv.tv64 != 0) until the timer is accessed.
  59 */
  60static void timerfd_triggered(struct timerfd_ctx *ctx)
  61{
  62        unsigned long flags;
  63
  64        spin_lock_irqsave(&ctx->wqh.lock, flags);
  65        ctx->expired = 1;
  66        ctx->ticks++;
  67        wake_up_locked(&ctx->wqh);
  68        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
  69}
  70
  71static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
  72{
  73        struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx,
  74                                               t.tmr);
  75        timerfd_triggered(ctx);
  76        return HRTIMER_NORESTART;
  77}
  78
  79static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
  80        ktime_t now)
  81{
  82        struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx,
  83                                               t.alarm);
  84        timerfd_triggered(ctx);
  85        return ALARMTIMER_NORESTART;
  86}
  87
  88/*
  89 * Called when the clock was set to cancel the timers in the cancel
  90 * list. This will wake up processes waiting on these timers. The
  91 * wake-up requires ctx->ticks to be non zero, therefore we increment
  92 * it before calling wake_up_locked().
  93 */
  94void timerfd_clock_was_set(void)
  95{
  96        ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
  97        struct timerfd_ctx *ctx;
  98        unsigned long flags;
  99
 100        rcu_read_lock();
 101        list_for_each_entry_rcu(ctx, &cancel_list, clist) {
 102                if (!ctx->might_cancel)
 103                        continue;
 104                spin_lock_irqsave(&ctx->wqh.lock, flags);
 105                if (ctx->moffs.tv64 != moffs.tv64) {
 106                        ctx->moffs.tv64 = KTIME_MAX;
 107                        ctx->ticks++;
 108                        wake_up_locked(&ctx->wqh);
 109                }
 110                spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 111        }
 112        rcu_read_unlock();
 113}
 114
 115static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
 116{
 117        if (ctx->might_cancel) {
 118                ctx->might_cancel = false;
 119                spin_lock(&cancel_lock);
 120                list_del_rcu(&ctx->clist);
 121                spin_unlock(&cancel_lock);
 122        }
 123}
 124
 125static bool timerfd_canceled(struct timerfd_ctx *ctx)
 126{
 127        if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
 128                return false;
 129        ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
 130        return true;
 131}
 132
 133static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
 134{
 135        if ((ctx->clockid == CLOCK_REALTIME ||
 136             ctx->clockid == CLOCK_REALTIME_ALARM) &&
 137            (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
 138                if (!ctx->might_cancel) {
 139                        ctx->might_cancel = true;
 140                        spin_lock(&cancel_lock);
 141                        list_add_rcu(&ctx->clist, &cancel_list);
 142                        spin_unlock(&cancel_lock);
 143                }
 144        } else if (ctx->might_cancel) {
 145                timerfd_remove_cancel(ctx);
 146        }
 147}
 148
 149static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 150{
 151        ktime_t remaining;
 152
 153        if (isalarm(ctx))
 154                remaining = alarm_expires_remaining(&ctx->t.alarm);
 155        else
 156                remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 157
 158        return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 159}
 160
 161static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
 162                         const struct itimerspec *ktmr)
 163{
 164        enum hrtimer_mode htmode;
 165        ktime_t texp;
 166        int clockid = ctx->clockid;
 167
 168        htmode = (flags & TFD_TIMER_ABSTIME) ?
 169                HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
 170
 171        texp = timespec_to_ktime(ktmr->it_value);
 172        ctx->expired = 0;
 173        ctx->ticks = 0;
 174        ctx->tintv = timespec_to_ktime(ktmr->it_interval);
 175
 176        if (isalarm(ctx)) {
 177                alarm_init(&ctx->t.alarm,
 178                           ctx->clockid == CLOCK_REALTIME_ALARM ?
 179                           ALARM_REALTIME : ALARM_BOOTTIME,
 180                           timerfd_alarmproc);
 181        } else {
 182                hrtimer_init(&ctx->t.tmr, clockid, htmode);
 183                hrtimer_set_expires(&ctx->t.tmr, texp);
 184                ctx->t.tmr.function = timerfd_tmrproc;
 185        }
 186
 187        if (texp.tv64 != 0) {
 188                if (isalarm(ctx)) {
 189                        if (flags & TFD_TIMER_ABSTIME)
 190                                alarm_start(&ctx->t.alarm, texp);
 191                        else
 192                                alarm_start_relative(&ctx->t.alarm, texp);
 193                } else {
 194                        hrtimer_start(&ctx->t.tmr, texp, htmode);
 195                }
 196
 197                if (timerfd_canceled(ctx))
 198                        return -ECANCELED;
 199        }
 200
 201        ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
 202        return 0;
 203}
 204
 205static int timerfd_release(struct inode *inode, struct file *file)
 206{
 207        struct timerfd_ctx *ctx = file->private_data;
 208
 209        timerfd_remove_cancel(ctx);
 210
 211        if (isalarm(ctx))
 212                alarm_cancel(&ctx->t.alarm);
 213        else
 214                hrtimer_cancel(&ctx->t.tmr);
 215        kfree_rcu(ctx, rcu);
 216        return 0;
 217}
 218
 219static unsigned int timerfd_poll(struct file *file, poll_table *wait)
 220{
 221        struct timerfd_ctx *ctx = file->private_data;
 222        unsigned int events = 0;
 223        unsigned long flags;
 224
 225        poll_wait(file, &ctx->wqh, wait);
 226
 227        spin_lock_irqsave(&ctx->wqh.lock, flags);
 228        if (ctx->ticks)
 229                events |= POLLIN;
 230        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 231
 232        return events;
 233}
 234
 235static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
 236                            loff_t *ppos)
 237{
 238        struct timerfd_ctx *ctx = file->private_data;
 239        ssize_t res;
 240        u64 ticks = 0;
 241
 242        if (count < sizeof(ticks))
 243                return -EINVAL;
 244        spin_lock_irq(&ctx->wqh.lock);
 245        if (file->f_flags & O_NONBLOCK)
 246                res = -EAGAIN;
 247        else
 248                res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
 249
 250        /*
 251         * If clock has changed, we do not care about the
 252         * ticks and we do not rearm the timer. Userspace must
 253         * reevaluate anyway.
 254         */
 255        if (timerfd_canceled(ctx)) {
 256                ctx->ticks = 0;
 257                ctx->expired = 0;
 258                res = -ECANCELED;
 259        }
 260
 261        if (ctx->ticks) {
 262                ticks = ctx->ticks;
 263
 264                if (ctx->expired && ctx->tintv.tv64) {
 265                        /*
 266                         * If tintv.tv64 != 0, this is a periodic timer that
 267                         * needs to be re-armed. We avoid doing it in the timer
 268                         * callback to avoid DoS attacks specifying a very
 269                         * short timer period.
 270                         */
 271                        if (isalarm(ctx)) {
 272                                ticks += alarm_forward_now(
 273                                        &ctx->t.alarm, ctx->tintv) - 1;
 274                                alarm_restart(&ctx->t.alarm);
 275                        } else {
 276                                ticks += hrtimer_forward_now(&ctx->t.tmr,
 277                                                             ctx->tintv) - 1;
 278                                hrtimer_restart(&ctx->t.tmr);
 279                        }
 280                }
 281                ctx->expired = 0;
 282                ctx->ticks = 0;
 283        }
 284        spin_unlock_irq(&ctx->wqh.lock);
 285        if (ticks)
 286                res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
 287        return res;
 288}
 289
 /*
  * ->show_fdinfo() handler (only with CONFIG_PROC_FS; NULL otherwise).
  * Prints the clock id, pending ticks, settime flags, and the remaining
  * time and interval of the timer. The remaining time and the interval
  * are sampled together under wqh.lock.
  */
#ifdef CONFIG_PROC_FS
static void timerfd_show(struct seq_file *m, struct file *file)
{
	struct timerfd_ctx *ctx = file->private_data;
	struct timespec value, interval;

	spin_lock_irq(&ctx->wqh.lock);
	value = ktime_to_timespec(timerfd_get_remaining(ctx));
	interval = ktime_to_timespec(ctx->tintv);
	spin_unlock_irq(&ctx->wqh.lock);

	seq_printf(m,
		   "clockid: %d\n"
		   "ticks: %llu\n"
		   "settime flags: 0%o\n"
		   "it_value: (%llu, %llu)\n"
		   "it_interval: (%llu, %llu)\n",
		   ctx->clockid,
		   (unsigned long long)ctx->ticks,
		   ctx->settime_flags,
		   (unsigned long long)value.tv_sec,
		   (unsigned long long)value.tv_nsec,
		   (unsigned long long)interval.tv_sec,
		   (unsigned long long)interval.tv_nsec);
}
#else
#define timerfd_show NULL
#endif
 318
 /*
  * ->unlocked_ioctl() handler (only with CONFIG_CHECKPOINT_RESTORE; NULL
  * otherwise). The single supported command, TFD_IOC_SET_TICKS, reads a
  * u64 from user memory at @arg, stores it as the pending tick count and
  * wakes up waiters.
  *
  * Returns 0 on success, -ENOTTY for any other command, -EFAULT when the
  * value cannot be read, -EINVAL for a zero tick count, or -ECANCELED
  * when the timer was canceled by a clock set.
  */
#ifdef CONFIG_CHECKPOINT_RESTORE
static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct timerfd_ctx *ctx = file->private_data;
	u64 ticks;
	int ret = 0;

	if (cmd != TFD_IOC_SET_TICKS)
		return -ENOTTY;

	if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
		return -EFAULT;
	if (!ticks)
		return -EINVAL;

	spin_lock_irq(&ctx->wqh.lock);
	if (timerfd_canceled(ctx)) {
		ret = -ECANCELED;
	} else {
		ctx->ticks = ticks;
		wake_up_locked(&ctx->wqh);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return ret;
}
#else
#define timerfd_ioctl NULL
#endif
 353
 354static const struct file_operations timerfd_fops = {
 355        .release        = timerfd_release,
 356        .poll           = timerfd_poll,
 357        .read           = timerfd_read,
 358        .llseek         = noop_llseek,
 359        .show_fdinfo    = timerfd_show,
 360        .unlocked_ioctl = timerfd_ioctl,
 361};
 362
 363static int timerfd_fget(int fd, struct fd *p)
 364{
 365        struct fd f = fdget(fd);
 366        if (!f.file)
 367                return -EBADF;
 368        if (f.file->f_op != &timerfd_fops) {
 369                fdput(f);
 370                return -EINVAL;
 371        }
 372        *p = f;
 373        return 0;
 374}
 375
 376SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 377{
 378        int ufd;
 379        struct timerfd_ctx *ctx;
 380
 381        /* Check the TFD_* constants for consistency.  */
 382        BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
 383        BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
 384
 385        if ((flags & ~TFD_CREATE_FLAGS) ||
 386            (clockid != CLOCK_MONOTONIC &&
 387             clockid != CLOCK_REALTIME &&
 388             clockid != CLOCK_REALTIME_ALARM &&
 389             clockid != CLOCK_BOOTTIME &&
 390             clockid != CLOCK_BOOTTIME_ALARM))
 391                return -EINVAL;
 392
 393        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 394        if (!ctx)
 395                return -ENOMEM;
 396
 397        init_waitqueue_head(&ctx->wqh);
 398        ctx->clockid = clockid;
 399
 400        if (isalarm(ctx))
 401                alarm_init(&ctx->t.alarm,
 402                           ctx->clockid == CLOCK_REALTIME_ALARM ?
 403                           ALARM_REALTIME : ALARM_BOOTTIME,
 404                           timerfd_alarmproc);
 405        else
 406                hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
 407
 408        ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
 409
 410        ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
 411                               O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
 412        if (ufd < 0)
 413                kfree(ctx);
 414
 415        return ufd;
 416}
 417
 418static int do_timerfd_settime(int ufd, int flags, 
 419                const struct itimerspec *new,
 420                struct itimerspec *old)
 421{
 422        struct fd f;
 423        struct timerfd_ctx *ctx;
 424        int ret;
 425
 426        if ((flags & ~TFD_SETTIME_FLAGS) ||
 427            !timespec_valid(&new->it_value) ||
 428            !timespec_valid(&new->it_interval))
 429                return -EINVAL;
 430
 431        ret = timerfd_fget(ufd, &f);
 432        if (ret)
 433                return ret;
 434        ctx = f.file->private_data;
 435
 436        timerfd_setup_cancel(ctx, flags);
 437
 438        /*
 439         * We need to stop the existing timer before reprogramming
 440         * it to the new values.
 441         */
 442        for (;;) {
 443                spin_lock_irq(&ctx->wqh.lock);
 444
 445                if (isalarm(ctx)) {
 446                        if (alarm_try_to_cancel(&ctx->t.alarm) >= 0)
 447                                break;
 448                } else {
 449                        if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0)
 450                                break;
 451                }
 452                spin_unlock_irq(&ctx->wqh.lock);
 453                cpu_relax();
 454        }
 455
 456        /*
 457         * If the timer is expired and it's periodic, we need to advance it
 458         * because the caller may want to know the previous expiration time.
 459         * We do not update "ticks" and "expired" since the timer will be
 460         * re-programmed again in the following timerfd_setup() call.
 461         */
 462        if (ctx->expired && ctx->tintv.tv64) {
 463                if (isalarm(ctx))
 464                        alarm_forward_now(&ctx->t.alarm, ctx->tintv);
 465                else
 466                        hrtimer_forward_now(&ctx->t.tmr, ctx->tintv);
 467        }
 468
 469        old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
 470        old->it_interval = ktime_to_timespec(ctx->tintv);
 471
 472        /*
 473         * Re-program the timer to the new value ...
 474         */
 475        ret = timerfd_setup(ctx, flags, new);
 476
 477        spin_unlock_irq(&ctx->wqh.lock);
 478        fdput(f);
 479        return ret;
 480}
 481
 482static int do_timerfd_gettime(int ufd, struct itimerspec *t)
 483{
 484        struct fd f;
 485        struct timerfd_ctx *ctx;
 486        int ret = timerfd_fget(ufd, &f);
 487        if (ret)
 488                return ret;
 489        ctx = f.file->private_data;
 490
 491        spin_lock_irq(&ctx->wqh.lock);
 492        if (ctx->expired && ctx->tintv.tv64) {
 493                ctx->expired = 0;
 494
 495                if (isalarm(ctx)) {
 496                        ctx->ticks +=
 497                                alarm_forward_now(
 498                                        &ctx->t.alarm, ctx->tintv) - 1;
 499                        alarm_restart(&ctx->t.alarm);
 500                } else {
 501                        ctx->ticks +=
 502                                hrtimer_forward_now(&ctx->t.tmr, ctx->tintv)
 503                                - 1;
 504                        hrtimer_restart(&ctx->t.tmr);
 505                }
 506        }
 507        t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
 508        t->it_interval = ktime_to_timespec(ctx->tintv);
 509        spin_unlock_irq(&ctx->wqh.lock);
 510        fdput(f);
 511        return 0;
 512}
 513
 514SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 515                const struct itimerspec __user *, utmr,
 516                struct itimerspec __user *, otmr)
 517{
 518        struct itimerspec new, old;
 519        int ret;
 520
 521        if (copy_from_user(&new, utmr, sizeof(new)))
 522                return -EFAULT;
 523        ret = do_timerfd_settime(ufd, flags, &new, &old);
 524        if (ret)
 525                return ret;
 526        if (otmr && copy_to_user(otmr, &old, sizeof(old)))
 527                return -EFAULT;
 528
 529        return ret;
 530}
 531
 532SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 533{
 534        struct itimerspec kotmr;
 535        int ret = do_timerfd_gettime(ufd, &kotmr);
 536        if (ret)
 537                return ret;
 538        return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
 539}
 540
 541#ifdef CONFIG_COMPAT
 542COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 543                const struct compat_itimerspec __user *, utmr,
 544                struct compat_itimerspec __user *, otmr)
 545{
 546        struct itimerspec new, old;
 547        int ret;
 548
 549        if (get_compat_itimerspec(&new, utmr))
 550                return -EFAULT;
 551        ret = do_timerfd_settime(ufd, flags, &new, &old);
 552        if (ret)
 553                return ret;
 554        if (otmr && put_compat_itimerspec(otmr, &old))
 555                return -EFAULT;
 556        return ret;
 557}
 558
 559COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
 560                struct compat_itimerspec __user *, otmr)
 561{
 562        struct itimerspec kotmr;
 563        int ret = do_timerfd_gettime(ufd, &kotmr);
 564        if (ret)
 565                return ret;
 566        return put_compat_itimerspec(otmr, &kotmr) ? -EFAULT: 0;
 567}
 568#endif
 569