linux/fs/timerfd.c
<<
>>
Prefs
   1/*
   2 *  fs/timerfd.c
   3 *
   4 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
   5 *
   6 *
   7 *  Thanks to Thomas Gleixner for code reviews and useful comments.
   8 *
   9 */
  10
  11#include <linux/alarmtimer.h>
  12#include <linux/file.h>
  13#include <linux/poll.h>
  14#include <linux/init.h>
  15#include <linux/fs.h>
  16#include <linux/sched.h>
  17#include <linux/kernel.h>
  18#include <linux/slab.h>
  19#include <linux/list.h>
  20#include <linux/spinlock.h>
  21#include <linux/time.h>
  22#include <linux/hrtimer.h>
  23#include <linux/anon_inodes.h>
  24#include <linux/timerfd.h>
  25#include <linux/syscalls.h>
  26#include <linux/compat.h>
  27#include <linux/rcupdate.h>
  28
  29struct timerfd_ctx {
  30        union {
  31                struct hrtimer tmr;
  32                struct alarm alarm;
  33        } t;
  34        ktime_t tintv;
  35        ktime_t moffs;
  36        wait_queue_head_t wqh;
  37        u64 ticks;
  38        int clockid;
  39        short unsigned expired;
  40        short unsigned settime_flags;   /* to show in fdinfo */
  41        struct rcu_head rcu;
  42        struct list_head clist;
  43        spinlock_t cancel_lock;
  44        bool might_cancel;
  45};
  46
  47static LIST_HEAD(cancel_list);
  48static DEFINE_SPINLOCK(cancel_lock);
  49
  50static inline bool isalarm(struct timerfd_ctx *ctx)
  51{
  52        return ctx->clockid == CLOCK_REALTIME_ALARM ||
  53                ctx->clockid == CLOCK_BOOTTIME_ALARM;
  54}
  55
  56/*
  57 * This gets called when the timer event triggers. We set the "expired"
  58 * flag, but we do not re-arm the timer (in case it's necessary,
  59 * tintv != 0) until the timer is accessed.
  60 */
  61static void timerfd_triggered(struct timerfd_ctx *ctx)
  62{
  63        unsigned long flags;
  64
  65        spin_lock_irqsave(&ctx->wqh.lock, flags);
  66        ctx->expired = 1;
  67        ctx->ticks++;
  68        wake_up_locked(&ctx->wqh);
  69        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
  70}
  71
  72static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
  73{
  74        struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx,
  75                                               t.tmr);
  76        timerfd_triggered(ctx);
  77        return HRTIMER_NORESTART;
  78}
  79
  80static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
  81        ktime_t now)
  82{
  83        struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx,
  84                                               t.alarm);
  85        timerfd_triggered(ctx);
  86        return ALARMTIMER_NORESTART;
  87}
  88
  89/*
  90 * Called when the clock was set to cancel the timers in the cancel
  91 * list. This will wake up processes waiting on these timers. The
  92 * wake-up requires ctx->ticks to be non zero, therefore we increment
  93 * it before calling wake_up_locked().
  94 */
  95void timerfd_clock_was_set(void)
  96{
  97        ktime_t moffs = ktime_mono_to_real(0);
  98        struct timerfd_ctx *ctx;
  99        unsigned long flags;
 100
 101        rcu_read_lock();
 102        list_for_each_entry_rcu(ctx, &cancel_list, clist) {
 103                if (!ctx->might_cancel)
 104                        continue;
 105                spin_lock_irqsave(&ctx->wqh.lock, flags);
 106                if (ctx->moffs != moffs) {
 107                        ctx->moffs = KTIME_MAX;
 108                        ctx->ticks++;
 109                        wake_up_locked(&ctx->wqh);
 110                }
 111                spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 112        }
 113        rcu_read_unlock();
 114}
 115
 116static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
 117{
 118        if (ctx->might_cancel) {
 119                ctx->might_cancel = false;
 120                spin_lock(&cancel_lock);
 121                list_del_rcu(&ctx->clist);
 122                spin_unlock(&cancel_lock);
 123        }
 124}
 125
 126static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
 127{
 128        spin_lock(&ctx->cancel_lock);
 129        __timerfd_remove_cancel(ctx);
 130        spin_unlock(&ctx->cancel_lock);
 131}
 132
 133static bool timerfd_canceled(struct timerfd_ctx *ctx)
 134{
 135        if (!ctx->might_cancel || ctx->moffs != KTIME_MAX)
 136                return false;
 137        ctx->moffs = ktime_mono_to_real(0);
 138        return true;
 139}
 140
 141static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
 142{
 143        spin_lock(&ctx->cancel_lock);
 144        if ((ctx->clockid == CLOCK_REALTIME ||
 145             ctx->clockid == CLOCK_REALTIME_ALARM) &&
 146            (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
 147                if (!ctx->might_cancel) {
 148                        ctx->might_cancel = true;
 149                        spin_lock(&cancel_lock);
 150                        list_add_rcu(&ctx->clist, &cancel_list);
 151                        spin_unlock(&cancel_lock);
 152                }
 153        } else {
 154                __timerfd_remove_cancel(ctx);
 155        }
 156        spin_unlock(&ctx->cancel_lock);
 157}
 158
 159static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 160{
 161        ktime_t remaining;
 162
 163        if (isalarm(ctx))
 164                remaining = alarm_expires_remaining(&ctx->t.alarm);
 165        else
 166                remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 167
 168        return remaining < 0 ? 0: remaining;
 169}
 170
 171static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
 172                         const struct itimerspec64 *ktmr)
 173{
 174        enum hrtimer_mode htmode;
 175        ktime_t texp;
 176        int clockid = ctx->clockid;
 177
 178        htmode = (flags & TFD_TIMER_ABSTIME) ?
 179                HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
 180
 181        texp = timespec64_to_ktime(ktmr->it_value);
 182        ctx->expired = 0;
 183        ctx->ticks = 0;
 184        ctx->tintv = timespec64_to_ktime(ktmr->it_interval);
 185
 186        if (isalarm(ctx)) {
 187                alarm_init(&ctx->t.alarm,
 188                           ctx->clockid == CLOCK_REALTIME_ALARM ?
 189                           ALARM_REALTIME : ALARM_BOOTTIME,
 190                           timerfd_alarmproc);
 191        } else {
 192                hrtimer_init(&ctx->t.tmr, clockid, htmode);
 193                hrtimer_set_expires(&ctx->t.tmr, texp);
 194                ctx->t.tmr.function = timerfd_tmrproc;
 195        }
 196
 197        if (texp != 0) {
 198                if (isalarm(ctx)) {
 199                        if (flags & TFD_TIMER_ABSTIME)
 200                                alarm_start(&ctx->t.alarm, texp);
 201                        else
 202                                alarm_start_relative(&ctx->t.alarm, texp);
 203                } else {
 204                        hrtimer_start(&ctx->t.tmr, texp, htmode);
 205                }
 206
 207                if (timerfd_canceled(ctx))
 208                        return -ECANCELED;
 209        }
 210
 211        ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
 212        return 0;
 213}
 214
 215static int timerfd_release(struct inode *inode, struct file *file)
 216{
 217        struct timerfd_ctx *ctx = file->private_data;
 218
 219        timerfd_remove_cancel(ctx);
 220
 221        if (isalarm(ctx))
 222                alarm_cancel(&ctx->t.alarm);
 223        else
 224                hrtimer_cancel(&ctx->t.tmr);
 225        kfree_rcu(ctx, rcu);
 226        return 0;
 227}
 228
 229static unsigned int timerfd_poll(struct file *file, poll_table *wait)
 230{
 231        struct timerfd_ctx *ctx = file->private_data;
 232        unsigned int events = 0;
 233        unsigned long flags;
 234
 235        poll_wait(file, &ctx->wqh, wait);
 236
 237        spin_lock_irqsave(&ctx->wqh.lock, flags);
 238        if (ctx->ticks)
 239                events |= POLLIN;
 240        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 241
 242        return events;
 243}
 244
 245static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
 246                            loff_t *ppos)
 247{
 248        struct timerfd_ctx *ctx = file->private_data;
 249        ssize_t res;
 250        u64 ticks = 0;
 251
 252        if (count < sizeof(ticks))
 253                return -EINVAL;
 254        spin_lock_irq(&ctx->wqh.lock);
 255        if (file->f_flags & O_NONBLOCK)
 256                res = -EAGAIN;
 257        else
 258                res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
 259
 260        /*
 261         * If clock has changed, we do not care about the
 262         * ticks and we do not rearm the timer. Userspace must
 263         * reevaluate anyway.
 264         */
 265        if (timerfd_canceled(ctx)) {
 266                ctx->ticks = 0;
 267                ctx->expired = 0;
 268                res = -ECANCELED;
 269        }
 270
 271        if (ctx->ticks) {
 272                ticks = ctx->ticks;
 273
 274                if (ctx->expired && ctx->tintv) {
 275                        /*
 276                         * If tintv != 0, this is a periodic timer that
 277                         * needs to be re-armed. We avoid doing it in the timer
 278                         * callback to avoid DoS attacks specifying a very
 279                         * short timer period.
 280                         */
 281                        if (isalarm(ctx)) {
 282                                ticks += alarm_forward_now(
 283                                        &ctx->t.alarm, ctx->tintv) - 1;
 284                                alarm_restart(&ctx->t.alarm);
 285                        } else {
 286                                ticks += hrtimer_forward_now(&ctx->t.tmr,
 287                                                             ctx->tintv) - 1;
 288                                hrtimer_restart(&ctx->t.tmr);
 289                        }
 290                }
 291                ctx->expired = 0;
 292                ctx->ticks = 0;
 293        }
 294        spin_unlock_irq(&ctx->wqh.lock);
 295        if (ticks)
 296                res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
 297        return res;
 298}
 299
 300#ifdef CONFIG_PROC_FS
 301static void timerfd_show(struct seq_file *m, struct file *file)
 302{
 303        struct timerfd_ctx *ctx = file->private_data;
 304        struct itimerspec t;
 305
 306        spin_lock_irq(&ctx->wqh.lock);
 307        t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
 308        t.it_interval = ktime_to_timespec(ctx->tintv);
 309        spin_unlock_irq(&ctx->wqh.lock);
 310
 311        seq_printf(m,
 312                   "clockid: %d\n"
 313                   "ticks: %llu\n"
 314                   "settime flags: 0%o\n"
 315                   "it_value: (%llu, %llu)\n"
 316                   "it_interval: (%llu, %llu)\n",
 317                   ctx->clockid,
 318                   (unsigned long long)ctx->ticks,
 319                   ctx->settime_flags,
 320                   (unsigned long long)t.it_value.tv_sec,
 321                   (unsigned long long)t.it_value.tv_nsec,
 322                   (unsigned long long)t.it_interval.tv_sec,
 323                   (unsigned long long)t.it_interval.tv_nsec);
 324}
 325#else
 326#define timerfd_show NULL
 327#endif
 328
 329#ifdef CONFIG_CHECKPOINT_RESTORE
 330static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 331{
 332        struct timerfd_ctx *ctx = file->private_data;
 333        int ret = 0;
 334
 335        switch (cmd) {
 336        case TFD_IOC_SET_TICKS: {
 337                u64 ticks;
 338
 339                if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
 340                        return -EFAULT;
 341                if (!ticks)
 342                        return -EINVAL;
 343
 344                spin_lock_irq(&ctx->wqh.lock);
 345                if (!timerfd_canceled(ctx)) {
 346                        ctx->ticks = ticks;
 347                        wake_up_locked(&ctx->wqh);
 348                } else
 349                        ret = -ECANCELED;
 350                spin_unlock_irq(&ctx->wqh.lock);
 351                break;
 352        }
 353        default:
 354                ret = -ENOTTY;
 355                break;
 356        }
 357
 358        return ret;
 359}
 360#else
 361#define timerfd_ioctl NULL
 362#endif
 363
 364static const struct file_operations timerfd_fops = {
 365        .release        = timerfd_release,
 366        .poll           = timerfd_poll,
 367        .read           = timerfd_read,
 368        .llseek         = noop_llseek,
 369        .show_fdinfo    = timerfd_show,
 370        .unlocked_ioctl = timerfd_ioctl,
 371};
 372
 373static int timerfd_fget(int fd, struct fd *p)
 374{
 375        struct fd f = fdget(fd);
 376        if (!f.file)
 377                return -EBADF;
 378        if (f.file->f_op != &timerfd_fops) {
 379                fdput(f);
 380                return -EINVAL;
 381        }
 382        *p = f;
 383        return 0;
 384}
 385
 386SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 387{
 388        int ufd;
 389        struct timerfd_ctx *ctx;
 390
 391        /* Check the TFD_* constants for consistency.  */
 392        BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
 393        BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
 394
 395        if ((flags & ~TFD_CREATE_FLAGS) ||
 396            (clockid != CLOCK_MONOTONIC &&
 397             clockid != CLOCK_REALTIME &&
 398             clockid != CLOCK_REALTIME_ALARM &&
 399             clockid != CLOCK_BOOTTIME &&
 400             clockid != CLOCK_BOOTTIME_ALARM))
 401                return -EINVAL;
 402
 403        if ((clockid == CLOCK_REALTIME_ALARM ||
 404             clockid == CLOCK_BOOTTIME_ALARM) &&
 405            !capable(CAP_WAKE_ALARM))
 406                return -EPERM;
 407
 408        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 409        if (!ctx)
 410                return -ENOMEM;
 411
 412        init_waitqueue_head(&ctx->wqh);
 413        spin_lock_init(&ctx->cancel_lock);
 414        ctx->clockid = clockid;
 415
 416        if (isalarm(ctx))
 417                alarm_init(&ctx->t.alarm,
 418                           ctx->clockid == CLOCK_REALTIME_ALARM ?
 419                           ALARM_REALTIME : ALARM_BOOTTIME,
 420                           timerfd_alarmproc);
 421        else
 422                hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
 423
 424        ctx->moffs = ktime_mono_to_real(0);
 425
 426        ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
 427                               O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
 428        if (ufd < 0)
 429                kfree(ctx);
 430
 431        return ufd;
 432}
 433
 434static int do_timerfd_settime(int ufd, int flags, 
 435                const struct itimerspec64 *new,
 436                struct itimerspec64 *old)
 437{
 438        struct fd f;
 439        struct timerfd_ctx *ctx;
 440        int ret;
 441
 442        if ((flags & ~TFD_SETTIME_FLAGS) ||
 443                 !itimerspec64_valid(new))
 444                return -EINVAL;
 445
 446        ret = timerfd_fget(ufd, &f);
 447        if (ret)
 448                return ret;
 449        ctx = f.file->private_data;
 450
 451        if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) {
 452                fdput(f);
 453                return -EPERM;
 454        }
 455
 456        timerfd_setup_cancel(ctx, flags);
 457
 458        /*
 459         * We need to stop the existing timer before reprogramming
 460         * it to the new values.
 461         */
 462        for (;;) {
 463                spin_lock_irq(&ctx->wqh.lock);
 464
 465                if (isalarm(ctx)) {
 466                        if (alarm_try_to_cancel(&ctx->t.alarm) >= 0)
 467                                break;
 468                } else {
 469                        if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0)
 470                                break;
 471                }
 472                spin_unlock_irq(&ctx->wqh.lock);
 473                cpu_relax();
 474        }
 475
 476        /*
 477         * If the timer is expired and it's periodic, we need to advance it
 478         * because the caller may want to know the previous expiration time.
 479         * We do not update "ticks" and "expired" since the timer will be
 480         * re-programmed again in the following timerfd_setup() call.
 481         */
 482        if (ctx->expired && ctx->tintv) {
 483                if (isalarm(ctx))
 484                        alarm_forward_now(&ctx->t.alarm, ctx->tintv);
 485                else
 486                        hrtimer_forward_now(&ctx->t.tmr, ctx->tintv);
 487        }
 488
 489        old->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
 490        old->it_interval = ktime_to_timespec64(ctx->tintv);
 491
 492        /*
 493         * Re-program the timer to the new value ...
 494         */
 495        ret = timerfd_setup(ctx, flags, new);
 496
 497        spin_unlock_irq(&ctx->wqh.lock);
 498        fdput(f);
 499        return ret;
 500}
 501
 502static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
 503{
 504        struct fd f;
 505        struct timerfd_ctx *ctx;
 506        int ret = timerfd_fget(ufd, &f);
 507        if (ret)
 508                return ret;
 509        ctx = f.file->private_data;
 510
 511        spin_lock_irq(&ctx->wqh.lock);
 512        if (ctx->expired && ctx->tintv) {
 513                ctx->expired = 0;
 514
 515                if (isalarm(ctx)) {
 516                        ctx->ticks +=
 517                                alarm_forward_now(
 518                                        &ctx->t.alarm, ctx->tintv) - 1;
 519                        alarm_restart(&ctx->t.alarm);
 520                } else {
 521                        ctx->ticks +=
 522                                hrtimer_forward_now(&ctx->t.tmr, ctx->tintv)
 523                                - 1;
 524                        hrtimer_restart(&ctx->t.tmr);
 525                }
 526        }
 527        t->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
 528        t->it_interval = ktime_to_timespec64(ctx->tintv);
 529        spin_unlock_irq(&ctx->wqh.lock);
 530        fdput(f);
 531        return 0;
 532}
 533
 534SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 535                const struct itimerspec __user *, utmr,
 536                struct itimerspec __user *, otmr)
 537{
 538        struct itimerspec64 new, old;
 539        int ret;
 540
 541        if (get_itimerspec64(&new, utmr))
 542                return -EFAULT;
 543        ret = do_timerfd_settime(ufd, flags, &new, &old);
 544        if (ret)
 545                return ret;
 546        if (otmr && put_itimerspec64(&old, otmr))
 547                return -EFAULT;
 548
 549        return ret;
 550}
 551
 552SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 553{
 554        struct itimerspec64 kotmr;
 555        int ret = do_timerfd_gettime(ufd, &kotmr);
 556        if (ret)
 557                return ret;
 558        return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
 559}
 560
 561#ifdef CONFIG_COMPAT
 562COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 563                const struct compat_itimerspec __user *, utmr,
 564                struct compat_itimerspec __user *, otmr)
 565{
 566        struct itimerspec64 new, old;
 567        int ret;
 568
 569        if (get_compat_itimerspec64(&new, utmr))
 570                return -EFAULT;
 571        ret = do_timerfd_settime(ufd, flags, &new, &old);
 572        if (ret)
 573                return ret;
 574        if (otmr && put_compat_itimerspec64(&old, otmr))
 575                return -EFAULT;
 576        return ret;
 577}
 578
 579COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
 580                struct compat_itimerspec __user *, otmr)
 581{
 582        struct itimerspec64 kotmr;
 583        int ret = do_timerfd_gettime(ufd, &kotmr);
 584        if (ret)
 585                return ret;
 586        return put_compat_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
 587}
 588#endif
 589