linux/kernel/time/posix-cpu-timers.c
   1/*
   2 * Implement CPU time clocks for the POSIX clock interface.
   3 */
   4
   5#include <linux/sched.h>
   6#include <linux/posix-timers.h>
   7#include <linux/errno.h>
   8#include <linux/math64.h>
   9#include <asm/uaccess.h>
  10#include <linux/kernel_stat.h>
  11#include <trace/events/timer.h>
  12#include <linux/random.h>
  13#include <linux/tick.h>
  14#include <linux/workqueue.h>
  15
  16/*
   17 * Called after updating RLIMIT_CPU to run the cpu timer and update
  18 * tsk->signal->cputime_expires expiration cache if necessary. Needs
  19 * siglock protection since other code may update expiration cache as
  20 * well.
  21 */
  22void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
  23{
  24        cputime_t cputime = secs_to_cputime(rlim_new);
  25
  26        spin_lock_irq(&task->sighand->siglock);
  27        set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
  28        spin_unlock_irq(&task->sighand->siglock);
  29}
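/*
 * Usage sketch (userspace side, not part of this file; values are purely
 * illustrative): lowering the CPU rlimit with setrlimit() is what ends up
 * calling update_rlimit_cpu() above:
 *
 *	struct rlimit rl = { .rlim_cur = 10, .rlim_max = 20 };
 *
 *	setrlimit(RLIMIT_CPU, &rl);
 */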
  30
  31static int check_clock(const clockid_t which_clock)
  32{
  33        int error = 0;
  34        struct task_struct *p;
  35        const pid_t pid = CPUCLOCK_PID(which_clock);
  36
  37        if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
  38                return -EINVAL;
  39
  40        if (pid == 0)
  41                return 0;
  42
  43        rcu_read_lock();
  44        p = find_task_by_vpid(pid);
  45        if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
  46                   same_thread_group(p, current) : has_group_leader_pid(p))) {
  47                error = -EINVAL;
  48        }
  49        rcu_read_unlock();
  50
  51        return error;
  52}
  53
  54static inline unsigned long long
  55timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
  56{
  57        unsigned long long ret;
  58
  59        ret = 0;                /* high half always zero when .cpu used */
  60        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  61                ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
  62        } else {
  63                ret = cputime_to_expires(timespec_to_cputime(tp));
  64        }
  65        return ret;
  66}
  67
  68static void sample_to_timespec(const clockid_t which_clock,
  69                               unsigned long long expires,
  70                               struct timespec *tp)
  71{
  72        if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
  73                *tp = ns_to_timespec(expires);
  74        else
  75                cputime_to_timespec((__force cputime_t)expires, tp);
  76}
  77
  78/*
  79 * Update expiry time from increment, and increase overrun count,
  80 * given the current clock sample.
  81 */
  82static void bump_cpu_timer(struct k_itimer *timer,
  83                           unsigned long long now)
  84{
  85        int i;
  86        unsigned long long delta, incr;
  87
  88        if (timer->it.cpu.incr == 0)
  89                return;
  90
  91        if (now < timer->it.cpu.expires)
  92                return;
  93
  94        incr = timer->it.cpu.incr;
  95        delta = now + incr - timer->it.cpu.expires;
  96
  97        /* Don't use (incr*2 < delta), incr*2 might overflow. */
  98        for (i = 0; incr < delta - incr; i++)
  99                incr = incr << 1;
 100
 101        for (; i >= 0; incr >>= 1, i--) {
 102                if (delta < incr)
 103                        continue;
 104
 105                timer->it.cpu.expires += incr;
 106                timer->it_overrun += 1 << i;
 107                delta -= incr;
 108        }
 109}
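/*
 * Worked example for the doubling/halving logic above (numbers are purely
 * illustrative): with expires = 10, incr = 3 and now = 20, delta = 13.  The
 * first loop doubles incr to 12 (i = 2); the second loop adds that 12 back
 * (four intervals' worth), so expires becomes 22 and it_overrun is bumped by
 * 1 << 2 = 4, matching the four expirations at 10, 13, 16 and 19 that have
 * already passed.
 */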
 110
 111/**
 112 * task_cputime_zero - Check a task_cputime struct for all zero fields.
 113 *
 114 * @cputime:    The struct to compare.
 115 *
 116 * Checks @cputime to see if all fields are zero.  Returns true if all fields
 117 * are zero, false if any field is nonzero.
 118 */
 119static inline int task_cputime_zero(const struct task_cputime *cputime)
 120{
 121        if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
 122                return 1;
 123        return 0;
 124}
 125
 126static inline unsigned long long prof_ticks(struct task_struct *p)
 127{
 128        cputime_t utime, stime;
 129
 130        task_cputime(p, &utime, &stime);
 131
 132        return cputime_to_expires(utime + stime);
 133}
 134static inline unsigned long long virt_ticks(struct task_struct *p)
 135{
 136        cputime_t utime;
 137
 138        task_cputime(p, &utime, NULL);
 139
 140        return cputime_to_expires(utime);
 141}
 142
 143static int
 144posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
 145{
 146        int error = check_clock(which_clock);
 147        if (!error) {
 148                tp->tv_sec = 0;
 149                tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
 150                if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 151                        /*
  152                         * If sched_clock is using a cycle counter, its
  153                         * true resolution is not exported here, but it is
  154                         * certainly much finer than 1s/HZ.
 155                         */
 156                        tp->tv_nsec = 1;
 157                }
 158        }
 159        return error;
 160}
 161
 162static int
 163posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
 164{
 165        /*
 166         * You can never reset a CPU clock, but we check for other errors
 167         * in the call before failing with EPERM.
 168         */
 169        int error = check_clock(which_clock);
 170        if (error == 0) {
 171                error = -EPERM;
 172        }
 173        return error;
 174}
 175
 176
 177/*
 178 * Sample a per-thread clock for the given task.
 179 */
 180static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 181                            unsigned long long *sample)
 182{
 183        switch (CPUCLOCK_WHICH(which_clock)) {
 184        default:
 185                return -EINVAL;
 186        case CPUCLOCK_PROF:
 187                *sample = prof_ticks(p);
 188                break;
 189        case CPUCLOCK_VIRT:
 190                *sample = virt_ticks(p);
 191                break;
 192        case CPUCLOCK_SCHED:
 193                *sample = task_sched_runtime(p);
 194                break;
 195        }
 196        return 0;
 197}
 198
 199static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
 200{
 201        if (b->utime > a->utime)
 202                a->utime = b->utime;
 203
 204        if (b->stime > a->stime)
 205                a->stime = b->stime;
 206
 207        if (b->sum_exec_runtime > a->sum_exec_runtime)
 208                a->sum_exec_runtime = b->sum_exec_runtime;
 209}
 210
 211void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 212{
 213        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 214        struct task_cputime sum;
 215        unsigned long flags;
 216
 217        if (!cputimer->running) {
 218                /*
 219                 * The POSIX timer interface allows for absolute time expiry
 220                 * values through the TIMER_ABSTIME flag, therefore we have
 221                 * to synchronize the timer to the clock every time we start
 222                 * it.
 223                 */
 224                thread_group_cputime(tsk, &sum);
 225                raw_spin_lock_irqsave(&cputimer->lock, flags);
 226                cputimer->running = 1;
 227                update_gt_cputime(&cputimer->cputime, &sum);
 228        } else
 229                raw_spin_lock_irqsave(&cputimer->lock, flags);
 230        *times = cputimer->cputime;
 231        raw_spin_unlock_irqrestore(&cputimer->lock, flags);
 232}
 233
 234/*
 235 * Sample a process (thread group) clock for the given group_leader task.
 236 * Must be called with task sighand lock held for safe while_each_thread()
 237 * traversal.
 238 */
 239static int cpu_clock_sample_group(const clockid_t which_clock,
 240                                  struct task_struct *p,
 241                                  unsigned long long *sample)
 242{
 243        struct task_cputime cputime;
 244
 245        switch (CPUCLOCK_WHICH(which_clock)) {
 246        default:
 247                return -EINVAL;
 248        case CPUCLOCK_PROF:
 249                thread_group_cputime(p, &cputime);
 250                *sample = cputime_to_expires(cputime.utime + cputime.stime);
 251                break;
 252        case CPUCLOCK_VIRT:
 253                thread_group_cputime(p, &cputime);
 254                *sample = cputime_to_expires(cputime.utime);
 255                break;
 256        case CPUCLOCK_SCHED:
 257                thread_group_cputime(p, &cputime);
 258                *sample = cputime.sum_exec_runtime;
 259                break;
 260        }
 261        return 0;
 262}
 263
 264static int posix_cpu_clock_get_task(struct task_struct *tsk,
 265                                    const clockid_t which_clock,
 266                                    struct timespec *tp)
 267{
 268        int err = -EINVAL;
 269        unsigned long long rtn;
 270
 271        if (CPUCLOCK_PERTHREAD(which_clock)) {
 272                if (same_thread_group(tsk, current))
 273                        err = cpu_clock_sample(which_clock, tsk, &rtn);
 274        } else {
 275                if (tsk == current || thread_group_leader(tsk))
 276                        err = cpu_clock_sample_group(which_clock, tsk, &rtn);
 277        }
 278
 279        if (!err)
 280                sample_to_timespec(which_clock, rtn, tp);
 281
 282        return err;
 283}
 284
 285
 286static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 287{
 288        const pid_t pid = CPUCLOCK_PID(which_clock);
 289        int err = -EINVAL;
 290
 291        if (pid == 0) {
 292                /*
 293                 * Special case constant value for our own clocks.
 294                 * We don't have to do any lookup to find ourselves.
 295                 */
 296                err = posix_cpu_clock_get_task(current, which_clock, tp);
 297        } else {
 298                /*
 299                 * Find the given PID, and validate that the caller
 300                 * should be able to see it.
 301                 */
 302                struct task_struct *p;
 303                rcu_read_lock();
 304                p = find_task_by_vpid(pid);
 305                if (p)
 306                        err = posix_cpu_clock_get_task(p, which_clock, tp);
 307                rcu_read_unlock();
 308        }
 309
 310        return err;
 311}
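/*
 * Usage sketch (userspace side, not part of this file): the dynamic clockids
 * handled here are usually obtained with clock_getcpuclockid(3) or
 * pthread_getcpuclockid(3) and then read like any other clock:
 *
 *	clockid_t cid;
 *	struct timespec ts;
 *
 *	if (clock_getcpuclockid(pid, &cid) == 0)
 *		clock_gettime(cid, &ts);
 *
 * The predefined CLOCK_PROCESS_CPUTIME_ID / CLOCK_THREAD_CPUTIME_ID clocks
 * take the pid == 0 fast path above.
 */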
 312
 313
 314/*
 315 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
 316 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
 317 * new timer already all-zeros initialized.
 318 */
 319static int posix_cpu_timer_create(struct k_itimer *new_timer)
 320{
 321        int ret = 0;
 322        const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
 323        struct task_struct *p;
 324
 325        if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
 326                return -EINVAL;
 327
 328        INIT_LIST_HEAD(&new_timer->it.cpu.entry);
 329
 330        rcu_read_lock();
 331        if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
 332                if (pid == 0) {
 333                        p = current;
 334                } else {
 335                        p = find_task_by_vpid(pid);
 336                        if (p && !same_thread_group(p, current))
 337                                p = NULL;
 338                }
 339        } else {
 340                if (pid == 0) {
 341                        p = current->group_leader;
 342                } else {
 343                        p = find_task_by_vpid(pid);
 344                        if (p && !has_group_leader_pid(p))
 345                                p = NULL;
 346                }
 347        }
 348        new_timer->it.cpu.task = p;
 349        if (p) {
 350                get_task_struct(p);
 351        } else {
 352                ret = -EINVAL;
 353        }
 354        rcu_read_unlock();
 355
 356        return ret;
 357}
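/*
 * Usage sketch (userspace side, not part of this file): a CPU-time timer is
 * created through the regular POSIX timer API with a CPU clockid, e.g.:
 *
 *	struct sigevent sev = {
 *		.sigev_notify = SIGEV_SIGNAL,
 *		.sigev_signo  = SIGALRM,
 *	};
 *	timer_t tid;
 *
 *	timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid);
 *
 * which reaches this function (via process_cpu_timer_create() below) with a
 * process-wide clock and pid == 0.
 */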
 358
 359/*
 360 * Clean up a CPU-clock timer that is about to be destroyed.
 361 * This is called from timer deletion with the timer already locked.
 362 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 363 * and try again.  (This happens when the timer is in the middle of firing.)
 364 */
 365static int posix_cpu_timer_del(struct k_itimer *timer)
 366{
 367        int ret = 0;
 368        unsigned long flags;
 369        struct sighand_struct *sighand;
 370        struct task_struct *p = timer->it.cpu.task;
 371
 372        WARN_ON_ONCE(p == NULL);
 373
 374        /*
  375         * Protect against sighand release/switch in exit/exec and against
  376         * concurrent reads/writes of the process/thread timer list entries.
 377         */
 378        sighand = lock_task_sighand(p, &flags);
 379        if (unlikely(sighand == NULL)) {
 380                /*
 381                 * We raced with the reaping of the task.
 382                 * The deletion should have cleared us off the list.
 383                 */
 384                WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
 385        } else {
 386                if (timer->it.cpu.firing)
 387                        ret = TIMER_RETRY;
 388                else
 389                        list_del(&timer->it.cpu.entry);
 390
 391                unlock_task_sighand(p, &flags);
 392        }
 393
 394        if (!ret)
 395                put_task_struct(p);
 396
 397        return ret;
 398}
 399
 400static void cleanup_timers_list(struct list_head *head)
 401{
 402        struct cpu_timer_list *timer, *next;
 403
 404        list_for_each_entry_safe(timer, next, head, entry)
 405                list_del_init(&timer->entry);
 406}
 407
 408/*
  409 * Clean out CPU timers still ticking when a thread exits.  The timers
  410 * are only unlinked from the expiry lists here; the k_itimer structures
  411 * themselves are torn down later through timer deletion.
 412 * This must be called with the siglock held.
 413 */
 414static void cleanup_timers(struct list_head *head)
 415{
 416        cleanup_timers_list(head);
 417        cleanup_timers_list(++head);
 418        cleanup_timers_list(++head);
 419}
 420
 421/*
 422 * These are both called with the siglock held, when the current thread
 423 * is being reaped.  When the final (leader) thread in the group is reaped,
 424 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
 425 */
 426void posix_cpu_timers_exit(struct task_struct *tsk)
 427{
 428        add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 429                                                sizeof(unsigned long long));
 430        cleanup_timers(tsk->cpu_timers);
 431
 432}
 433void posix_cpu_timers_exit_group(struct task_struct *tsk)
 434{
 435        cleanup_timers(tsk->signal->cpu_timers);
 436}
 437
 438static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 439{
 440        return expires == 0 || expires > new_exp;
 441}
 442
 443/*
 444 * Insert the timer on the appropriate list before any timers that
 445 * expire later.  This must be called with the sighand lock held.
 446 */
 447static void arm_timer(struct k_itimer *timer)
 448{
 449        struct task_struct *p = timer->it.cpu.task;
 450        struct list_head *head, *listpos;
 451        struct task_cputime *cputime_expires;
 452        struct cpu_timer_list *const nt = &timer->it.cpu;
 453        struct cpu_timer_list *next;
 454
 455        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 456                head = p->cpu_timers;
 457                cputime_expires = &p->cputime_expires;
 458        } else {
 459                head = p->signal->cpu_timers;
 460                cputime_expires = &p->signal->cputime_expires;
 461        }
 462        head += CPUCLOCK_WHICH(timer->it_clock);
 463
 464        listpos = head;
 465        list_for_each_entry(next, head, entry) {
 466                if (nt->expires < next->expires)
 467                        break;
 468                listpos = &next->entry;
 469        }
 470        list_add(&nt->entry, listpos);
 471
 472        if (listpos == head) {
 473                unsigned long long exp = nt->expires;
 474
 475                /*
 476                 * We are the new earliest-expiring POSIX 1.b timer, hence
 477                 * need to update expiration cache. Take into account that
 478                 * for process timers we share expiration cache with itimers
 479                 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
 480                 */
 481
 482                switch (CPUCLOCK_WHICH(timer->it_clock)) {
 483                case CPUCLOCK_PROF:
 484                        if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
 485                                cputime_expires->prof_exp = expires_to_cputime(exp);
 486                        break;
 487                case CPUCLOCK_VIRT:
 488                        if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
 489                                cputime_expires->virt_exp = expires_to_cputime(exp);
 490                        break;
 491                case CPUCLOCK_SCHED:
 492                        if (cputime_expires->sched_exp == 0 ||
 493                            cputime_expires->sched_exp > exp)
 494                                cputime_expires->sched_exp = exp;
 495                        break;
 496                }
 497        }
 498}
 499
 500/*
 501 * The timer is locked, fire it and arrange for its reload.
 502 */
 503static void cpu_timer_fire(struct k_itimer *timer)
 504{
 505        if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
 506                /*
  507                 * The user doesn't want any signal.
 508                 */
 509                timer->it.cpu.expires = 0;
 510        } else if (unlikely(timer->sigq == NULL)) {
 511                /*
  512                 * This is a special case for clock_nanosleep,
 513                 * not a normal timer from sys_timer_create.
 514                 */
 515                wake_up_process(timer->it_process);
 516                timer->it.cpu.expires = 0;
 517        } else if (timer->it.cpu.incr == 0) {
 518                /*
 519                 * One-shot timer.  Clear it as soon as it's fired.
 520                 */
 521                posix_timer_event(timer, 0);
 522                timer->it.cpu.expires = 0;
 523        } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
 524                /*
 525                 * The signal did not get queued because the signal
 526                 * was ignored, so we won't get any callback to
 527                 * reload the timer.  But we need to keep it
 528                 * ticking in case the signal is deliverable next time.
 529                 */
 530                posix_cpu_timer_schedule(timer);
 531        }
 532}
 533
 534/*
 535 * Sample a process (thread group) timer for the given group_leader task.
 536 * Must be called with task sighand lock held for safe while_each_thread()
 537 * traversal.
 538 */
 539static int cpu_timer_sample_group(const clockid_t which_clock,
 540                                  struct task_struct *p,
 541                                  unsigned long long *sample)
 542{
 543        struct task_cputime cputime;
 544
 545        thread_group_cputimer(p, &cputime);
 546        switch (CPUCLOCK_WHICH(which_clock)) {
 547        default:
 548                return -EINVAL;
 549        case CPUCLOCK_PROF:
 550                *sample = cputime_to_expires(cputime.utime + cputime.stime);
 551                break;
 552        case CPUCLOCK_VIRT:
 553                *sample = cputime_to_expires(cputime.utime);
 554                break;
 555        case CPUCLOCK_SCHED:
 556                *sample = cputime.sum_exec_runtime;
 557                break;
 558        }
 559        return 0;
 560}
 561
 562#ifdef CONFIG_NO_HZ_FULL
 563static void nohz_kick_work_fn(struct work_struct *work)
 564{
 565        tick_nohz_full_kick_all();
 566}
 567
 568static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
 569
 570/*
 571 * We need the IPIs to be sent from sane process context.
 572 * The posix cpu timers are always set with irqs disabled.
 573 */
 574static void posix_cpu_timer_kick_nohz(void)
 575{
 576        if (context_tracking_is_enabled())
 577                schedule_work(&nohz_kick_work);
 578}
 579
 580bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
 581{
 582        if (!task_cputime_zero(&tsk->cputime_expires))
 583                return false;
 584
 585        if (tsk->signal->cputimer.running)
 586                return false;
 587
 588        return true;
 589}
 590#else
 591static inline void posix_cpu_timer_kick_nohz(void) { }
 592#endif
 593
 594/*
 595 * Guts of sys_timer_settime for CPU timers.
 596 * This is called with the timer locked and interrupts disabled.
 597 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 598 * and try again.  (This happens when the timer is in the middle of firing.)
 599 */
 600static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 601                               struct itimerspec *new, struct itimerspec *old)
 602{
 603        unsigned long flags;
 604        struct sighand_struct *sighand;
 605        struct task_struct *p = timer->it.cpu.task;
 606        unsigned long long old_expires, new_expires, old_incr, val;
 607        int ret;
 608
 609        WARN_ON_ONCE(p == NULL);
 610
 611        new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
 612
 613        /*
 614         * Protect against sighand release/switch in exit/exec and p->cpu_timers
 615         * and p->signal->cpu_timers read/write in arm_timer()
 616         */
 617        sighand = lock_task_sighand(p, &flags);
 618        /*
 619         * If p has just been reaped, we can no
 620         * longer get any information about it at all.
 621         */
 622        if (unlikely(sighand == NULL)) {
 623                return -ESRCH;
 624        }
 625
 626        /*
 627         * Disarm any old timer after extracting its expiry time.
 628         */
 629        WARN_ON_ONCE(!irqs_disabled());
 630
 631        ret = 0;
 632        old_incr = timer->it.cpu.incr;
 633        old_expires = timer->it.cpu.expires;
 634        if (unlikely(timer->it.cpu.firing)) {
 635                timer->it.cpu.firing = -1;
 636                ret = TIMER_RETRY;
 637        } else
 638                list_del_init(&timer->it.cpu.entry);
 639
 640        /*
 641         * We need to sample the current value to convert the new
  642         * value from relative to absolute, and to convert the
 643         * old value from absolute to relative.  To set a process
 644         * timer, we need a sample to balance the thread expiry
 645         * times (in arm_timer).  With an absolute time, we must
 646         * check if it's already passed.  In short, we need a sample.
 647         */
 648        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 649                cpu_clock_sample(timer->it_clock, p, &val);
 650        } else {
 651                cpu_timer_sample_group(timer->it_clock, p, &val);
 652        }
 653
 654        if (old) {
 655                if (old_expires == 0) {
 656                        old->it_value.tv_sec = 0;
 657                        old->it_value.tv_nsec = 0;
 658                } else {
 659                        /*
 660                         * Update the timer in case it has
 661                         * overrun already.  If it has,
 662                         * we'll report it as having overrun
 663                         * and with the next reloaded timer
 664                         * already ticking, though we are
 665                         * swallowing that pending
 666                         * notification here to install the
 667                         * new setting.
 668                         */
 669                        bump_cpu_timer(timer, val);
 670                        if (val < timer->it.cpu.expires) {
 671                                old_expires = timer->it.cpu.expires - val;
 672                                sample_to_timespec(timer->it_clock,
 673                                                   old_expires,
 674                                                   &old->it_value);
 675                        } else {
 676                                old->it_value.tv_nsec = 1;
 677                                old->it_value.tv_sec = 0;
 678                        }
 679                }
 680        }
 681
 682        if (unlikely(ret)) {
 683                /*
 684                 * We are colliding with the timer actually firing.
 685                 * Punt after filling in the timer's old value, and
 686                 * disable this firing since we are already reporting
 687                 * it as an overrun (thanks to bump_cpu_timer above).
 688                 */
 689                unlock_task_sighand(p, &flags);
 690                goto out;
 691        }
 692
 693        if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
 694                new_expires += val;
 695        }
 696
 697        /*
 698         * Install the new expiry time (or zero).
 699         * For a timer with no notification action, we don't actually
 700         * arm the timer (we'll just fake it for timer_gettime).
 701         */
 702        timer->it.cpu.expires = new_expires;
 703        if (new_expires != 0 && val < new_expires) {
 704                arm_timer(timer);
 705        }
 706
 707        unlock_task_sighand(p, &flags);
 708        /*
 709         * Install the new reload setting, and
 710         * set up the signal and overrun bookkeeping.
 711         */
 712        timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
 713                                                &new->it_interval);
 714
 715        /*
 716         * This acts as a modification timestamp for the timer,
 717         * so any automatic reload attempt will punt on seeing
 718         * that we have reset the timer manually.
 719         */
 720        timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
 721                ~REQUEUE_PENDING;
 722        timer->it_overrun_last = 0;
 723        timer->it_overrun = -1;
 724
 725        if (new_expires != 0 && !(val < new_expires)) {
 726                /*
 727                 * The designated time already passed, so we notify
 728                 * immediately, even if the thread never runs to
 729                 * accumulate more time on this clock.
 730                 */
 731                cpu_timer_fire(timer);
 732        }
 733
 734        ret = 0;
 735 out:
 736        if (old) {
 737                sample_to_timespec(timer->it_clock,
 738                                   old_incr, &old->it_interval);
 739        }
 740        if (!ret)
 741                posix_cpu_timer_kick_nohz();
 742        return ret;
 743}
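/*
 * Usage sketch (userspace side, not part of this file; values are purely
 * illustrative, tid comes from a previous timer_create()): arming such a
 * timer with an initial expiry and a reload interval goes through
 * timer_settime():
 *
 *	struct itimerspec its = {
 *		.it_value    = { .tv_sec = 1, .tv_nsec = 0 },
 *		.it_interval = { .tv_sec = 0, .tv_nsec = 250000000 },
 *	};
 *
 *	timer_settime(tid, 0, &its, NULL);
 *
 * With TIMER_ABSTIME in the flags, it_value is treated as an absolute
 * CPU-clock sample instead of a relative offset, as handled above.
 */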
 744
 745static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 746{
 747        unsigned long long now;
 748        struct task_struct *p = timer->it.cpu.task;
 749
 750        WARN_ON_ONCE(p == NULL);
 751
 752        /*
 753         * Easy part: convert the reload time.
 754         */
 755        sample_to_timespec(timer->it_clock,
 756                           timer->it.cpu.incr, &itp->it_interval);
 757
 758        if (timer->it.cpu.expires == 0) {       /* Timer not armed at all.  */
 759                itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
 760                return;
 761        }
 762
 763        /*
 764         * Sample the clock to take the difference with the expiry time.
 765         */
 766        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 767                cpu_clock_sample(timer->it_clock, p, &now);
 768        } else {
 769                struct sighand_struct *sighand;
 770                unsigned long flags;
 771
 772                /*
 773                 * Protect against sighand release/switch in exit/exec and
 774                 * also make timer sampling safe if it ends up calling
 775                 * thread_group_cputime().
 776                 */
 777                sighand = lock_task_sighand(p, &flags);
 778                if (unlikely(sighand == NULL)) {
 779                        /*
 780                         * The process has been reaped.
 781                         * We can't even collect a sample any more.
 782                         * Call the timer disarmed, nothing else to do.
 783                         */
 784                        timer->it.cpu.expires = 0;
 785                        sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
 786                                           &itp->it_value);
 787                } else {
 788                        cpu_timer_sample_group(timer->it_clock, p, &now);
 789                        unlock_task_sighand(p, &flags);
 790                }
 791        }
 792
 793        if (now < timer->it.cpu.expires) {
 794                sample_to_timespec(timer->it_clock,
 795                                   timer->it.cpu.expires - now,
 796                                   &itp->it_value);
 797        } else {
 798                /*
 799                 * The timer should have expired already, but the firing
 800                 * hasn't taken place yet.  Say it's just about to expire.
 801                 */
 802                itp->it_value.tv_nsec = 1;
 803                itp->it_value.tv_sec = 0;
 804        }
 805}
 806
 807static unsigned long long
 808check_timers_list(struct list_head *timers,
 809                  struct list_head *firing,
 810                  unsigned long long curr)
 811{
 812        int maxfire = 20;
 813
 814        while (!list_empty(timers)) {
 815                struct cpu_timer_list *t;
 816
 817                t = list_first_entry(timers, struct cpu_timer_list, entry);
 818
 819                if (!--maxfire || curr < t->expires)
 820                        return t->expires;
 821
 822                t->firing = 1;
 823                list_move_tail(&t->entry, firing);
 824        }
 825
 826        return 0;
 827}
 828
 829/*
 830 * Check for any per-thread CPU timers that have fired and move them off
 831 * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
  832 * tsk->cputime_expires values to reflect the remaining thread CPU timers.
 833 */
 834static void check_thread_timers(struct task_struct *tsk,
 835                                struct list_head *firing)
 836{
 837        struct list_head *timers = tsk->cpu_timers;
 838        struct signal_struct *const sig = tsk->signal;
 839        struct task_cputime *tsk_expires = &tsk->cputime_expires;
 840        unsigned long long expires;
 841        unsigned long soft;
 842
 843        expires = check_timers_list(timers, firing, prof_ticks(tsk));
 844        tsk_expires->prof_exp = expires_to_cputime(expires);
 845
 846        expires = check_timers_list(++timers, firing, virt_ticks(tsk));
 847        tsk_expires->virt_exp = expires_to_cputime(expires);
 848
 849        tsk_expires->sched_exp = check_timers_list(++timers, firing,
 850                                                   tsk->se.sum_exec_runtime);
 851
 852        /*
 853         * Check for the special case thread timers.
 854         */
 855        soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
 856        if (soft != RLIM_INFINITY) {
 857                unsigned long hard =
 858                        ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
 859
 860                if (hard != RLIM_INFINITY &&
 861                    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
 862                        /*
 863                         * At the hard limit, we just die.
 864                         * No need to calculate anything else now.
 865                         */
 866                        __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 867                        return;
 868                }
 869                if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
 870                        /*
 871                         * At the soft limit, send a SIGXCPU every second.
 872                         */
 873                        if (soft < hard) {
 874                                soft += USEC_PER_SEC;
 875                                sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
 876                        }
 877                        printk(KERN_INFO
 878                                "RT Watchdog Timeout: %s[%d]\n",
 879                                tsk->comm, task_pid_nr(tsk));
 880                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 881                }
 882        }
 883}
 884
 885static void stop_process_timers(struct signal_struct *sig)
 886{
 887        struct thread_group_cputimer *cputimer = &sig->cputimer;
 888        unsigned long flags;
 889
 890        raw_spin_lock_irqsave(&cputimer->lock, flags);
 891        cputimer->running = 0;
 892        raw_spin_unlock_irqrestore(&cputimer->lock, flags);
 893}
 894
 895static u32 onecputick;
 896
 897static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 898                             unsigned long long *expires,
 899                             unsigned long long cur_time, int signo)
 900{
 901        if (!it->expires)
 902                return;
 903
 904        if (cur_time >= it->expires) {
 905                if (it->incr) {
 906                        it->expires += it->incr;
 907                        it->error += it->incr_error;
 908                        if (it->error >= onecputick) {
 909                                it->expires -= cputime_one_jiffy;
 910                                it->error -= onecputick;
 911                        }
 912                } else {
 913                        it->expires = 0;
 914                }
 915
 916                trace_itimer_expire(signo == SIGPROF ?
 917                                    ITIMER_PROF : ITIMER_VIRTUAL,
 918                                    tsk->signal->leader_pid, cur_time);
 919                __group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 920        }
 921
 922        if (it->expires && (!*expires || it->expires < *expires)) {
 923                *expires = it->expires;
 924        }
 925}
 926
 927/*
  928 * Check for any process-wide CPU timers that have fired and move them
  929 * off the tsk->signal->cpu_timers lists onto the firing list.  Per-thread timers
 930 * have already been taken off.
 931 */
 932static void check_process_timers(struct task_struct *tsk,
 933                                 struct list_head *firing)
 934{
 935        struct signal_struct *const sig = tsk->signal;
 936        unsigned long long utime, ptime, virt_expires, prof_expires;
 937        unsigned long long sum_sched_runtime, sched_expires;
 938        struct list_head *timers = sig->cpu_timers;
 939        struct task_cputime cputime;
 940        unsigned long soft;
 941
 942        /*
 943         * Collect the current process totals.
 944         */
 945        thread_group_cputimer(tsk, &cputime);
 946        utime = cputime_to_expires(cputime.utime);
 947        ptime = utime + cputime_to_expires(cputime.stime);
 948        sum_sched_runtime = cputime.sum_exec_runtime;
 949
 950        prof_expires = check_timers_list(timers, firing, ptime);
 951        virt_expires = check_timers_list(++timers, firing, utime);
 952        sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
 953
 954        /*
 955         * Check for the special case process timers.
 956         */
 957        check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
 958                         SIGPROF);
 959        check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
 960                         SIGVTALRM);
 961        soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
 962        if (soft != RLIM_INFINITY) {
 963                unsigned long psecs = cputime_to_secs(ptime);
 964                unsigned long hard =
 965                        ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
 966                cputime_t x;
 967                if (psecs >= hard) {
 968                        /*
 969                         * At the hard limit, we just die.
 970                         * No need to calculate anything else now.
 971                         */
 972                        __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 973                        return;
 974                }
 975                if (psecs >= soft) {
 976                        /*
 977                         * At the soft limit, send a SIGXCPU every second.
 978                         */
 979                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 980                        if (soft < hard) {
 981                                soft++;
 982                                sig->rlim[RLIMIT_CPU].rlim_cur = soft;
 983                        }
 984                }
 985                x = secs_to_cputime(soft);
 986                if (!prof_expires || x < prof_expires) {
 987                        prof_expires = x;
 988                }
 989        }
 990
 991        sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
 992        sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
 993        sig->cputime_expires.sched_exp = sched_expires;
 994        if (task_cputime_zero(&sig->cputime_expires))
 995                stop_process_timers(sig);
 996}
 997
 998/*
 999 * This is called from the signal code (via do_schedule_next_timer)
1000 * when the last timer signal was delivered and we have to reload the timer.
1001 */
1002void posix_cpu_timer_schedule(struct k_itimer *timer)
1003{
1004        struct sighand_struct *sighand;
1005        unsigned long flags;
1006        struct task_struct *p = timer->it.cpu.task;
1007        unsigned long long now;
1008
1009        WARN_ON_ONCE(p == NULL);
1010
1011        /*
1012         * Fetch the current sample and update the timer's expiry time.
1013         */
1014        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
1015                cpu_clock_sample(timer->it_clock, p, &now);
1016                bump_cpu_timer(timer, now);
1017                if (unlikely(p->exit_state))
1018                        goto out;
1019
1020                /* Protect timer list r/w in arm_timer() */
1021                sighand = lock_task_sighand(p, &flags);
1022                if (!sighand)
1023                        goto out;
1024        } else {
1025                /*
1026                 * Protect arm_timer() and timer sampling in case of call to
1027                 * thread_group_cputime().
1028                 */
1029                sighand = lock_task_sighand(p, &flags);
1030                if (unlikely(sighand == NULL)) {
1031                        /*
1032                         * The process has been reaped.
1033                         * We can't even collect a sample any more.
1034                         */
1035                        timer->it.cpu.expires = 0;
1036                        goto out;
1037                } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
1038                        unlock_task_sighand(p, &flags);
 1039                        /* Optimization: if the process is dying, no need to rearm */
1040                        goto out;
1041                }
1042                cpu_timer_sample_group(timer->it_clock, p, &now);
1043                bump_cpu_timer(timer, now);
1044                /* Leave the sighand locked for the call below.  */
1045        }
1046
1047        /*
1048         * Now re-arm for the new expiry time.
1049         */
1050        WARN_ON_ONCE(!irqs_disabled());
1051        arm_timer(timer);
1052        unlock_task_sighand(p, &flags);
1053
1054        /* Kick full dynticks CPUs in case they need to tick on the new timer */
1055        posix_cpu_timer_kick_nohz();
1056out:
1057        timer->it_overrun_last = timer->it_overrun;
1058        timer->it_overrun = -1;
1059        ++timer->it_requeue_pending;
1060}
1061
1062/**
1063 * task_cputime_expired - Compare two task_cputime entities.
1064 *
1065 * @sample:     The task_cputime structure to be checked for expiration.
1066 * @expires:    Expiration times, against which @sample will be checked.
1067 *
1068 * Checks @sample against @expires to see if any field of @sample has expired.
1069 * Returns true if any field of the former is greater than the corresponding
1070 * field of the latter if the latter field is set.  Otherwise returns false.
1071 */
1072static inline int task_cputime_expired(const struct task_cputime *sample,
1073                                        const struct task_cputime *expires)
1074{
1075        if (expires->utime && sample->utime >= expires->utime)
1076                return 1;
1077        if (expires->stime && sample->utime + sample->stime >= expires->stime)
1078                return 1;
1079        if (expires->sum_exec_runtime != 0 &&
1080            sample->sum_exec_runtime >= expires->sum_exec_runtime)
1081                return 1;
1082        return 0;
1083}
1084
1085/**
1086 * fastpath_timer_check - POSIX CPU timers fast path.
1087 *
1088 * @tsk:        The task (thread) being checked.
1089 *
1090 * Check the task and thread group timers.  If both are zero (there are no
1091 * timers set) return false.  Otherwise snapshot the task and thread group
1092 * timers and compare them with the corresponding expiration times.  Return
1093 * true if a timer has expired, else return false.
1094 */
1095static inline int fastpath_timer_check(struct task_struct *tsk)
1096{
1097        struct signal_struct *sig;
1098        cputime_t utime, stime;
1099
1100        task_cputime(tsk, &utime, &stime);
1101
1102        if (!task_cputime_zero(&tsk->cputime_expires)) {
1103                struct task_cputime task_sample = {
1104                        .utime = utime,
1105                        .stime = stime,
1106                        .sum_exec_runtime = tsk->se.sum_exec_runtime
1107                };
1108
1109                if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1110                        return 1;
1111        }
1112
1113        sig = tsk->signal;
1114        if (sig->cputimer.running) {
1115                struct task_cputime group_sample;
1116
1117                raw_spin_lock(&sig->cputimer.lock);
1118                group_sample = sig->cputimer.cputime;
1119                raw_spin_unlock(&sig->cputimer.lock);
1120
1121                if (task_cputime_expired(&group_sample, &sig->cputime_expires))
1122                        return 1;
1123        }
1124
1125        return 0;
1126}
1127
1128/*
1129 * This is called from the timer interrupt handler.  The irq handler has
1130 * already updated our counts.  We need to check if any timers fire now.
1131 * Interrupts are disabled.
1132 */
1133void run_posix_cpu_timers(struct task_struct *tsk)
1134{
1135        LIST_HEAD(firing);
1136        struct k_itimer *timer, *next;
1137        unsigned long flags;
1138
1139        WARN_ON_ONCE(!irqs_disabled());
1140
1141        /*
1142         * The fast path checks that there are no expired thread or thread
1143         * group timers.  If that's so, just return.
1144         */
1145        if (!fastpath_timer_check(tsk))
1146                return;
1147
1148        if (!lock_task_sighand(tsk, &flags))
1149                return;
1150        /*
1151         * Here we take off tsk->signal->cpu_timers[N] and
1152         * tsk->cpu_timers[N] all the timers that are firing, and
1153         * put them on the firing list.
1154         */
1155        check_thread_timers(tsk, &firing);
1156        /*
 1157         * If there are any active process-wide timers (POSIX 1.b, itimers,
 1158         * RLIMIT_CPU), the cputimer must be running.
1159         */
1160        if (tsk->signal->cputimer.running)
1161                check_process_timers(tsk, &firing);
1162
1163        /*
1164         * We must release these locks before taking any timer's lock.
1165         * There is a potential race with timer deletion here, as the
1166         * siglock now protects our private firing list.  We have set
1167         * the firing flag in each timer, so that a deletion attempt
1168         * that gets the timer lock before we do will give it up and
1169         * spin until we've taken care of that timer below.
1170         */
1171        unlock_task_sighand(tsk, &flags);
1172
1173        /*
1174         * Now that all the timers on our list have the firing flag,
1175         * no one will touch their list entries but us.  We'll take
1176         * each timer's lock before clearing its firing flag, so no
1177         * timer call will interfere.
1178         */
1179        list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
1180                int cpu_firing;
1181
1182                spin_lock(&timer->it_lock);
1183                list_del_init(&timer->it.cpu.entry);
1184                cpu_firing = timer->it.cpu.firing;
1185                timer->it.cpu.firing = 0;
1186                /*
1187                 * The firing flag is -1 if we collided with a reset
1188                 * of the timer, which already reported this
1189                 * almost-firing as an overrun.  So don't generate an event.
1190                 */
1191                if (likely(cpu_firing >= 0))
1192                        cpu_timer_fire(timer);
1193                spin_unlock(&timer->it_lock);
1194        }
1195}
1196
1197/*
1198 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
1199 * The tsk->sighand->siglock must be held by the caller.
1200 */
1201void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1202                           cputime_t *newval, cputime_t *oldval)
1203{
1204        unsigned long long now;
1205
1206        WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
1207        cpu_timer_sample_group(clock_idx, tsk, &now);
1208
1209        if (oldval) {
1210                /*
 1211         * We are setting an itimer.  *oldval is absolute and we update
 1212         * it to be relative; the *newval argument is relative and we update
 1213         * it to be absolute.
1214                 */
1215                if (*oldval) {
1216                        if (*oldval <= now) {
1217                                /* Just about to fire. */
1218                                *oldval = cputime_one_jiffy;
1219                        } else {
1220                                *oldval -= now;
1221                        }
1222                }
1223
1224                if (!*newval)
1225                        goto out;
1226                *newval += now;
1227        }
1228
1229        /*
 1230         * Update the expiration cache if we are the earliest timer, or if the
 1231         * RLIMIT_CPU limit expires earlier than the current prof_exp cpu timer.
1232         */
1233        switch (clock_idx) {
1234        case CPUCLOCK_PROF:
1235                if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
1236                        tsk->signal->cputime_expires.prof_exp = *newval;
1237                break;
1238        case CPUCLOCK_VIRT:
1239                if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
1240                        tsk->signal->cputime_expires.virt_exp = *newval;
1241                break;
1242        }
1243out:
1244        posix_cpu_timer_kick_nohz();
1245}
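/*
 * Usage sketch (userspace side, not part of this file; values are purely
 * illustrative): the classic ITIMER_PROF / ITIMER_VIRTUAL interval timers
 * funnel into this helper, e.g.:
 *
 *	struct itimerval itv = {
 *		.it_value    = { .tv_sec = 5 },
 *		.it_interval = { .tv_sec = 1 },
 *	};
 *
 *	setitimer(ITIMER_PROF, &itv, NULL);
 */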
1246
1247static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1248                            struct timespec *rqtp, struct itimerspec *it)
1249{
1250        struct k_itimer timer;
1251        int error;
1252
1253        /*
1254         * Set up a temporary timer and then wait for it to go off.
1255         */
1256        memset(&timer, 0, sizeof timer);
1257        spin_lock_init(&timer.it_lock);
1258        timer.it_clock = which_clock;
1259        timer.it_overrun = -1;
1260        error = posix_cpu_timer_create(&timer);
1261        timer.it_process = current;
1262        if (!error) {
1263                static struct itimerspec zero_it;
1264
1265                memset(it, 0, sizeof *it);
1266                it->it_value = *rqtp;
1267
1268                spin_lock_irq(&timer.it_lock);
1269                error = posix_cpu_timer_set(&timer, flags, it, NULL);
1270                if (error) {
1271                        spin_unlock_irq(&timer.it_lock);
1272                        return error;
1273                }
1274
1275                while (!signal_pending(current)) {
1276                        if (timer.it.cpu.expires == 0) {
1277                                /*
 1278                                 * Our timer fired and was reset; the
 1279                                 * deletion below cannot fail.
1280                                 */
1281                                posix_cpu_timer_del(&timer);
1282                                spin_unlock_irq(&timer.it_lock);
1283                                return 0;
1284                        }
1285
1286                        /*
1287                         * Block until cpu_timer_fire (or a signal) wakes us.
1288                         */
1289                        __set_current_state(TASK_INTERRUPTIBLE);
1290                        spin_unlock_irq(&timer.it_lock);
1291                        schedule();
1292                        spin_lock_irq(&timer.it_lock);
1293                }
1294
1295                /*
1296                 * We were interrupted by a signal.
1297                 */
1298                sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
1299                error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
1300                if (!error) {
1301                        /*
1302                         * Timer is now unarmed, deletion can not fail.
1303                         */
1304                        posix_cpu_timer_del(&timer);
1305                }
1306                spin_unlock_irq(&timer.it_lock);
1307
1308                while (error == TIMER_RETRY) {
1309                        /*
 1310                         * We need to handle the case when the timer was or
 1311                         * still is in the middle of firing.  In all other
 1312                         * cases the resources were already freed.
1313                         */
1314                        spin_lock_irq(&timer.it_lock);
1315                        error = posix_cpu_timer_del(&timer);
1316                        spin_unlock_irq(&timer.it_lock);
1317                }
1318
1319                if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
1320                        /*
1321                         * It actually did fire already.
1322                         */
1323                        return 0;
1324                }
1325
1326                error = -ERESTART_RESTARTBLOCK;
1327        }
1328
1329        return error;
1330}
1331
1332static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1333
1334static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1335                            struct timespec *rqtp, struct timespec __user *rmtp)
1336{
1337        struct restart_block *restart_block =
1338                &current_thread_info()->restart_block;
1339        struct itimerspec it;
1340        int error;
1341
1342        /*
1343         * Diagnose required errors first.
1344         */
1345        if (CPUCLOCK_PERTHREAD(which_clock) &&
1346            (CPUCLOCK_PID(which_clock) == 0 ||
1347             CPUCLOCK_PID(which_clock) == current->pid))
1348                return -EINVAL;
1349
1350        error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
1351
1352        if (error == -ERESTART_RESTARTBLOCK) {
1353
1354                if (flags & TIMER_ABSTIME)
1355                        return -ERESTARTNOHAND;
1356                /*
1357                 * Report back to the user the time still remaining.
1358                 */
1359                if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1360                        return -EFAULT;
1361
1362                restart_block->fn = posix_cpu_nsleep_restart;
1363                restart_block->nanosleep.clockid = which_clock;
1364                restart_block->nanosleep.rmtp = rmtp;
1365                restart_block->nanosleep.expires = timespec_to_ns(rqtp);
1366        }
1367        return error;
1368}
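/*
 * Usage sketch (userspace side, not part of this file): sleeping for a given
 * amount of process CPU time rather than wall-clock time:
 *
 *	struct timespec req = { .tv_sec = 2, .tv_nsec = 0 };
 *
 *	clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0, &req, NULL);
 *
 * Note the check above: sleeping on the caller's own per-thread CPU clock is
 * rejected with -EINVAL, since a sleeping thread accumulates no CPU time and
 * such a timer could never fire.
 */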
1369
1370static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1371{
1372        clockid_t which_clock = restart_block->nanosleep.clockid;
1373        struct timespec t;
1374        struct itimerspec it;
1375        int error;
1376
1377        t = ns_to_timespec(restart_block->nanosleep.expires);
1378
1379        error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1380
1381        if (error == -ERESTART_RESTARTBLOCK) {
1382                struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
1383                /*
1384                 * Report back to the user the time still remaining.
1385                 */
1386                if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1387                        return -EFAULT;
1388
1389                restart_block->nanosleep.expires = timespec_to_ns(&t);
1390        }
1391        return error;
1392
1393}
1394
1395#define PROCESS_CLOCK   MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
1396#define THREAD_CLOCK    MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1397
1398static int process_cpu_clock_getres(const clockid_t which_clock,
1399                                    struct timespec *tp)
1400{
1401        return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1402}
1403static int process_cpu_clock_get(const clockid_t which_clock,
1404                                 struct timespec *tp)
1405{
1406        return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1407}
1408static int process_cpu_timer_create(struct k_itimer *timer)
1409{
1410        timer->it_clock = PROCESS_CLOCK;
1411        return posix_cpu_timer_create(timer);
1412}
1413static int process_cpu_nsleep(const clockid_t which_clock, int flags,
1414                              struct timespec *rqtp,
1415                              struct timespec __user *rmtp)
1416{
1417        return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
1418}
1419static long process_cpu_nsleep_restart(struct restart_block *restart_block)
1420{
1421        return -EINVAL;
1422}
1423static int thread_cpu_clock_getres(const clockid_t which_clock,
1424                                   struct timespec *tp)
1425{
1426        return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1427}
1428static int thread_cpu_clock_get(const clockid_t which_clock,
1429                                struct timespec *tp)
1430{
1431        return posix_cpu_clock_get(THREAD_CLOCK, tp);
1432}
1433static int thread_cpu_timer_create(struct k_itimer *timer)
1434{
1435        timer->it_clock = THREAD_CLOCK;
1436        return posix_cpu_timer_create(timer);
1437}
1438
1439struct k_clock clock_posix_cpu = {
1440        .clock_getres   = posix_cpu_clock_getres,
1441        .clock_set      = posix_cpu_clock_set,
1442        .clock_get      = posix_cpu_clock_get,
1443        .timer_create   = posix_cpu_timer_create,
1444        .nsleep         = posix_cpu_nsleep,
1445        .nsleep_restart = posix_cpu_nsleep_restart,
1446        .timer_set      = posix_cpu_timer_set,
1447        .timer_del      = posix_cpu_timer_del,
1448        .timer_get      = posix_cpu_timer_get,
1449};
1450
1451static __init int init_posix_cpu_timers(void)
1452{
1453        struct k_clock process = {
1454                .clock_getres   = process_cpu_clock_getres,
1455                .clock_get      = process_cpu_clock_get,
1456                .timer_create   = process_cpu_timer_create,
1457                .nsleep         = process_cpu_nsleep,
1458                .nsleep_restart = process_cpu_nsleep_restart,
1459        };
1460        struct k_clock thread = {
1461                .clock_getres   = thread_cpu_clock_getres,
1462                .clock_get      = thread_cpu_clock_get,
1463                .timer_create   = thread_cpu_timer_create,
1464        };
1465        struct timespec ts;
1466
1467        posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1468        posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1469
1470        cputime_to_timespec(cputime_one_jiffy, &ts);
1471        onecputick = ts.tv_nsec;
1472        WARN_ON(ts.tv_sec != 0);
1473
1474        return 0;
1475}
1476__initcall(init_posix_cpu_timers);
1477