linux/kernel/time/namespace.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Author: Andrei Vagin <avagin@openvz.org>
   4 * Author: Dmitry Safonov <dima@arista.com>
   5 */
   6
   7#include <linux/time_namespace.h>
   8#include <linux/user_namespace.h>
   9#include <linux/sched/signal.h>
  10#include <linux/sched/task.h>
  11#include <linux/clocksource.h>
  12#include <linux/seq_file.h>
  13#include <linux/proc_ns.h>
  14#include <linux/export.h>
  15#include <linux/time.h>
  16#include <linux/slab.h>
  17#include <linux/cred.h>
  18#include <linux/err.h>
  19#include <linux/mm.h>
  20
  21#include <vdso/datapage.h>
  22
  23ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
  24                                struct timens_offsets *ns_offsets)
  25{
  26        ktime_t offset;
  27
  28        switch (clockid) {
  29        case CLOCK_MONOTONIC:
  30                offset = timespec64_to_ktime(ns_offsets->monotonic);
  31                break;
  32        case CLOCK_BOOTTIME:
  33        case CLOCK_BOOTTIME_ALARM:
  34                offset = timespec64_to_ktime(ns_offsets->boottime);
  35                break;
  36        default:
  37                return tim;
  38        }
  39
  40        /*
  41         * Check that @tim value is in [offset, KTIME_MAX + offset]
  42         * and subtract offset.
  43         */
  44        if (tim < offset) {
  45                /*
  46                 * User can specify @tim *absolute* value - if it's lesser than
  47                 * the time namespace's offset - it's already expired.
  48                 */
  49                tim = 0;
  50        } else {
  51                tim = ktime_sub(tim, offset);
  52                if (unlikely(tim > KTIME_MAX))
  53                        tim = KTIME_MAX;
  54        }
  55
  56        return tim;
  57}
  58
  59static struct ucounts *inc_time_namespaces(struct user_namespace *ns)
  60{
  61        return inc_ucount(ns, current_euid(), UCOUNT_TIME_NAMESPACES);
  62}
  63
  64static void dec_time_namespaces(struct ucounts *ucounts)
  65{
  66        dec_ucount(ucounts, UCOUNT_TIME_NAMESPACES);
  67}
  68
  69/**
  70 * clone_time_ns - Clone a time namespace
  71 * @user_ns:    User namespace which owns a new namespace.
  72 * @old_ns:     Namespace to clone
  73 *
  74 * Clone @old_ns and set the clone refcount to 1
  75 *
  76 * Return: The new namespace or ERR_PTR.
  77 */
  78static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
  79                                          struct time_namespace *old_ns)
  80{
  81        struct time_namespace *ns;
  82        struct ucounts *ucounts;
  83        int err;
  84
  85        err = -ENOSPC;
  86        ucounts = inc_time_namespaces(user_ns);
  87        if (!ucounts)
  88                goto fail;
  89
  90        err = -ENOMEM;
  91        ns = kmalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT);
  92        if (!ns)
  93                goto fail_dec;
  94
  95        refcount_set(&ns->ns.count, 1);
  96
  97        ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
  98        if (!ns->vvar_page)
  99                goto fail_free;
 100
 101        err = ns_alloc_inum(&ns->ns);
 102        if (err)
 103                goto fail_free_page;
 104
 105        ns->ucounts = ucounts;
 106        ns->ns.ops = &timens_operations;
 107        ns->user_ns = get_user_ns(user_ns);
 108        ns->offsets = old_ns->offsets;
 109        ns->frozen_offsets = false;
 110        return ns;
 111
 112fail_free_page:
 113        __free_page(ns->vvar_page);
 114fail_free:
 115        kfree(ns);
 116fail_dec:
 117        dec_time_namespaces(ucounts);
 118fail:
 119        return ERR_PTR(err);
 120}
 121
 122/**
 123 * copy_time_ns - Create timens_for_children from @old_ns
 124 * @flags:      Cloning flags
 125 * @user_ns:    User namespace which owns a new namespace.
 126 * @old_ns:     Namespace to clone
 127 *
 128 * If CLONE_NEWTIME specified in @flags, creates a new timens_for_children;
 129 * adds a refcounter to @old_ns otherwise.
 130 *
 131 * Return: timens_for_children namespace or ERR_PTR.
 132 */
 133struct time_namespace *copy_time_ns(unsigned long flags,
 134        struct user_namespace *user_ns, struct time_namespace *old_ns)
 135{
 136        if (!(flags & CLONE_NEWTIME))
 137                return get_time_ns(old_ns);
 138
 139        return clone_time_ns(user_ns, old_ns);
 140}
 141
 142static struct timens_offset offset_from_ts(struct timespec64 off)
 143{
 144        struct timens_offset ret;
 145
 146        ret.sec = off.tv_sec;
 147        ret.nsec = off.tv_nsec;
 148
 149        return ret;
 150}
 151
 152/*
 153 * A time namespace VVAR page has the same layout as the VVAR page which
 154 * contains the system wide VDSO data.
 155 *
 156 * For a normal task the VVAR pages are installed in the normal ordering:
 157 *     VVAR
 158 *     PVCLOCK
 159 *     HVCLOCK
 160 *     TIMENS   <- Not really required
 161 *
 162 * Now for a timens task the pages are installed in the following order:
 163 *     TIMENS
 164 *     PVCLOCK
 165 *     HVCLOCK
 166 *     VVAR
 167 *
 168 * The check for vdso_data->clock_mode is in the unlikely path of
 169 * the seq begin magic. So for the non-timens case most of the time
 170 * 'seq' is even, so the branch is not taken.
 171 *
 172 * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
 173 * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the
 174 * update to finish and for 'seq' to become even anyway.
 175 *
 176 * Timens page has vdso_data->clock_mode set to VDSO_CLOCKMODE_TIMENS which
 177 * enforces the time namespace handling path.
 178 */
 179static void timens_setup_vdso_data(struct vdso_data *vdata,
 180                                   struct time_namespace *ns)
 181{
 182        struct timens_offset *offset = vdata->offset;
 183        struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
 184        struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
 185
 186        vdata->seq                      = 1;
 187        vdata->clock_mode               = VDSO_CLOCKMODE_TIMENS;
 188        offset[CLOCK_MONOTONIC]         = monotonic;
 189        offset[CLOCK_MONOTONIC_RAW]     = monotonic;
 190        offset[CLOCK_MONOTONIC_COARSE]  = monotonic;
 191        offset[CLOCK_BOOTTIME]          = boottime;
 192        offset[CLOCK_BOOTTIME_ALARM]    = boottime;
 193}
 194
/*
 * Serializes writers of a time namespace's offsets against each other and
 * against tasks entering the namespace: it is held while
 * proc_timens_set_offset() stores new offsets and while
 * timens_set_vvar_page() freezes them and fills in the VVAR page.
 */
static DEFINE_MUTEX(offset_lock);
 200
 201static void timens_set_vvar_page(struct task_struct *task,
 202                                struct time_namespace *ns)
 203{
 204        struct vdso_data *vdata;
 205        unsigned int i;
 206
 207        if (ns == &init_time_ns)
 208                return;
 209
 210        /* Fast-path, taken by every task in namespace except the first. */
 211        if (likely(ns->frozen_offsets))
 212                return;
 213
 214        mutex_lock(&offset_lock);
 215        /* Nothing to-do: vvar_page has been already initialized. */
 216        if (ns->frozen_offsets)
 217                goto out;
 218
 219        ns->frozen_offsets = true;
 220        vdata = arch_get_vdso_data(page_address(ns->vvar_page));
 221
 222        for (i = 0; i < CS_BASES; i++)
 223                timens_setup_vdso_data(&vdata[i], ns);
 224
 225out:
 226        mutex_unlock(&offset_lock);
 227}
 228
/**
 * free_time_ns - Release everything held by a time namespace
 * @ns: Namespace to destroy
 *
 * Undoes what clone_time_ns() set up: the ucount accounting, the reference
 * on the owning user namespace, the inode number, the VVAR page and finally
 * the namespace structure itself.
 *
 * NOTE(review): presumably invoked once the last reference to @ns is
 * dropped - confirm against put_time_ns() in the header.
 */
void free_time_ns(struct time_namespace *ns)
{
        /* Drop the accounting before the user namespace reference. */
        dec_time_namespaces(ns->ucounts);
        put_user_ns(ns->user_ns);
        ns_free_inum(&ns->ns);
        __free_page(ns->vvar_page);
        /* @ns must not be touched after this point. */
        kfree(ns);
}
 237
 238static struct time_namespace *to_time_ns(struct ns_common *ns)
 239{
 240        return container_of(ns, struct time_namespace, ns);
 241}
 242
 243static struct ns_common *timens_get(struct task_struct *task)
 244{
 245        struct time_namespace *ns = NULL;
 246        struct nsproxy *nsproxy;
 247
 248        task_lock(task);
 249        nsproxy = task->nsproxy;
 250        if (nsproxy) {
 251                ns = nsproxy->time_ns;
 252                get_time_ns(ns);
 253        }
 254        task_unlock(task);
 255
 256        return ns ? &ns->ns : NULL;
 257}
 258
 259static struct ns_common *timens_for_children_get(struct task_struct *task)
 260{
 261        struct time_namespace *ns = NULL;
 262        struct nsproxy *nsproxy;
 263
 264        task_lock(task);
 265        nsproxy = task->nsproxy;
 266        if (nsproxy) {
 267                ns = nsproxy->time_ns_for_children;
 268                get_time_ns(ns);
 269        }
 270        task_unlock(task);
 271
 272        return ns ? &ns->ns : NULL;
 273}
 274
 275static void timens_put(struct ns_common *ns)
 276{
 277        put_time_ns(to_time_ns(ns));
 278}
 279
 280void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
 281{
 282        timens_set_vvar_page(tsk, ns);
 283        vdso_join_timens(tsk, ns);
 284}
 285
 286static int timens_install(struct nsset *nsset, struct ns_common *new)
 287{
 288        struct nsproxy *nsproxy = nsset->nsproxy;
 289        struct time_namespace *ns = to_time_ns(new);
 290
 291        if (!current_is_single_threaded())
 292                return -EUSERS;
 293
 294        if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
 295            !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
 296                return -EPERM;
 297
 298        get_time_ns(ns);
 299        put_time_ns(nsproxy->time_ns);
 300        nsproxy->time_ns = ns;
 301
 302        get_time_ns(ns);
 303        put_time_ns(nsproxy->time_ns_for_children);
 304        nsproxy->time_ns_for_children = ns;
 305        return 0;
 306}
 307
 308void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
 309{
 310        struct ns_common *nsc = &nsproxy->time_ns_for_children->ns;
 311        struct time_namespace *ns = to_time_ns(nsc);
 312
 313        /* create_new_namespaces() already incremented the ref counter */
 314        if (nsproxy->time_ns == nsproxy->time_ns_for_children)
 315                return;
 316
 317        get_time_ns(ns);
 318        put_time_ns(nsproxy->time_ns);
 319        nsproxy->time_ns = ns;
 320
 321        timens_commit(tsk, ns);
 322}
 323
 324static struct user_namespace *timens_owner(struct ns_common *ns)
 325{
 326        return to_time_ns(ns)->user_ns;
 327}
 328
 329static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
 330{
 331        char *clock;
 332
 333        switch (clockid) {
 334        case CLOCK_BOOTTIME:
 335                clock = "boottime";
 336                break;
 337        case CLOCK_MONOTONIC:
 338                clock = "monotonic";
 339                break;
 340        default:
 341                clock = "unknown";
 342                break;
 343        }
 344        seq_printf(m, "%-10s %10lld %9ld\n", clock, ts->tv_sec, ts->tv_nsec);
 345}
 346
 347void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
 348{
 349        struct ns_common *ns;
 350        struct time_namespace *time_ns;
 351
 352        ns = timens_for_children_get(p);
 353        if (!ns)
 354                return;
 355        time_ns = to_time_ns(ns);
 356
 357        show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic);
 358        show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime);
 359        put_time_ns(time_ns);
 360}
 361
 362int proc_timens_set_offset(struct file *file, struct task_struct *p,
 363                           struct proc_timens_offset *offsets, int noffsets)
 364{
 365        struct ns_common *ns;
 366        struct time_namespace *time_ns;
 367        struct timespec64 tp;
 368        int i, err;
 369
 370        ns = timens_for_children_get(p);
 371        if (!ns)
 372                return -ESRCH;
 373        time_ns = to_time_ns(ns);
 374
 375        if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
 376                put_time_ns(time_ns);
 377                return -EPERM;
 378        }
 379
 380        for (i = 0; i < noffsets; i++) {
 381                struct proc_timens_offset *off = &offsets[i];
 382
 383                switch (off->clockid) {
 384                case CLOCK_MONOTONIC:
 385                        ktime_get_ts64(&tp);
 386                        break;
 387                case CLOCK_BOOTTIME:
 388                        ktime_get_boottime_ts64(&tp);
 389                        break;
 390                default:
 391                        err = -EINVAL;
 392                        goto out;
 393                }
 394
 395                err = -ERANGE;
 396
 397                if (off->val.tv_sec > KTIME_SEC_MAX ||
 398                    off->val.tv_sec < -KTIME_SEC_MAX)
 399                        goto out;
 400
 401                tp = timespec64_add(tp, off->val);
 402                /*
 403                 * KTIME_SEC_MAX is divided by 2 to be sure that KTIME_MAX is
 404                 * still unreachable.
 405                 */
 406                if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
 407                        goto out;
 408        }
 409
 410        mutex_lock(&offset_lock);
 411        if (time_ns->frozen_offsets) {
 412                err = -EACCES;
 413                goto out_unlock;
 414        }
 415
 416        err = 0;
 417        /* Don't report errors after this line */
 418        for (i = 0; i < noffsets; i++) {
 419                struct proc_timens_offset *off = &offsets[i];
 420                struct timespec64 *offset = NULL;
 421
 422                switch (off->clockid) {
 423                case CLOCK_MONOTONIC:
 424                        offset = &time_ns->offsets.monotonic;
 425                        break;
 426                case CLOCK_BOOTTIME:
 427                        offset = &time_ns->offsets.boottime;
 428                        break;
 429                }
 430
 431                *offset = off->val;
 432        }
 433
 434out_unlock:
 435        mutex_unlock(&offset_lock);
 436out:
 437        put_time_ns(time_ns);
 438
 439        return err;
 440}
 441
 442const struct proc_ns_operations timens_operations = {
 443        .name           = "time",
 444        .type           = CLONE_NEWTIME,
 445        .get            = timens_get,
 446        .put            = timens_put,
 447        .install        = timens_install,
 448        .owner          = timens_owner,
 449};
 450
 451const struct proc_ns_operations timens_for_children_operations = {
 452        .name           = "time_for_children",
 453        .real_ns_name   = "time",
 454        .type           = CLONE_NEWTIME,
 455        .get            = timens_for_children_get,
 456        .put            = timens_put,
 457        .install        = timens_install,
 458        .owner          = timens_owner,
 459};
 460
 461struct time_namespace init_time_ns = {
 462        .ns.count       = REFCOUNT_INIT(3),
 463        .user_ns        = &init_user_ns,
 464        .ns.inum        = PROC_TIME_INIT_INO,
 465        .ns.ops         = &timens_operations,
 466        .frozen_offsets = true,
 467};
 468