linux/kernel/time/namespace.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Author: Andrei Vagin <avagin@openvz.org>
   4 * Author: Dmitry Safonov <dima@arista.com>
   5 */
   6
   7#include <linux/time_namespace.h>
   8#include <linux/user_namespace.h>
   9#include <linux/sched/signal.h>
  10#include <linux/sched/task.h>
  11#include <linux/seq_file.h>
  12#include <linux/proc_ns.h>
  13#include <linux/export.h>
  14#include <linux/time.h>
  15#include <linux/slab.h>
  16#include <linux/cred.h>
  17#include <linux/err.h>
  18#include <linux/mm.h>
  19
  20#include <vdso/datapage.h>
  21
  22ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
  23                                struct timens_offsets *ns_offsets)
  24{
  25        ktime_t offset;
  26
  27        switch (clockid) {
  28        case CLOCK_MONOTONIC:
  29                offset = timespec64_to_ktime(ns_offsets->monotonic);
  30                break;
  31        case CLOCK_BOOTTIME:
  32        case CLOCK_BOOTTIME_ALARM:
  33                offset = timespec64_to_ktime(ns_offsets->boottime);
  34                break;
  35        default:
  36                return tim;
  37        }
  38
  39        /*
  40         * Check that @tim value is in [offset, KTIME_MAX + offset]
  41         * and subtract offset.
  42         */
  43        if (tim < offset) {
  44                /*
  45                 * User can specify @tim *absolute* value - if it's lesser than
  46                 * the time namespace's offset - it's already expired.
  47                 */
  48                tim = 0;
  49        } else {
  50                tim = ktime_sub(tim, offset);
  51                if (unlikely(tim > KTIME_MAX))
  52                        tim = KTIME_MAX;
  53        }
  54
  55        return tim;
  56}
  57
  58static struct ucounts *inc_time_namespaces(struct user_namespace *ns)
  59{
  60        return inc_ucount(ns, current_euid(), UCOUNT_TIME_NAMESPACES);
  61}
  62
  63static void dec_time_namespaces(struct ucounts *ucounts)
  64{
  65        dec_ucount(ucounts, UCOUNT_TIME_NAMESPACES);
  66}
  67
  68/**
  69 * clone_time_ns - Clone a time namespace
  70 * @user_ns:    User namespace which owns a new namespace.
  71 * @old_ns:     Namespace to clone
  72 *
  73 * Clone @old_ns and set the clone refcount to 1
  74 *
  75 * Return: The new namespace or ERR_PTR.
  76 */
  77static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
  78                                          struct time_namespace *old_ns)
  79{
  80        struct time_namespace *ns;
  81        struct ucounts *ucounts;
  82        int err;
  83
  84        err = -ENOSPC;
  85        ucounts = inc_time_namespaces(user_ns);
  86        if (!ucounts)
  87                goto fail;
  88
  89        err = -ENOMEM;
  90        ns = kmalloc(sizeof(*ns), GFP_KERNEL);
  91        if (!ns)
  92                goto fail_dec;
  93
  94        kref_init(&ns->kref);
  95
  96        ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  97        if (!ns->vvar_page)
  98                goto fail_free;
  99
 100        err = ns_alloc_inum(&ns->ns);
 101        if (err)
 102                goto fail_free_page;
 103
 104        ns->ucounts = ucounts;
 105        ns->ns.ops = &timens_operations;
 106        ns->user_ns = get_user_ns(user_ns);
 107        ns->offsets = old_ns->offsets;
 108        ns->frozen_offsets = false;
 109        return ns;
 110
 111fail_free_page:
 112        __free_page(ns->vvar_page);
 113fail_free:
 114        kfree(ns);
 115fail_dec:
 116        dec_time_namespaces(ucounts);
 117fail:
 118        return ERR_PTR(err);
 119}
 120
 121/**
 122 * copy_time_ns - Create timens_for_children from @old_ns
 123 * @flags:      Cloning flags
 124 * @user_ns:    User namespace which owns a new namespace.
 125 * @old_ns:     Namespace to clone
 126 *
 127 * If CLONE_NEWTIME specified in @flags, creates a new timens_for_children;
 128 * adds a refcounter to @old_ns otherwise.
 129 *
 130 * Return: timens_for_children namespace or ERR_PTR.
 131 */
 132struct time_namespace *copy_time_ns(unsigned long flags,
 133        struct user_namespace *user_ns, struct time_namespace *old_ns)
 134{
 135        if (!(flags & CLONE_NEWTIME))
 136                return get_time_ns(old_ns);
 137
 138        return clone_time_ns(user_ns, old_ns);
 139}
 140
 141static struct timens_offset offset_from_ts(struct timespec64 off)
 142{
 143        struct timens_offset ret;
 144
 145        ret.sec = off.tv_sec;
 146        ret.nsec = off.tv_nsec;
 147
 148        return ret;
 149}
 150
 151/*
 152 * A time namespace VVAR page has the same layout as the VVAR page which
 153 * contains the system wide VDSO data.
 154 *
 155 * For a normal task the VVAR pages are installed in the normal ordering:
 156 *     VVAR
 157 *     PVCLOCK
 158 *     HVCLOCK
 159 *     TIMENS   <- Not really required
 160 *
 161 * Now for a timens task the pages are installed in the following order:
 162 *     TIMENS
 163 *     PVCLOCK
 164 *     HVCLOCK
 165 *     VVAR
 166 *
 167 * The check for vdso_data->clock_mode is in the unlikely path of
 168 * the seq begin magic. So for the non-timens case most of the time
 169 * 'seq' is even, so the branch is not taken.
 170 *
 171 * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
 172 * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the
 173 * update to finish and for 'seq' to become even anyway.
 174 *
 175 * Timens page has vdso_data->clock_mode set to VCLOCK_TIMENS which enforces
 176 * the time namespace handling path.
 177 */
 178static void timens_setup_vdso_data(struct vdso_data *vdata,
 179                                   struct time_namespace *ns)
 180{
 181        struct timens_offset *offset = vdata->offset;
 182        struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
 183        struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
 184
 185        vdata->seq                      = 1;
 186        vdata->clock_mode               = VCLOCK_TIMENS;
 187        offset[CLOCK_MONOTONIC]         = monotonic;
 188        offset[CLOCK_MONOTONIC_RAW]     = monotonic;
 189        offset[CLOCK_MONOTONIC_COARSE]  = monotonic;
 190        offset[CLOCK_BOOTTIME]          = boottime;
 191        offset[CLOCK_BOOTTIME_ALARM]    = boottime;
 192}
 193
 194/*
 195 * Protects possibly multiple offsets writers racing each other
 196 * and tasks entering the namespace.
 197 */
 198static DEFINE_MUTEX(offset_lock);
 199
 200static void timens_set_vvar_page(struct task_struct *task,
 201                                struct time_namespace *ns)
 202{
 203        struct vdso_data *vdata;
 204        unsigned int i;
 205
 206        if (ns == &init_time_ns)
 207                return;
 208
 209        /* Fast-path, taken by every task in namespace except the first. */
 210        if (likely(ns->frozen_offsets))
 211                return;
 212
 213        mutex_lock(&offset_lock);
 214        /* Nothing to-do: vvar_page has been already initialized. */
 215        if (ns->frozen_offsets)
 216                goto out;
 217
 218        ns->frozen_offsets = true;
 219        vdata = arch_get_vdso_data(page_address(ns->vvar_page));
 220
 221        for (i = 0; i < CS_BASES; i++)
 222                timens_setup_vdso_data(&vdata[i], ns);
 223
 224out:
 225        mutex_unlock(&offset_lock);
 226}
 227
 228void free_time_ns(struct kref *kref)
 229{
 230        struct time_namespace *ns;
 231
 232        ns = container_of(kref, struct time_namespace, kref);
 233        dec_time_namespaces(ns->ucounts);
 234        put_user_ns(ns->user_ns);
 235        ns_free_inum(&ns->ns);
 236        __free_page(ns->vvar_page);
 237        kfree(ns);
 238}
 239
 240static struct time_namespace *to_time_ns(struct ns_common *ns)
 241{
 242        return container_of(ns, struct time_namespace, ns);
 243}
 244
 245static struct ns_common *timens_get(struct task_struct *task)
 246{
 247        struct time_namespace *ns = NULL;
 248        struct nsproxy *nsproxy;
 249
 250        task_lock(task);
 251        nsproxy = task->nsproxy;
 252        if (nsproxy) {
 253                ns = nsproxy->time_ns;
 254                get_time_ns(ns);
 255        }
 256        task_unlock(task);
 257
 258        return ns ? &ns->ns : NULL;
 259}
 260
 261static struct ns_common *timens_for_children_get(struct task_struct *task)
 262{
 263        struct time_namespace *ns = NULL;
 264        struct nsproxy *nsproxy;
 265
 266        task_lock(task);
 267        nsproxy = task->nsproxy;
 268        if (nsproxy) {
 269                ns = nsproxy->time_ns_for_children;
 270                get_time_ns(ns);
 271        }
 272        task_unlock(task);
 273
 274        return ns ? &ns->ns : NULL;
 275}
 276
 277static void timens_put(struct ns_common *ns)
 278{
 279        put_time_ns(to_time_ns(ns));
 280}
 281
 282static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
 283{
 284        struct time_namespace *ns = to_time_ns(new);
 285        int err;
 286
 287        if (!current_is_single_threaded())
 288                return -EUSERS;
 289
 290        if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
 291            !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 292                return -EPERM;
 293
 294        timens_set_vvar_page(current, ns);
 295
 296        err = vdso_join_timens(current, ns);
 297        if (err)
 298                return err;
 299
 300        get_time_ns(ns);
 301        put_time_ns(nsproxy->time_ns);
 302        nsproxy->time_ns = ns;
 303
 304        get_time_ns(ns);
 305        put_time_ns(nsproxy->time_ns_for_children);
 306        nsproxy->time_ns_for_children = ns;
 307        return 0;
 308}
 309
 310int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
 311{
 312        struct ns_common *nsc = &nsproxy->time_ns_for_children->ns;
 313        struct time_namespace *ns = to_time_ns(nsc);
 314        int err;
 315
 316        /* create_new_namespaces() already incremented the ref counter */
 317        if (nsproxy->time_ns == nsproxy->time_ns_for_children)
 318                return 0;
 319
 320        timens_set_vvar_page(tsk, ns);
 321
 322        err = vdso_join_timens(tsk, ns);
 323        if (err)
 324                return err;
 325
 326        get_time_ns(ns);
 327        put_time_ns(nsproxy->time_ns);
 328        nsproxy->time_ns = ns;
 329
 330        return 0;
 331}
 332
 333static struct user_namespace *timens_owner(struct ns_common *ns)
 334{
 335        return to_time_ns(ns)->user_ns;
 336}
 337
 338static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
 339{
 340        seq_printf(m, "%d %lld %ld\n", clockid, ts->tv_sec, ts->tv_nsec);
 341}
 342
 343void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
 344{
 345        struct ns_common *ns;
 346        struct time_namespace *time_ns;
 347
 348        ns = timens_for_children_get(p);
 349        if (!ns)
 350                return;
 351        time_ns = to_time_ns(ns);
 352
 353        show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic);
 354        show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime);
 355        put_time_ns(time_ns);
 356}
 357
 358int proc_timens_set_offset(struct file *file, struct task_struct *p,
 359                           struct proc_timens_offset *offsets, int noffsets)
 360{
 361        struct ns_common *ns;
 362        struct time_namespace *time_ns;
 363        struct timespec64 tp;
 364        int i, err;
 365
 366        ns = timens_for_children_get(p);
 367        if (!ns)
 368                return -ESRCH;
 369        time_ns = to_time_ns(ns);
 370
 371        if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
 372                put_time_ns(time_ns);
 373                return -EPERM;
 374        }
 375
 376        for (i = 0; i < noffsets; i++) {
 377                struct proc_timens_offset *off = &offsets[i];
 378
 379                switch (off->clockid) {
 380                case CLOCK_MONOTONIC:
 381                        ktime_get_ts64(&tp);
 382                        break;
 383                case CLOCK_BOOTTIME:
 384                        ktime_get_boottime_ts64(&tp);
 385                        break;
 386                default:
 387                        err = -EINVAL;
 388                        goto out;
 389                }
 390
 391                err = -ERANGE;
 392
 393                if (off->val.tv_sec > KTIME_SEC_MAX ||
 394                    off->val.tv_sec < -KTIME_SEC_MAX)
 395                        goto out;
 396
 397                tp = timespec64_add(tp, off->val);
 398                /*
 399                 * KTIME_SEC_MAX is divided by 2 to be sure that KTIME_MAX is
 400                 * still unreachable.
 401                 */
 402                if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
 403                        goto out;
 404        }
 405
 406        mutex_lock(&offset_lock);
 407        if (time_ns->frozen_offsets) {
 408                err = -EACCES;
 409                goto out_unlock;
 410        }
 411
 412        err = 0;
 413        /* Don't report errors after this line */
 414        for (i = 0; i < noffsets; i++) {
 415                struct proc_timens_offset *off = &offsets[i];
 416                struct timespec64 *offset = NULL;
 417
 418                switch (off->clockid) {
 419                case CLOCK_MONOTONIC:
 420                        offset = &time_ns->offsets.monotonic;
 421                        break;
 422                case CLOCK_BOOTTIME:
 423                        offset = &time_ns->offsets.boottime;
 424                        break;
 425                }
 426
 427                *offset = off->val;
 428        }
 429
 430out_unlock:
 431        mutex_unlock(&offset_lock);
 432out:
 433        put_time_ns(time_ns);
 434
 435        return err;
 436}
 437
 438const struct proc_ns_operations timens_operations = {
 439        .name           = "time",
 440        .type           = CLONE_NEWTIME,
 441        .get            = timens_get,
 442        .put            = timens_put,
 443        .install        = timens_install,
 444        .owner          = timens_owner,
 445};
 446
 447const struct proc_ns_operations timens_for_children_operations = {
 448        .name           = "time_for_children",
 449        .type           = CLONE_NEWTIME,
 450        .get            = timens_for_children_get,
 451        .put            = timens_put,
 452        .install        = timens_install,
 453        .owner          = timens_owner,
 454};
 455
 456struct time_namespace init_time_ns = {
 457        .kref           = KREF_INIT(3),
 458        .user_ns        = &init_user_ns,
 459        .ns.inum        = PROC_TIME_INIT_INO,
 460        .ns.ops         = &timens_operations,
 461        .frozen_offsets = true,
 462};
 463
 464static int __init time_ns_init(void)
 465{
 466        return 0;
 467}
 468subsys_initcall(time_ns_init);
 469