   1/*
   2 *  linux/kernel/sys.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/export.h>
   8#include <linux/mm.h>
   9#include <linux/utsname.h>
  10#include <linux/mman.h>
  11#include <linux/reboot.h>
  12#include <linux/prctl.h>
  13#include <linux/highuid.h>
  14#include <linux/fs.h>
  15#include <linux/kmod.h>
  16#include <linux/perf_event.h>
  17#include <linux/resource.h>
  18#include <linux/kernel.h>
  19#include <linux/kexec.h>
  20#include <linux/workqueue.h>
  21#include <linux/capability.h>
  22#include <linux/device.h>
  23#include <linux/key.h>
  24#include <linux/times.h>
  25#include <linux/posix-timers.h>
  26#include <linux/security.h>
  27#include <linux/dcookies.h>
  28#include <linux/suspend.h>
  29#include <linux/tty.h>
  30#include <linux/signal.h>
  31#include <linux/cn_proc.h>
  32#include <linux/getcpu.h>
  33#include <linux/task_io_accounting_ops.h>
  34#include <linux/seccomp.h>
  35#include <linux/cpu.h>
  36#include <linux/personality.h>
  37#include <linux/ptrace.h>
  38#include <linux/fs_struct.h>
  39#include <linux/file.h>
  40#include <linux/mount.h>
  41#include <linux/gfp.h>
  42#include <linux/syscore_ops.h>
  43#include <linux/version.h>
  44#include <linux/ctype.h>
  45
  46#include <linux/compat.h>
  47#include <linux/syscalls.h>
  48#include <linux/kprobes.h>
  49#include <linux/user_namespace.h>
  50#include <linux/binfmts.h>
  51
  52#include <linux/sched.h>
  53#include <linux/rcupdate.h>
  54#include <linux/uidgid.h>
  55#include <linux/cred.h>
  56
  57#include <linux/kmsg_dump.h>
  58/* Move somewhere else to avoid recompiling? */
  59#include <generated/utsrelease.h>
  60
  61#include <asm/uaccess.h>
  62#include <asm/io.h>
  63#include <asm/unistd.h>
  64
/*
 * Architecture-specific prctl() helpers.  An architecture that supports a
 * given control defines the corresponding macro in its headers; these
 * fallbacks make the matching prctl() command fail with -EINVAL on
 * architectures that do not.
 */
#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef MPX_ENABLE_MANAGEMENT
# define MPX_ENABLE_MANAGEMENT()	(-EINVAL)
#endif
#ifndef MPX_DISABLE_MANAGEMENT
# define MPX_DISABLE_MANAGEMENT()	(-EINVAL)
#endif
 101
 102/*
 103 * this is where the system-wide overflow UID and GID are defined, for
 104 * architectures that now have 32-bit UID/GID but didn't in the past
 105 */
 106
 107int overflowuid = DEFAULT_OVERFLOWUID;
 108int overflowgid = DEFAULT_OVERFLOWGID;
 109
 110EXPORT_SYMBOL(overflowuid);
 111EXPORT_SYMBOL(overflowgid);
 112
 113/*
 114 * the same as above, but for filesystems which can only store a 16-bit
 115 * UID and GID. as such, this is needed on all architectures
 116 */
 117
 118int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
 119int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
 120
 121EXPORT_SYMBOL(fs_overflowuid);
 122EXPORT_SYMBOL(fs_overflowgid);
 123
 124/*
 125 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 126 */
 127
 128int C_A_D = 1;
 129struct pid *cad_pid;
 130EXPORT_SYMBOL(cad_pid);
 131
 132/*
 133 * If set, this is used for preparing the system to power off.
 134 */
 135
 136void (*pm_power_off_prepare)(void);
 137
 138/*
 139 * Returns true if current's euid is same as p's uid or euid,
 140 * or has CAP_SYS_NICE to p's user_ns.
 141 *
 142 * Called with rcu_read_lock, creds are safe
 143 */
 144static bool set_one_prio_perm(struct task_struct *p)
 145{
 146        const struct cred *cred = current_cred(), *pcred = __task_cred(p);
 147
 148        if (uid_eq(pcred->uid,  cred->euid) ||
 149            uid_eq(pcred->euid, cred->euid))
 150                return true;
 151        if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
 152                return true;
 153        return false;
 154}
 155
 156/*
 157 * set the priority of a task
 158 * - the caller must hold the RCU read lock
 159 */
 160static int set_one_prio(struct task_struct *p, int niceval, int error)
 161{
 162        int no_nice;
 163
 164        if (!set_one_prio_perm(p)) {
 165                error = -EPERM;
 166                goto out;
 167        }
 168        if (niceval < task_nice(p) && !can_nice(p, niceval)) {
 169                error = -EACCES;
 170                goto out;
 171        }
 172        no_nice = security_task_setnice(p, niceval);
 173        if (no_nice) {
 174                error = no_nice;
 175                goto out;
 176        }
 177        if (error == -ESRCH)
 178                error = 0;
 179        set_user_nice(p, niceval);
 180out:
 181        return error;
 182}
 183
/*
 * sys_setpriority - set the nice value of a process, process group, or
 * all processes owned by a user.  'which' selects the scope, 'who' the
 * target (0 means "the caller"), 'niceval' the new nice value.
 * Returns 0 on success, -EINVAL/-ESRCH/-EPERM/-EACCES on failure.
 */
SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	rcu_read_lock();
	/*
	 * NOTE(review): acquired via tasklist_read_lock() but released below
	 * via qread_unlock(&tasklist_lock) -- presumably paired wrappers in
	 * this tree; confirm they match.
	 */
	tasklist_read_lock();
	switch (which) {
		case PRIO_PROCESS:
			/* who == 0 means the calling process */
			if (who)
				p = find_task_by_vpid(who);
			else
				p = current;
			if (p)
				error = set_one_prio(p, niceval, error);
			break;
		case PRIO_PGRP:
			/* who == 0 means the caller's process group */
			if (who)
				pgrp = find_vpid(who);
			else
				pgrp = task_pgrp(current);
			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
				error = set_one_prio(p, niceval, error);
			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
			break;
		case PRIO_USER:
			uid = make_kuid(cred->user_ns, who);
			user = cred->user;
			if (!who)
				uid = cred->uid;
			else if (!uid_eq(uid, cred->uid) &&
				 !(user = find_user(uid)))
				goto out_unlock;	/* No processes for this user */

			/* Renice every thread owned by 'uid'. */
			do_each_thread(g, p) {
				if (uid_eq(task_uid(p), uid))
					error = set_one_prio(p, niceval, error);
			} while_each_thread(g, p);
			/* Drop the reference taken by find_user(), if any. */
			if (!uid_eq(uid, cred->uid))
				free_uid(user);		/* For find_user() */
			break;
	}
out_unlock:
	qread_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}
 246
/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	/*
	 * NOTE(review): acquired via tasklist_read_lock() but released below
	 * via qread_unlock(&tasklist_lock) -- presumably paired wrappers in
	 * this tree; confirm they match.
	 */
	tasklist_read_lock();
	switch (which) {
		case PRIO_PROCESS:
			/* who == 0 means the calling process */
			if (who)
				p = find_task_by_vpid(who);
			else
				p = current;
			if (p) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			}
			break;
		case PRIO_PGRP:
			/* who == 0 means the caller's process group */
			if (who)
				pgrp = find_vpid(who);
			else
				pgrp = task_pgrp(current);
			/* Report the highest offset value in the group. */
			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
			break;
		case PRIO_USER:
			uid = make_kuid(cred->user_ns, who);
			user = cred->user;
			if (!who)
				uid = cred->uid;
			else if (!uid_eq(uid, cred->uid) &&
				 !(user = find_user(uid)))
				goto out_unlock;	/* No processes for this user */

			/* Report the highest offset value among the user's threads. */
			do_each_thread(g, p) {
				if (uid_eq(task_uid(p), uid)) {
					niceval = 20 - task_nice(p);
					if (niceval > retval)
						retval = niceval;
				}
			} while_each_thread(g, p);
			if (!uid_eq(uid, cred->uid))
				free_uid(user);		/* for find_user() */
			break;
	}
out_unlock:
	qread_unlock(&tasklist_lock);
	rcu_read_unlock();

	return retval;
}
 316
/**
 *	emergency_restart - reboot the system
 *
 *	Without shutting down any hardware or taking any locks
 *	reboot the system.  This is called when we know we are in
 *	trouble so this is our best effort to reboot.  This is
 *	safe to call in interrupt context.
 */
void emergency_restart(void)
{
	/* Flush the kernel log to registered dumpers before going down. */
	kmsg_dump(KMSG_DUMP_EMERG);
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);
 331
/*
 * Notify reboot listeners, mark the system as restarting, and shut down
 * devices and the usermode-helper machinery in preparation for a restart.
 * @cmd: restart command forwarded to the notifier chain (may be NULL).
 */
void kernel_restart_prepare(char *cmd)
{
	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
	system_state = SYSTEM_RESTART;
	/* No new usermode helpers may be spawned once shutdown has begun. */
	usermodehelper_disable();
	device_shutdown();
}
 339
/**
 *	register_reboot_notifier - Register function to be called at reboot time
 *	@nb: Info about notifier function to be called
 *
 *	Registers a function with the list of functions
 *	to be called at reboot time.
 *
 *	Currently always returns zero, as blocking_notifier_chain_register()
 *	always returns zero.
 */
int register_reboot_notifier(struct notifier_block *nb)
{
	/* Thin wrapper: all policy lives in the blocking notifier chain. */
	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(register_reboot_notifier);
 355
/**
 *	unregister_reboot_notifier - Unregister previously registered reboot notifier
 *	@nb: Hook to be unregistered
 *
 *	Unregisters a previously registered reboot
 *	notifier function.
 *
 *	Returns zero on success, or %-ENOENT on failure.
 */
int unregister_reboot_notifier(struct notifier_block *nb)
{
	/* Thin wrapper: all policy lives in the blocking notifier chain. */
	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(unregister_reboot_notifier);
 370
/* Add backwards compatibility for stable trees. */
#ifndef PF_NO_SETAFFINITY
#define PF_NO_SETAFFINITY		PF_THREAD_BOUND
#endif

/*
 * Pin the calling task to the CPU the reboot sequence should run on
 * (the boot CPU when it is still online) and prevent both CPU hotplug
 * and other tasks from moving us afterwards.
 */
static void migrate_to_reboot_cpu(void)
{
	/* The boot cpu is always logical cpu 0 */
	int cpu = 0;

	/* Keep CPUs from going offline underneath us. */
	cpu_hotplug_disable();

	/* Make certain the cpu I'm about to reboot on is online */
	if (!cpu_online(cpu))
		cpu = cpumask_first(cpu_online_mask);

	/* Prevent races with other tasks migrating this task */
	current->flags |= PF_NO_SETAFFINITY;

	/* Make certain I only run on the appropriate processor */
	set_cpus_allowed_ptr(current, cpumask_of(cpu));
}
 393
 394/**
 395 *      kernel_restart - reboot the system
 396 *      @cmd: pointer to buffer containing command to execute for restart
 397 *              or %NULL
 398 *
 399 *      Shutdown everything and perform a clean reboot.
 400 *      This is not safe to call in interrupt context.
 401 */
 402void kernel_restart(char *cmd)
 403{
 404        kernel_restart_prepare(cmd);
 405        migrate_to_reboot_cpu();
 406        syscore_shutdown();
 407        if (!cmd)
 408                printk(KERN_EMERG "Restarting system.\n");
 409        else
 410                printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
 411        kmsg_dump(KMSG_DUMP_RESTART);
 412        machine_restart(cmd);
 413}
 414EXPORT_SYMBOL_GPL(kernel_restart);
 415
 416static void kernel_shutdown_prepare(enum system_states state)
 417{
 418        blocking_notifier_call_chain(&reboot_notifier_list,
 419                (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
 420        system_state = state;
 421        usermodehelper_disable();
 422        device_shutdown();
 423}
/**
 *	kernel_halt - halt the system
 *
 *	Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
	kernel_shutdown_prepare(SYSTEM_HALT);
	migrate_to_reboot_cpu();
	syscore_shutdown();
	printk(KERN_EMERG "System halted.\n");
	/* Last chance to capture the log before the machine stops. */
	kmsg_dump(KMSG_DUMP_HALT);
	machine_halt();
}

EXPORT_SYMBOL_GPL(kernel_halt);
 440
/**
 *	kernel_power_off - power_off the system
 *
 *	Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	/* Optional platform hook to prepare hardware for power removal. */
	if (pm_power_off_prepare)
		pm_power_off_prepare();
	migrate_to_reboot_cpu();
	syscore_shutdown();
	printk(KERN_EMERG "Power down.\n");
	/* Last chance to capture the log before power is cut. */
	kmsg_dump(KMSG_DUMP_POWEROFF);
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
 458
/* Serializes concurrent reboot/halt/kexec requests below. */
static DEFINE_MUTEX(reboot_mutex);

/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
		void __user *, arg)
{
	struct pid_namespace *pid_ns = task_active_pid_ns(current);
	char buffer[256];
	int ret = 0;

	/* We only trust the superuser with rebooting the system. */
	if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
			magic2 != LINUX_REBOOT_MAGIC2A &&
			magic2 != LINUX_REBOOT_MAGIC2B &&
			magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

	/*
	 * If pid namespaces are enabled and the current task is in a child
	 * pid_namespace, the command is handled by reboot_pid_ns() which will
	 * call do_exit().
	 */
	ret = reboot_pid_ns(pid_ns, cmd);
	if (ret)
		return ret;

	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

	mutex_lock(&reboot_mutex);
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		kernel_restart(NULL);
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		kernel_halt();
		do_exit(0);
		panic("cannot halt");

	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		do_exit(0);
		break;	/* not reached: do_exit() does not return */

	case LINUX_REBOOT_CMD_RESTART2:
		/* Copy the user-supplied command; always NUL-terminate. */
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
			ret = -EFAULT;
			break;
		}
		buffer[sizeof(buffer) - 1] = '\0';

		kernel_restart(buffer);
		break;

#ifdef CONFIG_KEXEC_CORE
	case LINUX_REBOOT_CMD_KEXEC:
		ret = kernel_kexec();
		break;
#endif

#ifdef CONFIG_HIBERNATION
	case LINUX_REBOOT_CMD_SW_SUSPEND:
		ret = hibernate();
		break;
#endif

	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&reboot_mutex);
	return ret;
}
 556
/* Workqueue callback: perform the ctrl-alt-del restart in process context. */
static void deferred_cad(struct work_struct *dummy)
{
	kernel_restart(NULL);
}
 561
 562/*
 563 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 564 * As it's called within an interrupt, it may NOT sync: the only choice
 565 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 566 */
 567void ctrl_alt_del(void)
 568{
 569        static DECLARE_WORK(cad_work, deferred_cad);
 570
 571        if (C_A_D)
 572                schedule_work(&cad_work);
 573        else
 574                kill_cad_pid(SIGINT, 1);
 575}
 576        
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are not races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid;

	/* (gid_t) -1 means "leave this id unchanged". */
	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		/* Real gid may be set to current real/effective gid,
		 * or anything with CAP_SETGID. */
		if (gid_eq(old->gid, krgid) ||
		    gid_eq(old->egid, krgid) ||
		    ns_capable(old->user_ns, CAP_SETGID))
			new->gid = krgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		/* Effective gid may also be set to the saved gid. */
		if (gid_eq(old->gid, kegid) ||
		    gid_eq(old->egid, kegid) ||
		    gid_eq(old->sgid, kegid) ||
		    ns_capable(old->user_ns, CAP_SETGID))
			new->egid = kegid;
		else
			goto error;
	}

	/* Update saved gid per the BSD rule described above. */
	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
 646
/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t kgid;

	/* Map the user-supplied gid into kernel form; reject unmapped values. */
	kgid = make_kgid(ns, gid);
	if (!gid_valid(kgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable(old->user_ns, CAP_SETGID))
		/* Privileged: set all four gids. */
		new->gid = new->egid = new->sgid = new->fsgid = kgid;
	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
		/* Unprivileged: may only switch to real or saved gid. */
		new->egid = new->fsgid = kgid;
	else
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
 683
/*
 * change the user struct in a credentials set to match the new UID
 *
 * Returns 0 on success or -EAGAIN if the per-user accounting structure
 * could not be allocated.  On success the old user reference is dropped.
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(new->uid);
	if (!new_user)
		return -EAGAIN;

	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root.  We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
			new_user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;

	/* Swap the user reference held by the new creds. */
	free_uid(new->user);
	new->user = new_user;
	return 0;
}
 712
/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid;

	/* (uid_t) -1 means "leave this id unchanged". */
	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;
	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		/* Real uid may be set to current real/effective uid,
		 * or anything with CAP_SETUID. */
		new->uid = kruid;
		if (!uid_eq(old->uid, kruid) &&
		    !uid_eq(old->euid, kruid) &&
		    !ns_capable(old->user_ns, CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		/* Effective uid may also be set to the saved uid. */
		new->euid = keuid;
		if (!uid_eq(old->uid, keuid) &&
		    !uid_eq(old->euid, keuid) &&
		    !uid_eq(old->suid, keuid) &&
		    !ns_capable(old->user_ns, CAP_SETUID))
			goto error;
	}

	/* Changing the real uid re-points the per-user accounting struct. */
	if (!uid_eq(new->uid, old->uid)) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	/* Update saved uid per the BSD rule described above. */
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
 787                
/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kuid;

	/* Map the user-supplied uid into kernel form; reject unmapped values. */
	kuid = make_kuid(ns, uid);
	if (!uid_valid(kuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable(old->user_ns, CAP_SETUID)) {
		/* Privileged: real and saved uid change too. */
		new->suid = new->uid = kuid;
		if (!uid_eq(kuid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
		/* Unprivileged: may only switch to real or saved uid. */
		goto error;
	}

	new->fsuid = new->euid = kuid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
 840
 841
/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 *
 * (uid_t) -1 for any argument means "leave that id unchanged".
 */
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid, ksuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);
	ksuid = make_kuid(ns, suid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;

	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	if ((suid != (uid_t) -1) && !uid_valid(ksuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	old = current_cred();

	retval = -EPERM;
	if (!ns_capable(old->user_ns, CAP_SETUID)) {
		/* Without CAP_SETUID each new id must match one of the
		 * current real, effective or saved uids. */
		if (ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
		    !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
			goto error;
		if (euid != (uid_t) -1        && !uid_eq(keuid, old->uid) &&
		    !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
			goto error;
		if (suid != (uid_t) -1        && !uid_eq(ksuid, old->uid) &&
		    !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
			goto error;
	}

	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		/* Changing the real uid re-points the per-user accounting. */
		if (!uid_eq(kruid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = keuid;
	if (suid != (uid_t) -1)
		new->suid = ksuid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
 910
 911SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
 912{
 913        const struct cred *cred = current_cred();
 914        int retval;
 915        uid_t ruid, euid, suid;
 916
 917        ruid = from_kuid_munged(cred->user_ns, cred->uid);
 918        euid = from_kuid_munged(cred->user_ns, cred->euid);
 919        suid = from_kuid_munged(cred->user_ns, cred->suid);
 920
 921        if (!(retval   = put_user(ruid, ruidp)) &&
 922            !(retval   = put_user(euid, euidp)))
 923                retval = put_user(suid, suidp);
 924
 925        return retval;
 926}
 927
/*
 * Same as above, but for rgid, egid, sgid.
 *
 * (gid_t) -1 for any argument means "leave that id unchanged".
 */
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid, ksgid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);
	ksgid = make_kgid(ns, sgid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;
	if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (!ns_capable(old->user_ns, CAP_SETGID)) {
		/* Without CAP_SETGID each new id must match one of the
		 * current real, effective or saved gids. */
		if (rgid != (gid_t) -1        && !gid_eq(krgid, old->gid) &&
		    !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
			goto error;
		if (egid != (gid_t) -1        && !gid_eq(kegid, old->gid) &&
		    !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
			goto error;
		if (sgid != (gid_t) -1        && !gid_eq(ksgid, old->gid) &&
		    !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
			goto error;
	}

	if (rgid != (gid_t) -1)
		new->gid = krgid;
	if (egid != (gid_t) -1)
		new->egid = kegid;
	if (sgid != (gid_t) -1)
		new->sgid = ksgid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
 982
 983SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
 984{
 985        const struct cred *cred = current_cred();
 986        int retval;
 987        gid_t rgid, egid, sgid;
 988
 989        rgid = from_kgid_munged(cred->user_ns, cred->gid);
 990        egid = from_kgid_munged(cred->user_ns, cred->egid);
 991        sgid = from_kgid_munged(cred->user_ns, cred->sgid);
 992
 993        if (!(retval   = put_user(rgid, rgidp)) &&
 994            !(retval   = put_user(egid, egidp)))
 995                retval = put_user(sgid, sgidp);
 996
 997        return retval;
 998}
 999
1000
1001/*
1002 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
1003 * is used for "access()" and for the NFS daemon (letting nfsd stay at
1004 * whatever uid it wants to). It normally shadows "euid", except when
1005 * explicitly set by setfsuid() or for access..
1006 */
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;
	kuid_t kuid;

	/*
	 * Note the unusual contract: the old fsuid is returned in all
	 * cases, success or failure, so the caller cannot distinguish
	 * an error other than by reading the fsuid back.
	 */
	old = current_cred();
	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);

	kuid = make_kuid(old->user_ns, uid);
	if (!uid_valid(kuid))
		return old_fsuid;

	new = prepare_creds();
	if (!new)
		return old_fsuid;

	/*
	 * Allowed if the requested uid matches any of the caller's current
	 * uids, or the caller has CAP_SETUID over its user namespace.
	 */
	if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid)  ||
	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
	    ns_capable(old->user_ns, CAP_SETUID)) {
		if (!uid_eq(kuid, old->fsuid)) {
			new->fsuid = kuid;
			/* Give the LSM a chance to veto the fsuid change. */
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}
1042
1043/*
1044 * Samma på svenska..
1045 */
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;
	kgid_t kgid;

	/*
	 * Like setfsuid(): the previous fsgid is returned whether or not
	 * the change was permitted or applied.
	 */
	old = current_cred();
	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);

	kgid = make_kgid(old->user_ns, gid);
	if (!gid_valid(kgid))
		return old_fsgid;

	new = prepare_creds();
	if (!new)
		return old_fsgid;

	/*
	 * Allowed if the requested gid matches any of the caller's current
	 * gids, or the caller has CAP_SETGID over its user namespace.
	 */
	if (gid_eq(kgid, old->gid)  || gid_eq(kgid, old->egid)  ||
	    gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
	    ns_capable(old->user_ns, CAP_SETGID)) {
		if (!gid_eq(kgid, old->fsgid)) {
			new->fsgid = kgid;
			goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}
1080
1081/**
1082 * sys_getpid - return the thread group id of the current process
1083 *
1084 * Note, despite the name, this returns the tgid not the pid.  The tgid and
1085 * the pid are identical unless CLONE_THREAD was specified on clone() in
1086 * which case the tgid is the same in all threads of the same group.
1087 *
1088 * This is SMP safe as current->tgid does not change.
1089 */
1090SYSCALL_DEFINE0(getpid)
1091{
1092        return task_tgid_vnr(current);
1093}
1094
1095/* Thread ID - the internal kernel "pid" */
1096SYSCALL_DEFINE0(gettid)
1097{
1098        return task_pid_vnr(current);
1099}
1100
1101/*
1102 * Accessing ->real_parent is not SMP-safe, it could
1103 * change from under us. However, we can use a stale
1104 * value of ->real_parent under rcu_read_lock(), see
1105 * release_task()->call_rcu(delayed_put_task_struct).
1106 */
1107SYSCALL_DEFINE0(getppid)
1108{
1109        int pid;
1110
1111        rcu_read_lock();
1112        pid = task_tgid_vnr(rcu_dereference(current->real_parent));
1113        rcu_read_unlock();
1114
1115        return pid;
1116}
1117
SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe; value is mapped into our user ns. */
	return from_kuid_munged(current_user_ns(), current_uid());
}
1123
SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe; value is mapped into our user ns. */
	return from_kuid_munged(current_user_ns(), current_euid());
}
1129
SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe; value is mapped into our user ns. */
	return from_kgid_munged(current_user_ns(), current_gid());
}
1135
SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe; value is mapped into our user ns. */
	return from_kgid_munged(current_user_ns(), current_egid());
}
1141
1142void do_sys_times(struct tms *tms)
1143{
1144        cputime_t tgutime, tgstime, cutime, cstime;
1145
1146        thread_group_cputime_adjusted(current, &tgutime, &tgstime);
1147        cutime = current->signal->cutime;
1148        cstime = current->signal->cstime;
1149        tms->tms_utime = cputime_to_clock_t(tgutime);
1150        tms->tms_stime = cputime_to_clock_t(tgstime);
1151        tms->tms_cutime = cputime_to_clock_t(cutime);
1152        tms->tms_cstime = cputime_to_clock_t(cstime);
1153}
1154
1155SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
1156{
1157        if (tbuf) {
1158                struct tms tmp;
1159
1160                do_sys_times(&tmp);
1161                if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
1162                        return -EFAULT;
1163        }
1164        force_successful_syscall_return();
1165        return (long) jiffies_64_to_clock_t(get_jiffies_64());
1166}
1167
1168/*
1169 * This needs some heavy checking ...
1170 * I just haven't the stomach for it. I also don't fully
1171 * understand sessions/pgrp etc. Let somebody who does explain it.
1172 *
1173 * OK, I think I have the protection semantics right.... this is really
1174 * only important on a multi-user system anyway, to make sure one user
1175 * can't send a signal to a process owned by another.  -TYT, 12/12/91
1176 *
1177 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
1178 * LBT 04.03.94
1179 */
/*
 * Move process @pid into process group @pgid.  pid == 0 means the
 * caller; pgid == 0 means "same as pid".  The target must be the caller
 * or one of its children (not yet exec'd), in the caller's session, and
 * the destination pgrp must already exist in that session.
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	tasklist_write_lock_irq();

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	/* Process groups are per-process, not per-thread. */
	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		/* Our child: must share our session and not have exec'd. */
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else {
		/* Not our child: only allowed on ourselves. */
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	/* Session leaders may not be moved to another group. */
	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		/* Joining an existing group: it must live in our session. */
		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	qwrite_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}
1250
1251SYSCALL_DEFINE1(getpgid, pid_t, pid)
1252{
1253        struct task_struct *p;
1254        struct pid *grp;
1255        int retval;
1256
1257        rcu_read_lock();
1258        if (!pid)
1259                grp = task_pgrp(current);
1260        else {
1261                retval = -ESRCH;
1262                p = find_task_by_vpid(pid);
1263                if (!p)
1264                        goto out;
1265                grp = task_pgrp(p);
1266                if (!grp)
1267                        goto out;
1268
1269                retval = security_task_getpgid(p);
1270                if (retval)
1271                        goto out;
1272        }
1273        retval = pid_vnr(grp);
1274out:
1275        rcu_read_unlock();
1276        return retval;
1277}
1278
1279#ifdef __ARCH_WANT_SYS_GETPGRP
1280
/* Legacy getpgrp(): identical to getpgid(0). */
SYSCALL_DEFINE0(getpgrp)
{
	return sys_getpgid(0);
}
1285
1286#endif
1287
1288SYSCALL_DEFINE1(getsid, pid_t, pid)
1289{
1290        struct task_struct *p;
1291        struct pid *sid;
1292        int retval;
1293
1294        rcu_read_lock();
1295        if (!pid)
1296                sid = task_session(current);
1297        else {
1298                retval = -ESRCH;
1299                p = find_task_by_vpid(pid);
1300                if (!p)
1301                        goto out;
1302                sid = task_session(p);
1303                if (!sid)
1304                        goto out;
1305
1306                retval = security_task_getsid(p);
1307                if (retval)
1308                        goto out;
1309        }
1310        retval = pid_vnr(sid);
1311out:
1312        rcu_read_unlock();
1313        return retval;
1314}
1315
/*
 * Make the calling process a session leader of a new session (and the
 * leader of a new process group), detaching it from its controlling tty.
 * Returns the new session id, or -EPERM if the caller is already a
 * session leader or the id is taken by an existing process group.
 */
SYSCALL_DEFINE0(setsid)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	tasklist_write_lock_irq();
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	/* Become leader of a new session and a new process group. */
	__set_special_pids(sid);

	/* A new session has no controlling terminal. */
	proc_clear_tty(group_leader);

	err = session;
out:
	qwrite_unlock_irq(&tasklist_lock);
	if (err > 0) {
		/* Success: notify userspace and set up autogroup scheduling. */
		proc_sid_connector(group_leader);
		sched_autogroup_create_attach(group_leader);
	}
	return err;
}
1348
/* Serializes readers and writers of the utsname() data (uname fields). */
DECLARE_RWSEM(uts_sem);
1350
/*
 * For PER_LINUX32 tasks on biarch kernels, report the 32-bit machine
 * string (e.g. "i686" instead of "x86_64").  Evaluates non-zero only
 * when the copy to userspace fails.
 */
#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif
1359
1360/*
1361 * Work around broken programs that cannot handle "Linux 3.0".
1362 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
1363 */
/*
 * If the task has the UNAME26 personality, overwrite the already-copied
 * release string in userspace with the faked "2.6.40+x" version.
 * Returns non-zero (the copy_to_user residue) on fault, 0 otherwise.
 */
static int override_release(char __user *release, size_t len)
{
	int ret = 0;

	if (current->personality & UNAME26) {
		const char *rest = UTS_RELEASE;
		char buf[65] = { 0 };
		int ndots = 0;
		unsigned v;
		size_t copy;

		/*
		 * Advance @rest past "major.minor." so it points at the
		 * sublevel plus any -extraversion suffix: stop at the third
		 * dot or the first character that is neither digit nor dot.
		 */
		while (*rest) {
			if (*rest == '.' && ++ndots >= 3)
				break;
			if (!isdigit(*rest) && *rest != '.')
				break;
			rest++;
		}
		/* Fake sublevel = real minor + 40, so 3.x maps to 2.6.40+x. */
		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
		copy = clamp_t(size_t, len, 1, sizeof(buf));
		/* scnprintf() bounds output to copy-1 chars, so copy+1 <= len. */
		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
		/* +1 so the terminating NUL is copied as well. */
		ret = copy_to_user(release, buf, copy + 1);
	}
	return ret;
}
1389
/*
 * uname(2): copy the current uts namespace's new_utsname to userspace,
 * then apply the UNAME26 release fixup and 32-bit machine override
 * directly on the user buffer.
 */
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);

	/* The overrides rewrite the user buffer after uts_sem is dropped. */
	if (!errno && override_release(name->release, sizeof(name->release)))
		errno = -EFAULT;
	if (!errno && override_architecture(name))
		errno = -EFAULT;
	return errno;
}
1405
1406#ifdef __ARCH_WANT_SYS_OLD_UNAME
1407/*
1408 * Old cruft
1409 */
/* Legacy uname(2) taking the shorter old_utsname layout. */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	int error = 0;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof(*name)))
		error = -EFAULT;
	up_read(&uts_sem);

	/* Same post-copy fixups as newuname(). */
	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	if (!error && override_architecture(name))
		error = -EFAULT;
	return error;
}
1428
/*
 * Ancient uname(2) variant: each field is truncated to __OLD_UTS_LEN
 * characters and explicitly NUL-terminated in the user buffer.
 */
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	int error;

	if (!name)
		return -EFAULT;
	/* One up-front check lets us use the cheaper __copy/__put below. */
	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
		return -EFAULT;

	down_read(&uts_sem);
	error = __copy_to_user(&name->sysname, &utsname()->sysname,
			       __OLD_UTS_LEN);
	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->release, &utsname()->release,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->release + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->version, &utsname()->version,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->version + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->machine, &utsname()->machine,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
	up_read(&uts_sem);

	/* Same post-copy fixups as newuname(). */
	if (!error && override_architecture(name))
		error = -EFAULT;
	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	/* The ORed residues are not errnos; collapse any failure to -EFAULT. */
	return error ? -EFAULT : 0;
}
1462#endif
1463
/*
 * Set the hostname (utsname()->nodename) of the current uts namespace.
 * Requires CAP_SYS_ADMIN over the uts namespace's user namespace.
 */
SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	down_write(&uts_sem);
	errno = -EFAULT;
	/* Bounce through a stack buffer, then splice in and zero the tail. */
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
		/* Wake poll/select waiters on /proc/sys/kernel/hostname. */
		uts_proc_notify(UTS_PROC_HOSTNAME);
	}
	up_write(&uts_sem);
	return errno;
}
1487
1488#ifdef __ARCH_WANT_SYS_GETHOSTNAME
1489
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i, errno;
	struct new_utsname *u;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	/*
	 * i includes the trailing NUL; if the user buffer is smaller the
	 * copy is truncated and may not be NUL-terminated (historical
	 * behaviour of this syscall).
	 */
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, u->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}
1508
1509#endif
1510
1511/*
1512 * Only setdomainname; getdomainname can be implemented by calling
1513 * uname()
1514 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	/* Same privilege model as sethostname(). */
	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	/* Bounce through a stack buffer, then splice in and zero the tail. */
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
		/* Wake poll/select waiters on /proc/sys/kernel/domainname. */
		uts_proc_notify(UTS_PROC_DOMAINNAME);
	}
	up_write(&uts_sem);
	return errno;
}
1538
1539SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1540{
1541        struct rlimit value;
1542        int ret;
1543
1544        ret = do_prlimit(current, resource, NULL, &value);
1545        if (!ret)
1546                ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
1547
1548        return ret;
1549}
1550
1551#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
1552
1553/*
1554 *      Back compatibility for getrlimit. Needed for some apps.
1555 */
1556 
SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	struct rlimit x;
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	/* task_lock on the group leader guards signal->rlim updates. */
	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	/*
	 * The old ABI cannot represent values above 2^31-1 (including
	 * RLIM_INFINITY), so clamp both fields for old binaries.
	 */
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
}
1573
1574#endif
1575
/*
 * Does this 64-bit rlimit value mean "no limit"?  On 32-bit kernels any
 * value that does not fit in unsigned long is treated as infinite too,
 * since struct rlimit cannot represent it.
 */
static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}
1584
1585static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
1586{
1587        if (rlim->rlim_cur == RLIM_INFINITY)
1588                rlim64->rlim_cur = RLIM64_INFINITY;
1589        else
1590                rlim64->rlim_cur = rlim->rlim_cur;
1591        if (rlim->rlim_max == RLIM_INFINITY)
1592                rlim64->rlim_max = RLIM64_INFINITY;
1593        else
1594                rlim64->rlim_max = rlim->rlim_max;
1595}
1596
1597static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
1598{
1599        if (rlim64_is_infinity(rlim64->rlim_cur))
1600                rlim->rlim_cur = RLIM_INFINITY;
1601        else
1602                rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
1603        if (rlim64_is_infinity(rlim64->rlim_max))
1604                rlim->rlim_max = RLIM_INFINITY;
1605        else
1606                rlim->rlim_max = (unsigned long)rlim64->rlim_max;
1607}
1608
/*
 * Read and/or write one rlimit of @tsk.  If @old_rlim is non-NULL the
 * previous value is returned through it; if @new_rlim is non-NULL it is
 * installed (raising the hard limit needs CAP_SYS_RESOURCE).
 *
 * make sure you are allowed to change @tsk limits before calling this
 */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
		struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (new_rlim) {
		/* Soft limit may never exceed the hard limit. */
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		/* NOFILE is additionally capped by the global nr_open sysctl. */
		if (resource == RLIMIT_NOFILE &&
				new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* protect tsk->signal and tsk->sighand from disappearing */
	tasklist_read_lock();
	if (!tsk->sighand) {
		/* Task is already exiting; nothing to read or write. */
		retval = -ESRCH;
		goto out;
	}

	rlim = tsk->signal->rlim + resource;
	/* task_lock on the group leader serializes rlim readers/writers. */
	task_lock(tsk->group_leader);
	if (new_rlim) {
		/* Keep the capable check against init_user_ns until
		   cgroups can contain all limits */
		if (new_rlim->rlim_max > rlim->rlim_max &&
				!capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk->group_leader,
					resource, new_rlim);
		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry.  But we use the zero value to mean "it was
			 * never set".  So let's cheat and make it one second
			 * instead
			 */
			new_rlim->rlim_cur = 1;
		}
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);

	/*
	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	 if (!retval && new_rlim && resource == RLIMIT_CPU &&
			 new_rlim->rlim_cur != RLIM_INFINITY)
		update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
	qread_unlock(&tasklist_lock);
	return retval;
}
1675
/*
 * May the caller read/write @task's rlimits?  Allowed for self, for a
 * caller whose real ids match all three of the target's uids and gids,
 * or with CAP_SYS_RESOURCE over the target's user namespace.
 *
 * rcu lock must be held
 */
static int check_prlimit_permission(struct task_struct *task)
{
	const struct cred *cred = current_cred(), *tcred;

	if (current == task)
		return 0;

	tcred = __task_cred(task);
	if (uid_eq(cred->uid, tcred->euid) &&
	    uid_eq(cred->uid, tcred->suid) &&
	    uid_eq(cred->uid, tcred->uid)  &&
	    gid_eq(cred->gid, tcred->egid) &&
	    gid_eq(cred->gid, tcred->sgid) &&
	    gid_eq(cred->gid, tcred->gid))
		return 0;
	if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
		return 0;

	return -EPERM;
}
1697
/*
 * prlimit64(2): get and/or set one rlimit of process @pid (0 = caller)
 * using the 64-bit rlimit layout.  Either pointer may be NULL.
 */
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	int ret;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
	}

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	/* Permission check must run under rcu, before we take a reference. */
	ret = check_prlimit_permission(tsk);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	/* Pin the task so do_prlimit() can run outside the rcu section. */
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			old_rlim ? &old : NULL);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}
1739
1740SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1741{
1742        struct rlimit new_rlim;
1743
1744        if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1745                return -EFAULT;
1746        return do_prlimit(current, resource, &new_rlim, NULL);
1747}
1748
1749/*
1750 * It would make sense to put struct rusage in the task_struct,
1751 * except that would make the task_struct be *really big*.  After
1752 * task_struct gets moved into malloc'ed memory, it would
1753 * make sense to do this.  It will make moving the rest of the information
1754 * a lot simpler!  (Which we're not doing right now because we're not
1755 * measuring them yet).
1756 *
1757 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
1758 * races with threads incrementing their own counters.  But since word
1759 * reads are atomic, we either get new values or old values and we don't
1760 * care which for the sums.  We always take the siglock to protect reading
1761 * the c* fields from p->signal from races with exit.c updating those
1762 * fields when reaping, so a sample either gets all the additions of a
1763 * given child after it's reaped, or none so this sample is before reaping.
1764 *
1765 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
1767 * for  the cases current multithreaded, non-current single threaded
1768 * non-current multithreaded.  Thread traversal is now safe with
1769 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
1771 * single threaded,  as no one else can take our signal_struct away, no one
1772 * else can  reap the  children to update signal->c* counters, and no one else
1773 * can race with the signal-> fields. If we do not take any lock, the
1774 * signal-> fields could be read out of order while another thread was just
1775 * exiting. So we should  place a read memory barrier when we avoid the lock.
1776 * On the writer side,  write memory barrier is implied in  __exit_signal
1777 * as __exit_signal releases  the siglock spinlock after updating the signal->
1778 * fields. But we don't do this yet to keep things simple.
1779 *
1780 */
1781
/*
 * Add thread @t's per-thread counters (context switches, page faults,
 * block I/O) into @r.  CPU times and maxrss are handled by the caller.
 */
static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}
1791
1792static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1793{
1794        struct task_struct *t;
1795        unsigned long flags;
1796        cputime_t tgutime, tgstime, utime, stime;
1797        unsigned long maxrss = 0;
1798
1799        memset((char *) r, 0, sizeof *r);
1800        utime = stime = 0;
1801
1802        if (who == RUSAGE_THREAD) {
1803                task_cputime_adjusted(current, &utime, &stime);
1804                accumulate_thread_rusage(p, r);
1805                maxrss = p->signal->maxrss;
1806                goto out;
1807        }
1808
1809        if (!lock_task_sighand(p, &flags))
1810                return;
1811
1812        switch (who) {
1813                case RUSAGE_BOTH:
1814                case RUSAGE_CHILDREN:
1815                        utime = p->signal->cutime;
1816                        stime = p->signal->cstime;
1817                        r->ru_nvcsw = p->signal->cnvcsw;
1818                        r->ru_nivcsw = p->signal->cnivcsw;
1819                        r->ru_minflt = p->signal->cmin_flt;
1820                        r->ru_majflt = p->signal->cmaj_flt;
1821                        r->ru_inblock = p->signal->cinblock;
1822                        r->ru_oublock = p->signal->coublock;
1823                        maxrss = p->signal->cmaxrss;
1824
1825                        if (who == RUSAGE_CHILDREN)
1826                                break;
1827
1828                case RUSAGE_SELF:
1829                        thread_group_cputime_adjusted(p, &tgutime, &tgstime);
1830                        utime += tgutime;
1831                        stime += tgstime;
1832                        r->ru_nvcsw += p->signal->nvcsw;
1833                        r->ru_nivcsw += p->signal->nivcsw;
1834                        r->ru_minflt += p->signal->min_flt;
1835                        r->ru_majflt += p->signal->maj_flt;
1836                        r->ru_inblock += p->signal->inblock;
1837                        r->ru_oublock += p->signal->oublock;
1838                        if (maxrss < p->signal->maxrss)
1839                                maxrss = p->signal->maxrss;
1840                        t = p;
1841                        do {
1842                                accumulate_thread_rusage(t, r);
1843                                t = next_thread(t);
1844                        } while (t != p);
1845                        break;
1846
1847                default:
1848                        BUG();
1849        }
1850        unlock_task_sighand(p, &flags);
1851
1852out:
1853        cputime_to_timeval(utime, &r->ru_utime);
1854        cputime_to_timeval(stime, &r->ru_stime);
1855
1856        if (who != RUSAGE_CHILDREN) {
1857                struct mm_struct *mm = get_task_mm(p);
1858                if (mm) {
1859                        setmax_mm_hiwater_rss(&maxrss, mm);
1860                        mmput(mm);
1861                }
1862        }
1863        r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
1864}
1865
1866int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1867{
1868        struct rusage r;
1869        k_getrusage(p, who, &r);
1870        return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1871}
1872
1873SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
1874{
1875        if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1876            who != RUSAGE_THREAD)
1877                return -EINVAL;
1878        return getrusage(current, who, ru);
1879}
1880
1881#ifdef CONFIG_COMPAT
1882COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
1883{
1884        struct rusage r;
1885
1886        if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1887            who != RUSAGE_THREAD)
1888                return -EINVAL;
1889
1890        k_getrusage(current, who, &r);
1891        return put_compat_rusage(&r, ru);
1892}
1893#endif
1894
1895SYSCALL_DEFINE1(umask, int, mask)
1896{
1897        mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1898        return mask;
1899}
1900
/*
 * Repoint mm->exe_file (the target of /proc/<pid>/exe) at the file
 * referenced by @fd.
 *
 * Constraints enforced below:
 *  - the new file must be a regular, executable file on a mount that
 *    permits exec, matching what an exe_file normally looks like;
 *  - the change is refused (-EBUSY) while the current exe_file is still
 *    mapped anywhere in @mm;
 *  - the link may be changed at most once per mm (MMF_EXE_FILE_CHANGED).
 *
 * Returns 0 on success or a negative errno.
 */
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
	struct fd exe;
	struct file *old_exe, *exe_file;
	struct inode *inode;
	int err;

	exe = fdget(fd);
	if (!exe.file)
		return -EBADF;

	inode = file_inode(exe.file);

	/*
	 * Because the original mm->exe_file points to executable file, make
	 * sure that this one is executable as well, to avoid breaking an
	 * overall picture.
	 */
	err = -EACCES;
	if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
		goto exit;

	err = inode_permission(inode, MAY_EXEC);
	if (err)
		goto exit;

	/*
	 * Forbid mm->exe_file change if old file still mapped.
	 */
	exe_file = get_mm_exe_file(mm);
	err = -EBUSY;
	if (exe_file) {
		struct vm_area_struct *vma;

		/* Scan every VMA for a mapping backed by the old exe file. */
		down_read(&mm->mmap_sem);
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if (!vma->vm_file)
				continue;
			if (path_equal(&vma->vm_file->f_path,
				       &exe_file->f_path))
				goto exit_err;
		}

		up_read(&mm->mmap_sem);
		fput(exe_file);
	}

	/*
	 * The symlink can be changed only once, just to disallow arbitrary
	 * transitions malicious software might bring in. This means one
	 * could make a snapshot over all processes running and monitor
	 * /proc/pid/exe changes to notice unusual activity if needed.
	 */
	err = -EPERM;
	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
		goto exit;

	err = 0;
	/* set the new file, lockless */
	get_file(exe.file);
	old_exe = xchg(&mm->exe_file, exe.file);
	if (old_exe)
		fput(old_exe);
exit:
	fdput(exe);
	return err;
exit_err:
	/* Old exe still mapped: drop the mmap_sem and our references. */
	up_read(&mm->mmap_sem);
	fput(exe_file);
	goto exit;
}
1972
1973/*
1974 * WARNING: we don't require any capability here so be very careful
1975 * in what is allowed for modification from userspace.
1976 */
1977static int validate_prctl_map(struct prctl_mm_map *prctl_map)
1978{
1979        unsigned long mmap_max_addr = TASK_SIZE;
1980        struct mm_struct *mm = current->mm;
1981        int error = -EINVAL, i;
1982
1983        static const unsigned char offsets[] = {
1984                offsetof(struct prctl_mm_map, start_code),
1985                offsetof(struct prctl_mm_map, end_code),
1986                offsetof(struct prctl_mm_map, start_data),
1987                offsetof(struct prctl_mm_map, end_data),
1988                offsetof(struct prctl_mm_map, start_brk),
1989                offsetof(struct prctl_mm_map, brk),
1990                offsetof(struct prctl_mm_map, start_stack),
1991                offsetof(struct prctl_mm_map, arg_start),
1992                offsetof(struct prctl_mm_map, arg_end),
1993                offsetof(struct prctl_mm_map, env_start),
1994                offsetof(struct prctl_mm_map, env_end),
1995        };
1996
1997        /*
1998         * Make sure the members are not somewhere outside
1999         * of allowed address space.
2000         */
2001        for (i = 0; i < ARRAY_SIZE(offsets); i++) {
2002                u64 val = *(u64 *)((char *)prctl_map + offsets[i]);
2003
2004                if ((unsigned long)val >= mmap_max_addr ||
2005                    (unsigned long)val < mmap_min_addr)
2006                        goto out;
2007        }
2008
2009        /*
2010         * Make sure the pairs are ordered.
2011         */
2012#define __prctl_check_order(__m1, __op, __m2)                           \
2013        ((unsigned long)prctl_map->__m1 __op                            \
2014         (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
2015        error  = __prctl_check_order(start_code, <, end_code);
2016        error |= __prctl_check_order(start_data, <, end_data);
2017        error |= __prctl_check_order(start_brk, <=, brk);
2018        error |= __prctl_check_order(arg_start, <=, arg_end);
2019        error |= __prctl_check_order(env_start, <=, env_end);
2020        if (error)
2021                goto out;
2022#undef __prctl_check_order
2023
2024        error = -EINVAL;
2025
2026        /*
2027         * @brk should be after @end_data in traditional maps.
2028         */
2029        if (prctl_map->start_brk <= prctl_map->end_data ||
2030            prctl_map->brk <= prctl_map->end_data)
2031                goto out;
2032
2033        /*
2034         * Neither we should allow to override limits if they set.
2035         */
2036        if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
2037                              prctl_map->start_brk, prctl_map->end_data,
2038                              prctl_map->start_data))
2039                        goto out;
2040
2041        /*
2042         * Someone is trying to cheat the auxv vector.
2043         */
2044        if (prctl_map->auxv_size) {
2045                if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
2046                        goto out;
2047        }
2048
2049        /*
2050         * Finally, make sure the caller has the rights to
2051         * change /proc/pid/exe link: only local root should
2052         * be allowed to.
2053         */
2054        if (prctl_map->exe_fd != (u32)-1) {
2055                struct user_namespace *ns = current_user_ns();
2056                const struct cred *cred = current_cred();
2057
2058                if (!uid_eq(cred->uid, make_kuid(ns, 0)) ||
2059                    !gid_eq(cred->gid, make_kgid(ns, 0)))
2060                        goto out;
2061        }
2062
2063        error = 0;
2064out:
2065        return error;
2066}
2067
2068#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * PR_SET_MM_MAP / PR_SET_MM_MAP_SIZE: install a complete set of mm
 * layout fields in one validated transaction (used by checkpoint/
 * restore tooling).  @addr points at a userspace struct prctl_mm_map
 * of @data_size bytes; PR_SET_MM_MAP_SIZE merely reports the size the
 * kernel expects so userspace can detect ABI growth.
 *
 * Returns 0 on success or a negative errno.
 */
static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
{
	struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
	unsigned long user_auxv[AT_VECTOR_SIZE];
	struct mm_struct *mm = current->mm;
	int error;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
	BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);

	if (opt == PR_SET_MM_MAP_SIZE)
		return put_user((unsigned int)sizeof(prctl_map),
				(unsigned int __user *)addr);

	/* Exact size match guards against mismatched ABI versions. */
	if (data_size != sizeof(prctl_map))
		return -EINVAL;

	if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
		return -EFAULT;

	error = validate_prctl_map(&prctl_map);
	if (error)
		return error;

	if (prctl_map.auxv_size) {
		/*
		 * auxv_size may be shorter than the full vector (validated
		 * above to be <= sizeof(mm->saved_auxv)); zero the rest.
		 * NOTE(review): auxv_size is not required to be a multiple
		 * of sizeof(unsigned long) here -- confirm intended.
		 */
		memset(user_auxv, 0, sizeof(user_auxv));
		if (copy_from_user(user_auxv,
				   (const void __user *)prctl_map.auxv,
				   prctl_map.auxv_size))
			return -EFAULT;

		/* Last entry must be AT_NULL as specification requires */
		user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
		user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
	}

	if (prctl_map.exe_fd != (u32)-1) {
		error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
		if (error)
			return error;
	}

	down_write(&mm->mmap_sem);

	/*
	 * We don't validate if these members are pointing to
	 * real present VMAs because application may have correspond
	 * VMAs already unmapped and kernel uses these members for statistics
	 * output in procfs mostly, except
	 *
	 *  - @start_brk/@brk which are used in do_brk but kernel lookups
	 *    for VMAs when updating these memvers so anything wrong written
	 *    here cause kernel to swear at userspace program but won't lead
	 *    to any problem in kernel itself
	 */

	mm->start_code	= prctl_map.start_code;
	mm->end_code	= prctl_map.end_code;
	mm->start_data	= prctl_map.start_data;
	mm->end_data	= prctl_map.end_data;
	mm->start_brk	= prctl_map.start_brk;
	mm->brk		= prctl_map.brk;
	mm->start_stack	= prctl_map.start_stack;
	mm->arg_start	= prctl_map.arg_start;
	mm->arg_end	= prctl_map.arg_end;
	mm->env_start	= prctl_map.env_start;
	mm->env_end	= prctl_map.env_end;

	/*
	 * Note this update of @saved_auxv is lockless thus
	 * if someone reads this member in procfs while we're
	 * updating -- it may get partly updated results. It's
	 * known and acceptable trade off: we leave it as is to
	 * not introduce additional locks here making the kernel
	 * more complex.
	 */
	if (prctl_map.auxv_size)
		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));

	up_write(&mm->mmap_sem);
	return 0;
}
2151#endif /* CONFIG_CHECKPOINT_RESTORE */
2152
/*
 * PR_SET_MM_AUXV helper: overwrite the first @len bytes of
 * mm->saved_auxv with data copied from userspace @addr.
 *
 * Returns 0 on success, -EINVAL if @len exceeds the vector size,
 * -EFAULT if the copy from userspace faults.
 */
static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
			  unsigned long len)
{
	/*
	 * This doesn't move the auxiliary vector itself since it's pinned to
	 * mm_struct, but it permits filling the vector with new values.  It's
	 * up to the caller to provide sane values here, otherwise userspace
	 * tools which use this vector might be unhappy.
	 */
	unsigned long user_auxv[AT_VECTOR_SIZE];

	if (len > sizeof(user_auxv))
		return -EINVAL;

	if (copy_from_user(user_auxv, (const void __user *)addr, len))
		return -EFAULT;

	/* Make sure the last entry is always AT_NULL */
	user_auxv[AT_VECTOR_SIZE - 2] = 0;
	user_auxv[AT_VECTOR_SIZE - 1] = 0;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

	/* task_lock serialises against concurrent saved_auxv writers. */
	task_lock(current);
	memcpy(mm->saved_auxv, user_auxv, len);
	task_unlock(current);

	return 0;
}
2182
/*
 * Dispatcher for the PR_SET_MM family of prctl(2) options.
 *
 * The single-field options (PR_SET_MM_START_CODE .. PR_SET_MM_ENV_END)
 * take a snapshot of the current mm layout, substitute one field with
 * @addr, revalidate the whole map via validate_prctl_map(), and only
 * then commit it back -- so a single bad field can never leave the mm
 * in an inconsistent state.  PR_SET_MM_MAP(_SIZE), PR_SET_MM_EXE_FILE
 * and PR_SET_MM_AUXV are delegated to their dedicated helpers.
 *
 * Requires CAP_SYS_RESOURCE for everything except the MAP variants
 * (which carry their own checks).  Returns 0 or a negative errno.
 */
static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	struct mm_struct *mm = current->mm;
	struct prctl_mm_map prctl_map;
	struct vm_area_struct *vma;
	int error;

	/* arg5 is never used; arg4 only carries data for the three opts. */
	if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
			      opt != PR_SET_MM_MAP &&
			      opt != PR_SET_MM_MAP_SIZE)))
		return -EINVAL;

#ifdef CONFIG_CHECKPOINT_RESTORE
	if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
		return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
#endif

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (opt == PR_SET_MM_EXE_FILE)
		return prctl_set_mm_exe_file(mm, (unsigned int)addr);

	if (opt == PR_SET_MM_AUXV)
		return prctl_set_auxv(mm, addr, arg4);

	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;

	down_write(&mm->mmap_sem);
	vma = find_vma(mm, addr);

	/* Snapshot the current layout; one field is overridden below. */
	prctl_map.start_code	= mm->start_code;
	prctl_map.end_code	= mm->end_code;
	prctl_map.start_data	= mm->start_data;
	prctl_map.end_data	= mm->end_data;
	prctl_map.start_brk	= mm->start_brk;
	prctl_map.brk		= mm->brk;
	prctl_map.start_stack	= mm->start_stack;
	prctl_map.arg_start	= mm->arg_start;
	prctl_map.arg_end	= mm->arg_end;
	prctl_map.env_start	= mm->env_start;
	prctl_map.env_end	= mm->env_end;
	prctl_map.auxv		= NULL;
	prctl_map.auxv_size	= 0;
	prctl_map.exe_fd	= -1;

	switch (opt) {
	case PR_SET_MM_START_CODE:
		prctl_map.start_code = addr;
		break;
	case PR_SET_MM_END_CODE:
		prctl_map.end_code = addr;
		break;
	case PR_SET_MM_START_DATA:
		prctl_map.start_data = addr;
		break;
	case PR_SET_MM_END_DATA:
		prctl_map.end_data = addr;
		break;
	case PR_SET_MM_START_STACK:
		prctl_map.start_stack = addr;
		break;
	case PR_SET_MM_START_BRK:
		prctl_map.start_brk = addr;
		break;
	case PR_SET_MM_BRK:
		prctl_map.brk = addr;
		break;
	case PR_SET_MM_ARG_START:
		prctl_map.arg_start = addr;
		break;
	case PR_SET_MM_ARG_END:
		prctl_map.arg_end = addr;
		break;
	case PR_SET_MM_ENV_START:
		prctl_map.env_start = addr;
		break;
	case PR_SET_MM_ENV_END:
		prctl_map.env_end = addr;
		break;
	default:
		goto out;
	}

	error = validate_prctl_map(&prctl_map);
	if (error)
		goto out;

	switch (opt) {
	/*
	 * If command line arguments and environment
	 * are placed somewhere else on stack, we can
	 * set them up here, ARG_START/END to setup
	 * command line argumets and ENV_START/END
	 * for environment.
	 */
	case PR_SET_MM_START_STACK:
	case PR_SET_MM_ARG_START:
	case PR_SET_MM_ARG_END:
	case PR_SET_MM_ENV_START:
	case PR_SET_MM_ENV_END:
		/* These must point at (or before) an existing VMA. */
		if (!vma) {
			error = -EFAULT;
			goto out;
		}
	}

	/* Commit the validated map back into the mm. */
	mm->start_code	= prctl_map.start_code;
	mm->end_code	= prctl_map.end_code;
	mm->start_data	= prctl_map.start_data;
	mm->end_data	= prctl_map.end_data;
	mm->start_brk	= prctl_map.start_brk;
	mm->brk		= prctl_map.brk;
	mm->start_stack	= prctl_map.start_stack;
	mm->arg_start	= prctl_map.arg_start;
	mm->arg_end	= prctl_map.arg_end;
	mm->env_start	= prctl_map.env_start;
	mm->env_end	= prctl_map.env_end;

	error = 0;
out:
	up_write(&mm->mmap_sem);
	return error;
}
2311
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * PR_GET_TID_ADDRESS: report the task's clear_child_tid pointer (as
 * set up by clone/set_tid_address) so checkpoint tooling can save it.
 */
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return put_user(me->clear_child_tid, tid_addr);
}
#else
/* Without checkpoint/restore there is no consumer for this query. */
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return -EINVAL;
}
#endif
2323
/*
 * prctl(2): per-process control operations on the calling task.
 *
 * The security hook gets first refusal; -ENOSYS from it means "not
 * handled, fall through to the generic implementation".  Each option
 * below validates its own arguments; unknown options yield -EINVAL.
 */
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	/* Scratch buffer for PR_SET_NAME/PR_GET_NAME (task comm). */
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		/* SUID_DUMP_ROOT cannot be set from userspace. */
		if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		/* Only statistical process timing is supported. */
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;
	case PR_SET_NAME:
		/*
		 * Pre-terminate: strncpy_from_user() copies at most
		 * sizeof(comm)-1 bytes and does not NUL-terminate a
		 * maximum-length string itself.
		 */
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		/*
		 * arg2 is unsigned, so "<= 0" only matches 0, which
		 * restores the default slack.
		 */
		if (arg2 <= 0)
			current->timer_slack_ns =
					current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		break;
	case PR_MCE_KILL:
		/* Machine-check memory-failure kill policy. */
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			current->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			current->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				current->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				current->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (current->flags & PF_MCE_PROCESS)
			error = (current->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;
	case PR_SET_MM:
		error = prctl_set_mm(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user **)arg2);
		break;
	case PR_SET_CHILD_SUBREAPER:
		me->signal->is_child_subreaper = !!arg2;
		break;
	case PR_GET_CHILD_SUBREAPER:
		error = put_user(me->signal->is_child_subreaper,
				 (int __user *)arg2);
		break;
	case PR_SET_NO_NEW_PRIVS:
		/* One-way switch: can be set (arg2 == 1) but never cleared. */
		if (arg2 != 1 || arg3 || arg4 || arg5)
			return -EINVAL;

		task_set_no_new_privs(current);
		break;
	case PR_GET_NO_NEW_PRIVS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		return task_no_new_privs(current) ? 1 : 0;
	case PR_GET_THP_DISABLE:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
		break;
	case PR_SET_THP_DISABLE:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		/* def_flags affects future mappings of this mm only. */
		down_write(&me->mm->mmap_sem);
		if (arg2)
			me->mm->def_flags |= VM_NOHUGEPAGE;
		else
			me->mm->def_flags &= ~VM_NOHUGEPAGE;
		up_write(&me->mm->mmap_sem);
		break;
	case PR_MPX_ENABLE_MANAGEMENT:
		error = MPX_ENABLE_MANAGEMENT();
		break;
	case PR_MPX_DISABLE_MANAGEMENT:
		error = MPX_DISABLE_MANAGEMENT();
		break;
	default:
		error = -EINVAL;
		break;
	}
	return error;
}
2514
2515SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
2516                struct getcpu_cache __user *, unused)
2517{
2518        int err = 0;
2519        int cpu = raw_smp_processor_id();
2520        if (cpup)
2521                err |= put_user(cpu, cpup);
2522        if (nodep)
2523                err |= put_user(cpu_to_node(cpu), nodep);
2524        return err ? -EFAULT : 0;
2525}
2526
/*
 * Userspace helper launched for an orderly poweroff.  Deliberately
 * non-const and fixed-size so the command can be overwritten at
 * runtime; NOTE(review): presumably exposed via sysctl elsewhere --
 * confirm against kernel/sysctl.c.
 */
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
/* The reboot helper is fixed and only used by __orderly_reboot(). */
static const char reboot_cmd[] = "/sbin/reboot";
2529
2530static int run_cmd(const char *cmd)
2531{
2532        char **argv;
2533        static char *envp[] = {
2534                "HOME=/",
2535                "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
2536                NULL
2537        };
2538        int ret;
2539        argv = argv_split(GFP_KERNEL, cmd, NULL);
2540        if (argv) {
2541                ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
2542                argv_free(argv);
2543        } else {
2544                ret = -ENOMEM;
2545        }
2546
2547        return ret;
2548}
2549
2550static int __orderly_reboot(void)
2551{
2552        int ret;
2553
2554        ret = run_cmd(reboot_cmd);
2555
2556        if (ret) {
2557                pr_warn("Failed to start orderly reboot: forcing the issue\n");
2558                emergency_sync();
2559                kernel_restart(NULL);
2560        }
2561
2562        return ret;
2563}
2564
2565static int __orderly_poweroff(bool force)
2566{
2567        int ret;
2568
2569        ret = run_cmd(poweroff_cmd);
2570
2571        if (ret && force) {
2572                pr_warn("Failed to start orderly shutdown: forcing the issue\n");
2573
2574                /*
2575                 * I guess this should try to kick off some daemon to sync and
2576                 * poweroff asap.  Or not even bother syncing if we're doing an
2577                 * emergency shutdown?
2578                 */
2579                emergency_sync();
2580                kernel_power_off();
2581        }
2582
2583        return ret;
2584}
2585
/* Latched "force" request consumed by the deferred poweroff work. */
static bool poweroff_force;

/* Workqueue callback: run the poweroff outside the caller's context. */
static void poweroff_work_func(struct work_struct *work)
{
	__orderly_poweroff(poweroff_force);
}

static DECLARE_WORK(poweroff_work, poweroff_work_func);
2594
2595/**
2596 * orderly_poweroff - Trigger an orderly system poweroff
2597 * @force: force poweroff if command execution fails
2598 *
2599 * This may be called from any context to trigger a system shutdown.
2600 * If the orderly shutdown fails, it will force an immediate shutdown.
2601 */
2602void orderly_poweroff(bool force)
2603{
2604        if (force) /* do not override the pending "true" */
2605                poweroff_force = true;
2606        schedule_work(&poweroff_work);
2607}
2608EXPORT_SYMBOL_GPL(orderly_poweroff);
2609
/* Workqueue callback: run the orderly reboot in process context. */
static void reboot_work_func(struct work_struct *work)
{
	__orderly_reboot();
}

static DECLARE_WORK(reboot_work, reboot_work_func);
2616
2617/**
2618 * orderly_reboot - Trigger an orderly system reboot
2619 *
2620 * This may be called from any context to trigger a system reboot.
2621 * If the orderly reboot fails, it will force an immediate reboot.
2622 */
2623void orderly_reboot(void)
2624{
2625        schedule_work(&reboot_work);
2626}
2627EXPORT_SYMBOL_GPL(orderly_reboot);
2628
2629/**
2630 * do_sysinfo - fill in sysinfo struct
2631 * @info: pointer to buffer to fill
2632 */
2633static int do_sysinfo(struct sysinfo *info)
2634{
2635        unsigned long mem_total, sav_total;
2636        unsigned int mem_unit, bitcount;
2637        struct timespec tp;
2638
2639        memset(info, 0, sizeof(struct sysinfo));
2640
2641        get_monotonic_boottime(&tp);
2642        info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
2643
2644        get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
2645
2646        info->procs = nr_threads;
2647
2648        si_meminfo(info);
2649        si_swapinfo(info);
2650
2651        /*
2652         * If the sum of all the available memory (i.e. ram + swap)
2653         * is less than can be stored in a 32 bit unsigned long then
2654         * we can be binary compatible with 2.2.x kernels.  If not,
2655         * well, in that case 2.2.x was broken anyways...
2656         *
2657         *  -Erik Andersen <andersee@debian.org>
2658         */
2659
2660        mem_total = info->totalram + info->totalswap;
2661        if (mem_total < info->totalram || mem_total < info->totalswap)
2662                goto out;
2663        bitcount = 0;
2664        mem_unit = info->mem_unit;
2665        while (mem_unit > 1) {
2666                bitcount++;
2667                mem_unit >>= 1;
2668                sav_total = mem_total;
2669                mem_total <<= 1;
2670                if (mem_total < sav_total)
2671                        goto out;
2672        }
2673
2674        /*
2675         * If mem_total did not overflow, multiply all memory values by
2676         * info->mem_unit and set it to 1.  This leaves things compatible
2677         * with 2.2.x, and also retains compatibility with earlier 2.4.x
2678         * kernels...
2679         */
2680
2681        info->mem_unit = 1;
2682        info->totalram <<= bitcount;
2683        info->freeram <<= bitcount;
2684        info->sharedram <<= bitcount;
2685        info->bufferram <<= bitcount;
2686        info->totalswap <<= bitcount;
2687        info->freeswap <<= bitcount;
2688        info->totalhigh <<= bitcount;
2689        info->freehigh <<= bitcount;
2690
2691out:
2692        return 0;
2693}
2694
2695SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
2696{
2697        struct sysinfo val;
2698
2699        do_sysinfo(&val);
2700
2701        if (copy_to_user(info, &val, sizeof(struct sysinfo)))
2702                return -EFAULT;
2703
2704        return 0;
2705}
2706
2707#ifdef CONFIG_COMPAT
/*
 * 32-bit userspace layout of struct sysinfo, used by the compat
 * sysinfo syscall below.  Field order mirrors the native struct; the
 * trailing _f pad keeps the overall size fixed across configurations.
 */
struct compat_sysinfo {
	s32 uptime;
	u32 loads[3];
	u32 totalram;
	u32 freeram;
	u32 sharedram;
	u32 bufferram;
	u32 totalswap;
	u32 freeswap;
	u16 procs;
	u16 pad;
	u32 totalhigh;
	u32 freehigh;
	u32 mem_unit;
	char _f[20-2*sizeof(u32)-sizeof(int)];
};
2724
/*
 * Compat sysinfo: same data as the native syscall, but memory values
 * that exceed 32 bits are rescaled (mem_unit grown up to PAGE_SIZE,
 * counters shifted down accordingly) so they fit the u32 fields of
 * struct compat_sysinfo.
 */
COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
	struct sysinfo s;

	do_sysinfo(&s);

	/* Check to see if any memory value is too large for 32-bit and scale
	 *  down if needed
	 */
	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
		int bitcount = 0;

		while (s.mem_unit < PAGE_SIZE) {
			s.mem_unit <<= 1;
			bitcount++;
		}

		s.totalram >>= bitcount;
		s.freeram >>= bitcount;
		s.sharedram >>= bitcount;
		s.bufferram >>= bitcount;
		s.totalswap >>= bitcount;
		s.freeswap >>= bitcount;
		s.totalhigh >>= bitcount;
		s.freehigh >>= bitcount;
	}

	/* Single access_ok() check up front lets us use __put_user below. */
	if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
	    __put_user(s.uptime, &info->uptime) ||
	    __put_user(s.loads[0], &info->loads[0]) ||
	    __put_user(s.loads[1], &info->loads[1]) ||
	    __put_user(s.loads[2], &info->loads[2]) ||
	    __put_user(s.totalram, &info->totalram) ||
	    __put_user(s.freeram, &info->freeram) ||
	    __put_user(s.sharedram, &info->sharedram) ||
	    __put_user(s.bufferram, &info->bufferram) ||
	    __put_user(s.totalswap, &info->totalswap) ||
	    __put_user(s.freeswap, &info->freeswap) ||
	    __put_user(s.procs, &info->procs) ||
	    __put_user(s.totalhigh, &info->totalhigh) ||
	    __put_user(s.freehigh, &info->freehigh) ||
	    __put_user(s.mem_unit, &info->mem_unit))
		return -EFAULT;

	return 0;
}
2771#endif /* CONFIG_COMPAT */
2772