/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)   (-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)   (-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)     (-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)     (-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)     (-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)     (-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)        (-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)        (-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

#ifdef CONFIG_UID16
EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
#endif

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 */

int C_A_D = 1;
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * If set, this is used for preparing the system to power off.
 */

void (*pm_power_off_prepare)(void);

static int set_one_prio(struct task_struct *p, int niceval, int error)
{
        int no_nice;

        if (p->uid != current->euid &&
                p->euid != current->euid && !capable(CAP_SYS_NICE)) {
                error = -EPERM;
                goto out;
        }
        if (niceval < task_nice(p) && !can_nice(p, niceval)) {
                error = -EACCES;
                goto out;
        }
        no_nice = security_task_setnice(p, niceval);
        if (no_nice) {
                error = no_nice;
                goto out;
        }
        if (error == -ESRCH)
                error = 0;
        set_user_nice(p, niceval);
out:
        return error;
}

asmlinkage long sys_setpriority(int which, int who, int niceval)
{
        struct task_struct *g, *p;
        struct user_struct *user;
        int error = -EINVAL;
        struct pid *pgrp;

        if (which > PRIO_USER || which < PRIO_PROCESS)
                goto out;

        /* normalize: avoid signed division (rounding problems) */
        error = -ESRCH;
        if (niceval < -20)
                niceval = -20;
        if (niceval > 19)
                niceval = 19;

        read_lock(&tasklist_lock);
        switch (which) {
                case PRIO_PROCESS:
                        if (who)
                                p = find_task_by_vpid(who);
                        else
                                p = current;
                        if (p)
                                error = set_one_prio(p, niceval, error);
                        break;
                case PRIO_PGRP:
                        if (who)
                                pgrp = find_vpid(who);
                        else
                                pgrp = task_pgrp(current);
                        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
                                error = set_one_prio(p, niceval, error);
                        } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
                        break;
                case PRIO_USER:
                        user = current->user;
                        if (!who)
                                who = current->uid;
                        else
                                if ((who != current->uid) && !(user = find_user(who)))
                                        goto out_unlock;        /* No processes for this user */

                        do_each_thread(g, p)
                                if (p->uid == who)
                                        error = set_one_prio(p, niceval, error);
                        while_each_thread(g, p);
                        if (who != current->uid)
                                free_uid(user);         /* For find_user() */
                        break;
        }
out_unlock:
        read_unlock(&tasklist_lock);
out:
        return error;
}

/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
asmlinkage long sys_getpriority(int which, int who)
{
        struct task_struct *g, *p;
        struct user_struct *user;
        long niceval, retval = -ESRCH;
        struct pid *pgrp;

        if (which > PRIO_USER || which < PRIO_PROCESS)
                return -EINVAL;

        read_lock(&tasklist_lock);
        switch (which) {
                case PRIO_PROCESS:
                        if (who)
                                p = find_task_by_vpid(who);
                        else
                                p = current;
                        if (p) {
                                niceval = 20 - task_nice(p);
                                if (niceval > retval)
                                        retval = niceval;
                        }
                        break;
                case PRIO_PGRP:
                        if (who)
                                pgrp = find_vpid(who);
                        else
                                pgrp = task_pgrp(current);
                        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
                                niceval = 20 - task_nice(p);
                                if (niceval > retval)
                                        retval = niceval;
                        } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
                        break;
                case PRIO_USER:
                        user = current->user;
                        if (!who)
                                who = current->uid;
                        else
                                if ((who != current->uid) && !(user = find_user(who)))
                                        goto out_unlock;        /* No processes for this user */

                        do_each_thread(g, p)
                                if (p->uid == who) {
                                        niceval = 20 - task_nice(p);
                                        if (niceval > retval)
                                                retval = niceval;
                                }
                        while_each_thread(g, p);
                        if (who != current->uid)
                                free_uid(user);         /* for find_user() */
                        break;
        }
out_unlock:
        read_unlock(&tasklist_lock);

        return retval;
}

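/*
 * Example (user-space, illustrative only, not part of this file): because
 * the raw syscall encodes its result as 20 - nice, a direct caller has to
 * undo the offset itself; the glibc getpriority() wrapper does this
 * translation for you.  A minimal sketch, assuming the usual syscall(2)
 * error convention:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <sys/resource.h>
 *
 *	int main(void)
 *	{
 *		// raw return is 1..40 on success, -1 with errno on failure
 *		long ret = syscall(SYS_getpriority, PRIO_PROCESS, 0);
 *
 *		if (ret < 0) {
 *			perror("getpriority");
 *			return 1;
 *		}
 *		printf("raw %ld => nice %ld\n", ret, 20 - ret);
 *		return 0;
 *	}
 */
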
/**
 *      emergency_restart - reboot the system
 *
 *      Without shutting down any hardware or taking any locks
 *      reboot the system.  This is called when we know we are in
 *      trouble so this is our best effort to reboot.  This is
 *      safe to call in interrupt context.
 */
void emergency_restart(void)
{
        machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);

static void kernel_restart_prepare(char *cmd)
{
        blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
        system_state = SYSTEM_RESTART;
        device_shutdown();
        sysdev_shutdown();
}

/**
 *      kernel_restart - reboot the system
 *      @cmd: pointer to buffer containing command to execute for restart
 *              or %NULL
 *
 *      Shutdown everything and perform a clean reboot.
 *      This is not safe to call in interrupt context.
 */
void kernel_restart(char *cmd)
{
        kernel_restart_prepare(cmd);
        if (!cmd)
                printk(KERN_EMERG "Restarting system.\n");
        else
                printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
        machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);

/**
 *      kernel_kexec - reboot the system
 *
 *      Move into place and start executing a preloaded standalone
 *      executable.  If nothing was preloaded return an error.
 */
static void kernel_kexec(void)
{
#ifdef CONFIG_KEXEC
        struct kimage *image;
        image = xchg(&kexec_image, NULL);
        if (!image)
                return;
        kernel_restart_prepare(NULL);
        printk(KERN_EMERG "Starting new kernel\n");
        machine_shutdown();
        machine_kexec(image);
#endif
}

void kernel_shutdown_prepare(enum system_states state)
{
        blocking_notifier_call_chain(&reboot_notifier_list,
                (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
        system_state = state;
        device_shutdown();
}
/**
 *      kernel_halt - halt the system
 *
 *      Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
        kernel_shutdown_prepare(SYSTEM_HALT);
        sysdev_shutdown();
        printk(KERN_EMERG "System halted.\n");
        machine_halt();
}

EXPORT_SYMBOL_GPL(kernel_halt);

/**
 *      kernel_power_off - power_off the system
 *
 *      Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
        kernel_shutdown_prepare(SYSTEM_POWER_OFF);
        if (pm_power_off_prepare)
                pm_power_off_prepare();
        disable_nonboot_cpus();
        sysdev_shutdown();
        printk(KERN_EMERG "Power down.\n");
        machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg)
{
        char buffer[256];

        /* We only trust the superuser with rebooting the system. */
        if (!capable(CAP_SYS_BOOT))
                return -EPERM;

        /* For safety, we require "magic" arguments. */
        if (magic1 != LINUX_REBOOT_MAGIC1 ||
            (magic2 != LINUX_REBOOT_MAGIC2 &&
                        magic2 != LINUX_REBOOT_MAGIC2A &&
                        magic2 != LINUX_REBOOT_MAGIC2B &&
                        magic2 != LINUX_REBOOT_MAGIC2C))
                return -EINVAL;

        /* Instead of trying to make the power_off code look like
         * halt when pm_power_off is not set do it the easy way.
         */
        if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
                cmd = LINUX_REBOOT_CMD_HALT;

        lock_kernel();
        switch (cmd) {
        case LINUX_REBOOT_CMD_RESTART:
                kernel_restart(NULL);
                break;

        case LINUX_REBOOT_CMD_CAD_ON:
                C_A_D = 1;
                break;

        case LINUX_REBOOT_CMD_CAD_OFF:
                C_A_D = 0;
                break;

        case LINUX_REBOOT_CMD_HALT:
                kernel_halt();
                unlock_kernel();
                do_exit(0);
                break;

        case LINUX_REBOOT_CMD_POWER_OFF:
                kernel_power_off();
                unlock_kernel();
                do_exit(0);
                break;

        case LINUX_REBOOT_CMD_RESTART2:
                if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
                        unlock_kernel();
                        return -EFAULT;
                }
                buffer[sizeof(buffer) - 1] = '\0';

                kernel_restart(buffer);
                break;

        case LINUX_REBOOT_CMD_KEXEC:
                kernel_kexec();
                unlock_kernel();
                return -EINVAL;

#ifdef CONFIG_HIBERNATION
        case LINUX_REBOOT_CMD_SW_SUSPEND:
                {
                        int ret = hibernate();
                        unlock_kernel();
                        return ret;
                }
#endif

        default:
                unlock_kernel();
                return -EINVAL;
        }
        unlock_kernel();
        return 0;
}

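/*
 * Example (user-space, illustrative only): the magic constants above make
 * an accidental reboot(2) nearly impossible.  A minimal sketch, assuming
 * CAP_SYS_BOOT, that flips Ctrl-Alt-Del to the "signal cad_pid" behaviour
 * (the one sub-command that is safe to demonstrate):
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/reboot.h>
 *
 *	int main(void)
 *	{
 *		// C_A_D = 0: ctrl-alt-del sends SIGINT to cad_pid instead
 *		if (syscall(SYS_reboot, LINUX_REBOOT_MAGIC1,
 *			    LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_CAD_OFF,
 *			    NULL) != 0) {
 *			perror("reboot");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */
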
static void deferred_cad(struct work_struct *dummy)
{
        kernel_restart(NULL);
}

/*
 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 * As it's called within an interrupt, it may NOT sync: the only choice
 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 */
void ctrl_alt_del(void)
{
        static DECLARE_WORK(cad_work, deferred_cad);

        if (C_A_D)
                schedule_work(&cad_work);
        else
                kill_cad_pid(SIGINT, 1);
}

/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races; the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
{
        int old_rgid = current->gid;
        int old_egid = current->egid;
        int new_rgid = old_rgid;
        int new_egid = old_egid;
        int retval;

        retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
        if (retval)
                return retval;

        if (rgid != (gid_t) -1) {
                if ((old_rgid == rgid) ||
                    (current->egid==rgid) ||
                    capable(CAP_SETGID))
                        new_rgid = rgid;
                else
                        return -EPERM;
        }
        if (egid != (gid_t) -1) {
                if ((old_rgid == egid) ||
                    (current->egid == egid) ||
                    (current->sgid == egid) ||
                    capable(CAP_SETGID))
                        new_egid = egid;
                else
                        return -EPERM;
        }
        if (new_egid != old_egid) {
                set_dumpable(current->mm, suid_dumpable);
                smp_wmb();
        }
        if (rgid != (gid_t) -1 ||
            (egid != (gid_t) -1 && egid != old_rgid))
                current->sgid = new_egid;
        current->fsgid = new_egid;
        current->egid = new_egid;
        current->gid = new_rgid;
        key_fsgid_changed(current);
        proc_id_connector(current, PROC_EVENT_GID);
        return 0;
}

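/*
 * Example (user-space, illustrative only): a setgid program dropping its
 * group privileges for good, as described in the comment above.  Because
 * the real gid is being set, the saved gid is forced to the new effective
 * gid, so there is no way back.  A minimal sketch:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		gid_t rgid = getgid();	// the invoking user's real gid
 *
 *		if (setregid(rgid, rgid) != 0) {
 *			perror("setregid");
 *			return 1;
 *		}
 *		// from here on the elevated group is unrecoverable
 *		return 0;
 *	}
 */
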
/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
asmlinkage long sys_setgid(gid_t gid)
{
        int old_egid = current->egid;
        int retval;

        retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
        if (retval)
                return retval;

        if (capable(CAP_SETGID)) {
                if (old_egid != gid) {
                        set_dumpable(current->mm, suid_dumpable);
                        smp_wmb();
                }
                current->gid = current->egid = current->sgid = current->fsgid = gid;
        } else if ((gid == current->gid) || (gid == current->sgid)) {
                if (old_egid != gid) {
                        set_dumpable(current->mm, suid_dumpable);
                        smp_wmb();
                }
                current->egid = current->fsgid = gid;
        }
        else
                return -EPERM;

        key_fsgid_changed(current);
        proc_id_connector(current, PROC_EVENT_GID);
        return 0;
}

static int set_user(uid_t new_ruid, int dumpclear)
{
        struct user_struct *new_user;

        new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
        if (!new_user)
                return -EAGAIN;

        if (atomic_read(&new_user->processes) >=
                                current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
                        new_user != current->nsproxy->user_ns->root_user) {
                free_uid(new_user);
                return -EAGAIN;
        }

        switch_uid(new_user);

        if (dumpclear) {
                set_dumpable(current->mm, suid_dumpable);
                smp_wmb();
        }
        current->uid = new_ruid;
        return 0;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
{
        int old_ruid, old_euid, old_suid, new_ruid, new_euid;
        int retval;

        retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
        if (retval)
                return retval;

        new_ruid = old_ruid = current->uid;
        new_euid = old_euid = current->euid;
        old_suid = current->suid;

        if (ruid != (uid_t) -1) {
                new_ruid = ruid;
                if ((old_ruid != ruid) &&
                    (current->euid != ruid) &&
                    !capable(CAP_SETUID))
                        return -EPERM;
        }

        if (euid != (uid_t) -1) {
                new_euid = euid;
                if ((old_ruid != euid) &&
                    (current->euid != euid) &&
                    (current->suid != euid) &&
                    !capable(CAP_SETUID))
                        return -EPERM;
        }

        if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
                return -EAGAIN;

        if (new_euid != old_euid) {
                set_dumpable(current->mm, suid_dumpable);
                smp_wmb();
        }
        current->fsuid = current->euid = new_euid;
        if (ruid != (uid_t) -1 ||
            (euid != (uid_t) -1 && euid != old_ruid))
                current->suid = current->euid;
        current->fsuid = current->euid;

        key_fsuid_changed(current);
        proc_id_connector(current, PROC_EVENT_UID);

        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
}

/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
asmlinkage long sys_setuid(uid_t uid)
{
        int old_euid = current->euid;
        int old_ruid, old_suid, new_suid;
        int retval;

        retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
        if (retval)
                return retval;

        old_ruid = current->uid;
        old_suid = current->suid;
        new_suid = old_suid;

        if (capable(CAP_SETUID)) {
                if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
                        return -EAGAIN;
                new_suid = uid;
        } else if ((uid != current->uid) && (uid != new_suid))
                return -EPERM;

        if (old_euid != uid) {
                set_dumpable(current->mm, suid_dumpable);
                smp_wmb();
        }
        current->fsuid = current->euid = uid;
        current->suid = new_suid;

        key_fsuid_changed(current);
        proc_id_connector(current, PROC_EVENT_UID);

        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
}

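/*
 * Example (user-space, illustrative only): the comment above describes how
 * the BSD-style setreuid() lets a setuid-root program temporarily drop
 * privileges and later regain them by swapping the real and effective uid.
 * A minimal sketch:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		uid_t ruid = getuid();	// invoking user
 *		uid_t euid = geteuid();	// root, for a setuid-root binary
 *
 *		// drop: run with the user's uid, park root in the real uid
 *		if (setreuid(euid, ruid) != 0) {
 *			perror("drop");
 *			return 1;
 *		}
 *		// ... unprivileged work ...
 *		if (setreuid(ruid, euid) != 0) {	// swap back
 *			perror("regain");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */
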
/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4BSD-compatible seteuid().
 */
asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
        int old_ruid = current->uid;
        int old_euid = current->euid;
        int old_suid = current->suid;
        int retval;

        retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
        if (retval)
                return retval;

        if (!capable(CAP_SETUID)) {
                if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
                    (ruid != current->euid) && (ruid != current->suid))
                        return -EPERM;
                if ((euid != (uid_t) -1) && (euid != current->uid) &&
                    (euid != current->euid) && (euid != current->suid))
                        return -EPERM;
                if ((suid != (uid_t) -1) && (suid != current->uid) &&
                    (suid != current->euid) && (suid != current->suid))
                        return -EPERM;
        }
        if (ruid != (uid_t) -1) {
                if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
                        return -EAGAIN;
        }
        if (euid != (uid_t) -1) {
                if (euid != current->euid) {
                        set_dumpable(current->mm, suid_dumpable);
                        smp_wmb();
                }
                current->euid = euid;
        }
        current->fsuid = current->euid;
        if (suid != (uid_t) -1)
                current->suid = suid;

        key_fsuid_changed(current);
        proc_id_connector(current, PROC_EVENT_UID);

        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
}

asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
{
        int retval;

        if (!(retval = put_user(current->uid, ruid)) &&
            !(retval = put_user(current->euid, euid)))
                retval = put_user(current->suid, suid);

        return retval;
}

/*
 * Same as above, but for rgid, egid, sgid.
 */
asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
        int retval;

        retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
        if (retval)
                return retval;

        if (!capable(CAP_SETGID)) {
                if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
                    (rgid != current->egid) && (rgid != current->sgid))
                        return -EPERM;
                if ((egid != (gid_t) -1) && (egid != current->gid) &&
                    (egid != current->egid) && (egid != current->sgid))
                        return -EPERM;
                if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
                    (sgid != current->egid) && (sgid != current->sgid))
                        return -EPERM;
        }
        if (egid != (gid_t) -1) {
                if (egid != current->egid) {
                        set_dumpable(current->mm, suid_dumpable);
                        smp_wmb();
                }
                current->egid = egid;
        }
        current->fsgid = current->egid;
        if (rgid != (gid_t) -1)
                current->gid = rgid;
        if (sgid != (gid_t) -1)
                current->sgid = sgid;

        key_fsgid_changed(current);
        proc_id_connector(current, PROC_EVENT_GID);
        return 0;
}

asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
{
        int retval;

        if (!(retval = put_user(current->gid, rgid)) &&
            !(retval = put_user(current->egid, egid)))
                retval = put_user(current->sgid, sgid);

        return retval;
}

/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
asmlinkage long sys_setfsuid(uid_t uid)
{
        int old_fsuid;

        old_fsuid = current->fsuid;
        if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
                return old_fsuid;

        if (uid == current->uid || uid == current->euid ||
            uid == current->suid || uid == current->fsuid ||
            capable(CAP_SETUID)) {
                if (uid != old_fsuid) {
                        set_dumpable(current->mm, suid_dumpable);
                        smp_wmb();
                }
                current->fsuid = uid;
        }

        key_fsuid_changed(current);
        proc_id_connector(current, PROC_EVENT_UID);

        security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);

        return old_fsuid;
}

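/*
 * Example (user-space, illustrative only): the pattern setfsuid() exists
 * for -- a file server impersonating a client for filesystem permission
 * checks only, without becoming signalable by that client the way a full
 * euid change would make it.  Note the unusual convention: setfsuid()
 * returns the previous fsuid and reports no errors, so the switch is
 * verified by calling it twice.  A minimal sketch, with serve_as() as a
 * hypothetical per-request helper:
 *
 *	#include <sys/fsuid.h>
 *
 *	int serve_as(uid_t client)
 *	{
 *		uid_t prev = setfsuid(client);
 *
 *		// needs CAP_SETUID (or client matching one of our ids)
 *		if ((uid_t)setfsuid(client) != client) {
 *			setfsuid(prev);		// switch failed; restore
 *			return -1;
 *		}
 *		// ... act on files with the client's permissions ...
 *		setfsuid(prev);			// back to the daemon's fsuid
 *		return 0;
 *	}
 */
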
/*
 * The same in Swedish.. (setfsgid() mirrors setfsuid() above)
 */
asmlinkage long sys_setfsgid(gid_t gid)
{
        int old_fsgid;

        old_fsgid = current->fsgid;
        if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
                return old_fsgid;

        if (gid == current->gid || gid == current->egid ||
            gid == current->sgid || gid == current->fsgid ||
            capable(CAP_SETGID)) {
                if (gid != old_fsgid) {
                        set_dumpable(current->mm, suid_dumpable);
                        smp_wmb();
                }
                current->fsgid = gid;
                key_fsgid_changed(current);
                proc_id_connector(current, PROC_EVENT_GID);
        }
        return old_fsgid;
}

asmlinkage long sys_times(struct tms __user * tbuf)
{
        /*
         *      In the SMP world we might just be unlucky and have one of
         *      the times increment as we use it. Since the value is an
         *      atomically safe type this is just fine. Conceptually it's
         *      as if the syscall took an instant longer to occur.
         */
        if (tbuf) {
                struct tms tmp;
                struct task_struct *tsk = current;
                struct task_struct *t;
                cputime_t utime, stime, cutime, cstime;

                spin_lock_irq(&tsk->sighand->siglock);
                utime = tsk->signal->utime;
                stime = tsk->signal->stime;
                t = tsk;
                do {
                        utime = cputime_add(utime, t->utime);
                        stime = cputime_add(stime, t->stime);
                        t = next_thread(t);
                } while (t != tsk);

                cutime = tsk->signal->cutime;
                cstime = tsk->signal->cstime;
                spin_unlock_irq(&tsk->sighand->siglock);

                tmp.tms_utime = cputime_to_clock_t(utime);
                tmp.tms_stime = cputime_to_clock_t(stime);
                tmp.tms_cutime = cputime_to_clock_t(cutime);
                tmp.tms_cstime = cputime_to_clock_t(cstime);
                if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
                        return -EFAULT;
        }
        return (long) jiffies_64_to_clock_t(get_jiffies_64());
}

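/*
 * Example (user-space, illustrative only): typical times(2) usage.  Both
 * the return value (elapsed ticks since an arbitrary point in the past)
 * and the tms fields (CPU time) scale by sysconf(_SC_CLK_TCK).  A minimal
 * sketch:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/times.h>
 *
 *	int main(void)
 *	{
 *		struct tms t;
 *		clock_t now = times(&t);
 *		long hz = sysconf(_SC_CLK_TCK);
 *
 *		if (now == (clock_t)-1 || hz <= 0)
 *			return 1;
 *		printf("user %.2fs sys %.2fs\n",
 *		       (double)t.tms_utime / hz, (double)t.tms_stime / hz);
 *		return 0;
 *	}
 */
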
/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * Ouch. Had to add the 'did_exec' flag to conform completely to POSIX.
 * LBT 04.03.94
 */
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
{
        struct task_struct *p;
        struct task_struct *group_leader = current->group_leader;
        int err = -EINVAL;
        struct pid_namespace *ns;

        if (!pid)
                pid = task_pid_vnr(group_leader);
        if (!pgid)
                pgid = pid;
        if (pgid < 0)
                return -EINVAL;

        /* From this point forward we keep holding onto the tasklist lock
         * so that our parent does not change from under us. -DaveM
         */
        ns = current->nsproxy->pid_ns;

        write_lock_irq(&tasklist_lock);

        err = -ESRCH;
        p = find_task_by_pid_ns(pid, ns);
        if (!p)
                goto out;

        err = -EINVAL;
        if (!thread_group_leader(p))
                goto out;

        if (p->real_parent->tgid == group_leader->tgid) {
                err = -EPERM;
                if (task_session(p) != task_session(group_leader))
                        goto out;
                err = -EACCES;
                if (p->did_exec)
                        goto out;
        } else {
                err = -ESRCH;
                if (p != group_leader)
                        goto out;
        }

        err = -EPERM;
        if (p->signal->leader)
                goto out;

        if (pgid != pid) {
                struct task_struct *g;

                g = find_task_by_pid_type_ns(PIDTYPE_PGID, pgid, ns);
                if (!g || task_session(g) != task_session(group_leader))
                        goto out;
        }

        err = security_task_setpgid(p, pgid);
        if (err)
                goto out;

        if (task_pgrp_nr_ns(p, ns) != pgid) {
                struct pid *pid;

                detach_pid(p, PIDTYPE_PGID);
                pid = find_vpid(pgid);
                attach_pid(p, PIDTYPE_PGID, pid);
                set_task_pgrp(p, pid_nr(pid));
        }

        err = 0;
out:
        /* All paths lead to here, thus we are safe. -DaveM */
        write_unlock_irq(&tasklist_lock);
        return err;
}

asmlinkage long sys_getpgid(pid_t pid)
{
        if (!pid)
                return task_pgrp_vnr(current);
        else {
                int retval;
                struct task_struct *p;
                struct pid_namespace *ns;

                ns = current->nsproxy->pid_ns;

                read_lock(&tasklist_lock);
                p = find_task_by_pid_ns(pid, ns);
                retval = -ESRCH;
                if (p) {
                        retval = security_task_getpgid(p);
                        if (!retval)
                                retval = task_pgrp_nr_ns(p, ns);
                }
                read_unlock(&tasklist_lock);
                return retval;
        }
}

#ifdef __ARCH_WANT_SYS_GETPGRP

asmlinkage long sys_getpgrp(void)
{
        /* SMP - assuming writes are word atomic this is fine */
        return task_pgrp_vnr(current);
}

#endif

asmlinkage long sys_getsid(pid_t pid)
{
        if (!pid)
                return task_session_vnr(current);
        else {
                int retval;
                struct task_struct *p;
                struct pid_namespace *ns;

                ns = current->nsproxy->pid_ns;

                read_lock(&tasklist_lock);
                p = find_task_by_pid_ns(pid, ns);
                retval = -ESRCH;
                if (p) {
                        retval = security_task_getsid(p);
                        if (!retval)
                                retval = task_session_nr_ns(p, ns);
                }
                read_unlock(&tasklist_lock);
                return retval;
        }
}

asmlinkage long sys_setsid(void)
{
        struct task_struct *group_leader = current->group_leader;
        pid_t session;
        int err = -EPERM;

        write_lock_irq(&tasklist_lock);

        /* Fail if I am already a session leader */
        if (group_leader->signal->leader)
                goto out;

        session = group_leader->pid;
        /* Fail if a process group id already exists that equals the
         * proposed session id.
         *
         * Don't check if session id == 1 because kernel threads use this
         * session id and so the check will always fail and make it so
         * init cannot successfully call setsid.
         */
        if (session > 1 && find_task_by_pid_type_ns(PIDTYPE_PGID,
                                session, &init_pid_ns))
                goto out;

        group_leader->signal->leader = 1;
        __set_special_pids(session, session);

        spin_lock(&group_leader->sighand->siglock);
        group_leader->signal->tty = NULL;
        spin_unlock(&group_leader->sighand->siglock);

        err = task_pgrp_vnr(group_leader);
out:
        write_unlock_irq(&tasklist_lock);
        return err;
}

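/*
 * Example (user-space, illustrative only): the classic daemonization step.
 * setsid() fails (-EPERM above) for a process-group leader, which is why a
 * daemon fork()s first -- the child is never a group leader and so can
 * always start a fresh session.  A minimal sketch:
 *
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		pid_t pid = fork();
 *
 *		if (pid < 0)
 *			exit(1);
 *		if (pid > 0)
 *			exit(0);	// parent leaves
 *		if (setsid() < 0)	// child: new session, no ctty
 *			exit(1);
 *		// ... daemon work ...
 *		return 0;
 *	}
 */
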
/*
 * Supplementary group IDs
 */

/* init to 2 - one for init_task, one to ensure it is never freed */
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };

struct group_info *groups_alloc(int gidsetsize)
{
        struct group_info *group_info;
        int nblocks;
        int i;

        nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
        /* Make sure we always allocate at least one indirect block pointer */
        nblocks = nblocks ? : 1;
        group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
        if (!group_info)
                return NULL;
        group_info->ngroups = gidsetsize;
        group_info->nblocks = nblocks;
        atomic_set(&group_info->usage, 1);

        if (gidsetsize <= NGROUPS_SMALL)
                group_info->blocks[0] = group_info->small_block;
        else {
                for (i = 0; i < nblocks; i++) {
                        gid_t *b;
                        b = (void *)__get_free_page(GFP_USER);
                        if (!b)
                                goto out_undo_partial_alloc;
                        group_info->blocks[i] = b;
                }
        }
        return group_info;

out_undo_partial_alloc:
        while (--i >= 0) {
                free_page((unsigned long)group_info->blocks[i]);
        }
        kfree(group_info);
        return NULL;
}

EXPORT_SYMBOL(groups_alloc);

void groups_free(struct group_info *group_info)
{
        if (group_info->blocks[0] != group_info->small_block) {
                int i;
                for (i = 0; i < group_info->nblocks; i++)
                        free_page((unsigned long)group_info->blocks[i]);
        }
        kfree(group_info);
}

EXPORT_SYMBOL(groups_free);

/* export the group_info to a user-space array */
static int groups_to_user(gid_t __user *grouplist,
    struct group_info *group_info)
{
        int i;
        int count = group_info->ngroups;

        for (i = 0; i < group_info->nblocks; i++) {
                int cp_count = min(NGROUPS_PER_BLOCK, count);
                int off = i * NGROUPS_PER_BLOCK;
                int len = cp_count * sizeof(*grouplist);

                if (copy_to_user(grouplist+off, group_info->blocks[i], len))
                        return -EFAULT;

                count -= cp_count;
        }
        return 0;
}

/* fill a group_info from a user-space array - it must be allocated already */
static int groups_from_user(struct group_info *group_info,
    gid_t __user *grouplist)
{
        int i;
        int count = group_info->ngroups;

        for (i = 0; i < group_info->nblocks; i++) {
                int cp_count = min(NGROUPS_PER_BLOCK, count);
                int off = i * NGROUPS_PER_BLOCK;
                int len = cp_count * sizeof(*grouplist);

                if (copy_from_user(group_info->blocks[i], grouplist+off, len))
                        return -EFAULT;

                count -= cp_count;
        }
        return 0;
}

/* a simple Shell sort */
static void groups_sort(struct group_info *group_info)
{
        int base, max, stride;
        int gidsetsize = group_info->ngroups;

        for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
                ; /* nothing */
        stride /= 3;

        while (stride) {
                max = gidsetsize - stride;
                for (base = 0; base < max; base++) {
                        int left = base;
                        int right = left + stride;
                        gid_t tmp = GROUP_AT(group_info, right);

                        while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
                                GROUP_AT(group_info, right) =
                                    GROUP_AT(group_info, left);
                                right = left;
                                left -= stride;
                        }
                        GROUP_AT(group_info, right) = tmp;
                }
                stride /= 3;
        }
}

/* a simple bsearch */
int groups_search(struct group_info *group_info, gid_t grp)
{
        unsigned int left, right;

        if (!group_info)
                return 0;

        left = 0;
        right = group_info->ngroups;
        while (left < right) {
                unsigned int mid = (left+right)/2;
                int cmp = grp - GROUP_AT(group_info, mid);
                if (cmp > 0)
                        left = mid + 1;
                else if (cmp < 0)
                        right = mid;
                else
                        return 1;
        }
        return 0;
}

/* validate and set current->group_info */
int set_current_groups(struct group_info *group_info)
{
        int retval;
        struct group_info *old_info;

        retval = security_task_setgroups(group_info);
        if (retval)
                return retval;

        groups_sort(group_info);
        get_group_info(group_info);

        task_lock(current);
        old_info = current->group_info;
        current->group_info = group_info;
        task_unlock(current);

        put_group_info(old_info);

        return 0;
}

EXPORT_SYMBOL(set_current_groups);

asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
{
        int i = 0;

        /*
         *      SMP: Nobody else can change our grouplist. Thus we are
         *      safe.
         */

        if (gidsetsize < 0)
                return -EINVAL;

        /* no need to grab task_lock here; it cannot change */
        i = current->group_info->ngroups;
        if (gidsetsize) {
                if (i > gidsetsize) {
                        i = -EINVAL;
                        goto out;
                }
                if (groups_to_user(grouplist, current->group_info)) {
                        i = -EFAULT;
                        goto out;
                }
        }
out:
        return i;
}

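/*
 * Example (user-space, illustrative only): the gidsetsize == 0 convention
 * above (return the count, copy nothing) gives the usual two-call pattern.
 * A minimal sketch:
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int i, n = getgroups(0, NULL);	// first call: count only
 *		gid_t *list;
 *
 *		if (n < 0 || !(list = calloc(n ? n : 1, sizeof(*list))))
 *			return 1;
 *		if (getgroups(n, list) < 0)	// second call: fetch
 *			return 1;
 *		for (i = 0; i < n; i++)
 *			printf("%u\n", (unsigned)list[i]);
 *		free(list);
 *		return 0;
 *	}
 */
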
/*
 *      SMP: Our groups are copy-on-write. We can set them safely
 *      without another task interfering.
 */

asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
{
        struct group_info *group_info;
        int retval;

        if (!capable(CAP_SETGID))
                return -EPERM;
        if ((unsigned)gidsetsize > NGROUPS_MAX)
                return -EINVAL;

        group_info = groups_alloc(gidsetsize);
        if (!group_info)
                return -ENOMEM;
        retval = groups_from_user(group_info, grouplist);
        if (retval) {
                put_group_info(group_info);
                return retval;
        }

        retval = set_current_groups(group_info);
        put_group_info(group_info);

        return retval;
}

/*
 * Check whether we're fsgid/egid or in the supplemental group..
 */
int in_group_p(gid_t grp)
{
        int retval = 1;
        if (grp != current->fsgid)
                retval = groups_search(current->group_info, grp);
        return retval;
}

EXPORT_SYMBOL(in_group_p);

int in_egroup_p(gid_t grp)
{
        int retval = 1;
        if (grp != current->egid)
                retval = groups_search(current->group_info, grp);
        return retval;
}

EXPORT_SYMBOL(in_egroup_p);

DECLARE_RWSEM(uts_sem);

EXPORT_SYMBOL(uts_sem);

asmlinkage long sys_newuname(struct new_utsname __user * name)
{
        int errno = 0;

        down_read(&uts_sem);
        if (copy_to_user(name, utsname(), sizeof *name))
                errno = -EFAULT;
        up_read(&uts_sem);
        return errno;
}

asmlinkage long sys_sethostname(char __user *name, int len)
{
        int errno;
        char tmp[__NEW_UTS_LEN];

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (len < 0 || len > __NEW_UTS_LEN)
                return -EINVAL;
        down_write(&uts_sem);
        errno = -EFAULT;
        if (!copy_from_user(tmp, name, len)) {
                memcpy(utsname()->nodename, tmp, len);
                utsname()->nodename[len] = 0;
                errno = 0;
        }
        up_write(&uts_sem);
        return errno;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

asmlinkage long sys_gethostname(char __user *name, int len)
{
        int i, errno;

        if (len < 0)
                return -EINVAL;
        down_read(&uts_sem);
        i = 1 + strlen(utsname()->nodename);
        if (i > len)
                i = len;
        errno = 0;
        if (copy_to_user(name, utsname()->nodename, i))
                errno = -EFAULT;
        up_read(&uts_sem);
        return errno;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
asmlinkage long sys_setdomainname(char __user *name, int len)
{
        int errno;
        char tmp[__NEW_UTS_LEN];

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (len < 0 || len > __NEW_UTS_LEN)
                return -EINVAL;

        down_write(&uts_sem);
        errno = -EFAULT;
        if (!copy_from_user(tmp, name, len)) {
                memcpy(utsname()->domainname, tmp, len);
                utsname()->domainname[len] = 0;
                errno = 0;
        }
        up_write(&uts_sem);
        return errno;
}

asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
{
        if (resource >= RLIM_NLIMITS)
                return -EINVAL;
        else {
                struct rlimit value;
                task_lock(current->group_leader);
                value = current->signal->rlim[resource];
                task_unlock(current->group_leader);
                return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
        }
}

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 *      Backwards compatibility for getrlimit. Needed for some apps.
 */

asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim)
{
        struct rlimit x;
        if (resource >= RLIM_NLIMITS)
                return -EINVAL;

        task_lock(current->group_leader);
        x = current->signal->rlim[resource];
        task_unlock(current->group_leader);
        if (x.rlim_cur > 0x7FFFFFFF)
                x.rlim_cur = 0x7FFFFFFF;
        if (x.rlim_max > 0x7FFFFFFF)
                x.rlim_max = 0x7FFFFFFF;
        return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
}

#endif

asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
{
        struct rlimit new_rlim, *old_rlim;
        unsigned long it_prof_secs;
        int retval;

        if (resource >= RLIM_NLIMITS)
                return -EINVAL;
        if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
                return -EFAULT;
        if (new_rlim.rlim_cur > new_rlim.rlim_max)
                return -EINVAL;
        old_rlim = current->signal->rlim + resource;
        if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
            !capable(CAP_SYS_RESOURCE))
                return -EPERM;
        if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
                return -EPERM;

        retval = security_task_setrlimit(resource, &new_rlim);
        if (retval)
                return retval;

        if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
                /*
                 * The caller is asking for an immediate RLIMIT_CPU
                 * expiry.  But we use the zero value to mean "it was
                 * never set".  So let's cheat and make it one second
                 * instead
                 */
                new_rlim.rlim_cur = 1;
        }

        task_lock(current->group_leader);
        *old_rlim = new_rlim;
        task_unlock(current->group_leader);

        if (resource != RLIMIT_CPU)
                goto out;

        /*
         * RLIMIT_CPU handling.   Note that the kernel fails to return an error
         * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
         * very long-standing error, and fixing it now risks breakage of
         * applications, so we live with it
         */
        if (new_rlim.rlim_cur == RLIM_INFINITY)
                goto out;

        it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
        if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
                unsigned long rlim_cur = new_rlim.rlim_cur;
                cputime_t cputime;

                cputime = secs_to_cputime(rlim_cur);
                read_lock(&tasklist_lock);
                spin_lock_irq(&current->sighand->siglock);
                set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
                spin_unlock_irq(&current->sighand->siglock);
                read_unlock(&tasklist_lock);
        }
out:
        return 0;
}

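/*
 * Example (user-space, illustrative only): arming the RLIMIT_CPU machinery
 * handled at the end of sys_setrlimit() above.  Lowering the soft limit
 * needs no privilege; only raising the hard limit needs CAP_SYS_RESOURCE.
 * A minimal sketch:
 *
 *	#include <stdio.h>
 *	#include <sys/resource.h>
 *
 *	int main(void)
 *	{
 *		struct rlimit rl;
 *
 *		if (getrlimit(RLIMIT_CPU, &rl) != 0)
 *			return 1;
 *		rl.rlim_cur = 1;	// SIGXCPU after ~1s of CPU time
 *		if (setrlimit(RLIMIT_CPU, &rl) != 0) {
 *			perror("setrlimit");
 *			return 1;
 *		}
 *		for (;;)
 *			;		// spin until SIGXCPU arrives
 *	}
 */
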
/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded,
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, a write memory barrier is implied in __exit_signal,
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 */

static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
        struct task_struct *t;
        unsigned long flags;
        cputime_t utime, stime;

        memset((char *) r, 0, sizeof *r);
        utime = stime = cputime_zero;

        rcu_read_lock();
        if (!lock_task_sighand(p, &flags)) {
                rcu_read_unlock();
                return;
        }

        switch (who) {
                case RUSAGE_BOTH:
                case RUSAGE_CHILDREN:
                        utime = p->signal->cutime;
                        stime = p->signal->cstime;
                        r->ru_nvcsw = p->signal->cnvcsw;
                        r->ru_nivcsw = p->signal->cnivcsw;
                        r->ru_minflt = p->signal->cmin_flt;
                        r->ru_majflt = p->signal->cmaj_flt;
                        r->ru_inblock = p->signal->cinblock;
                        r->ru_oublock = p->signal->coublock;

                        if (who == RUSAGE_CHILDREN)
                                break;
                        /* RUSAGE_BOTH falls through to add the self counters */

                case RUSAGE_SELF:
                        utime = cputime_add(utime, p->signal->utime);
                        stime = cputime_add(stime, p->signal->stime);
                        r->ru_nvcsw += p->signal->nvcsw;
                        r->ru_nivcsw += p->signal->nivcsw;
                        r->ru_minflt += p->signal->min_flt;
                        r->ru_majflt += p->signal->maj_flt;
                        r->ru_inblock += p->signal->inblock;
                        r->ru_oublock += p->signal->oublock;
                        t = p;
                        do {
                                utime = cputime_add(utime, t->utime);
                                stime = cputime_add(stime, t->stime);
                                r->ru_nvcsw += t->nvcsw;
                                r->ru_nivcsw += t->nivcsw;
                                r->ru_minflt += t->min_flt;
                                r->ru_majflt += t->maj_flt;
                                r->ru_inblock += task_io_get_inblock(t);
                                r->ru_oublock += task_io_get_oublock(t);
                                t = next_thread(t);
                        } while (t != p);
                        break;

                default:
                        BUG();
        }

        unlock_task_sighand(p, &flags);
        rcu_read_unlock();

        cputime_to_timeval(utime, &r->ru_utime);
        cputime_to_timeval(stime, &r->ru_stime);
}
1620
1621int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1622{
1623        struct rusage r;
1624        k_getrusage(p, who, &r);
1625        return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1626}
1627
1628asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
1629{
1630        if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1631                return -EINVAL;
1632        return getrusage(current, who, ru);
1633}
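
    /*
     * Illustrative userspace usage (editorial addition, not from the
     * original file): the syscall above backs getrusage(2).  A minimal
     * sketch:
     *
     *        #include <sys/resource.h>
     *        #include <stdio.h>
     *
     *        int main(void)
     *        {
     *                struct rusage ru;
     *
     *                if (getrusage(RUSAGE_SELF, &ru) == 0)
     *                        printf("minflt=%ld majflt=%ld\n",
     *                               ru.ru_minflt, ru.ru_majflt);
     *                return 0;
     *        }
     */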
1634
1635asmlinkage long sys_umask(int mask)
1636{
1637        mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1638        return mask;
1639}
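
    /*
     * Illustrative userspace usage (editorial addition): umask(2)
     * returns the previous mask, matching the xchg() above.
     *
     *        #include <sys/stat.h>
     *
     *        mode_t old = umask(022);        old mask is returned
     */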
1640
1641asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1642                          unsigned long arg4, unsigned long arg5)
1643{
1644        long error;
1645
1646        error = security_task_prctl(option, arg2, arg3, arg4, arg5);
1647        if (error)
1648                return error;
1649
1650        switch (option) {
1651                case PR_SET_PDEATHSIG:
1652                        if (!valid_signal(arg2)) {
1653                                error = -EINVAL;
1654                                break;
1655                        }
1656                        current->pdeath_signal = arg2;
1657                        break;
1658                case PR_GET_PDEATHSIG:
1659                        error = put_user(current->pdeath_signal, (int __user *)arg2);
1660                        break;
1661                case PR_GET_DUMPABLE:
1662                        error = get_dumpable(current->mm);
1663                        break;
1664                case PR_SET_DUMPABLE:
1665                        if (arg2 > 1) { /* arg2 is unsigned; < 0 can't happen */
1666                                error = -EINVAL;
1667                                break;
1668                        }
1669                        set_dumpable(current->mm, arg2);
1670                        break;
1671
1672                case PR_SET_UNALIGN:
1673                        error = SET_UNALIGN_CTL(current, arg2);
1674                        break;
1675                case PR_GET_UNALIGN:
1676                        error = GET_UNALIGN_CTL(current, arg2);
1677                        break;
1678                case PR_SET_FPEMU:
1679                        error = SET_FPEMU_CTL(current, arg2);
1680                        break;
1681                case PR_GET_FPEMU:
1682                        error = GET_FPEMU_CTL(current, arg2);
1683                        break;
1684                case PR_SET_FPEXC:
1685                        error = SET_FPEXC_CTL(current, arg2);
1686                        break;
1687                case PR_GET_FPEXC:
1688                        error = GET_FPEXC_CTL(current, arg2);
1689                        break;
1690                case PR_GET_TIMING:
1691                        error = PR_TIMING_STATISTICAL;
1692                        break;
1693                case PR_SET_TIMING:
1694                        if (arg2 == PR_TIMING_STATISTICAL)
1695                                error = 0;
1696                        else
1697                                error = -EINVAL;
1698                        break;
1699
1700                case PR_GET_KEEPCAPS:
1701                        if (current->keep_capabilities)
1702                                error = 1;
1703                        break;
1704                case PR_SET_KEEPCAPS:
1705                        if (arg2 != 0 && arg2 != 1) {
1706                                error = -EINVAL;
1707                                break;
1708                        }
1709                        current->keep_capabilities = arg2;
1710                        break;
1711                case PR_SET_NAME: {
1712                        struct task_struct *me = current;
1713                        char ncomm[sizeof(me->comm)];
1714
1715                        ncomm[sizeof(me->comm)-1] = 0;
1716                        if (strncpy_from_user(ncomm, (char __user *)arg2,
1717                                                sizeof(me->comm)-1) < 0)
1718                                return -EFAULT;
1719                        set_task_comm(me, ncomm);
1720                        return 0;
1721                }
1722                case PR_GET_NAME: {
1723                        struct task_struct *me = current;
1724                        char tcomm[sizeof(me->comm)];
1725
1726                        get_task_comm(tcomm, me);
1727                        if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm)))
1728                                return -EFAULT;
1729                        return 0;
1730                }
1731                case PR_GET_ENDIAN:
1732                        error = GET_ENDIAN(current, arg2);
1733                        break;
1734                case PR_SET_ENDIAN:
1735                        error = SET_ENDIAN(current, arg2);
1736                        break;
1737
1738                case PR_GET_SECCOMP:
1739                        error = prctl_get_seccomp();
1740                        break;
1741                case PR_SET_SECCOMP:
1742                        error = prctl_set_seccomp(arg2);
1743                        break;
1744
1745                default:
1746                        error = -EINVAL;
1747                        break;
1748        }
1749        return error;
1750}
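
    /*
     * Illustrative userspace usage (editorial addition): exercising the
     * PR_SET_NAME/PR_GET_NAME cases handled above.  The buffer size 16
     * matches sizeof(current->comm) (TASK_COMM_LEN).
     *
     *        #include <sys/prctl.h>
     *        #include <stdio.h>
     *
     *        int main(void)
     *        {
     *                char name[16];
     *
     *                prctl(PR_SET_NAME, "worker");
     *                prctl(PR_GET_NAME, name);
     *                printf("comm=%s\n", name);
     *                return 0;
     *        }
     */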
1751
1752asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
1753                           struct getcpu_cache __user *unused)
1754{
1755        int err = 0;
1756        int cpu = raw_smp_processor_id();
1757        if (cpup)
1758                err |= put_user(cpu, cpup);
1759        if (nodep)
1760                err |= put_user(cpu_to_node(cpu), nodep);
1761        return err ? -EFAULT : 0;
1762}
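
    /*
     * Illustrative userspace usage (editorial addition): with no libc
     * wrapper, getcpu can be reached via syscall(2), assuming the headers
     * define SYS_getcpu; the cache argument is unused above and may be
     * NULL.
     *
     *        #include <unistd.h>
     *        #include <sys/syscall.h>
     *
     *        unsigned cpu, node;
     *
     *        if (syscall(SYS_getcpu, &cpu, &node, NULL) == 0)
     *                ... cpu and node are now filled in ...
     */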
1763
1764char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
1765
1766static void argv_cleanup(char **argv, char **envp)
1767{
1768        argv_free(argv);
1769}
1770
1771/**
1772 * orderly_poweroff - Trigger an orderly system poweroff
1773 * @force: force poweroff if command execution fails
1774 *
1775 * This may be called from any context to trigger a system shutdown.
1776 * If the orderly shutdown fails and @force is true, an immediate shutdown is forced.
1777 */
1778int orderly_poweroff(bool force)
1779{
1780        int argc;
1781        char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
1782        static char *envp[] = {
1783                "HOME=/",
1784                "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
1785                NULL
1786        };
1787        int ret = -ENOMEM;
1788        struct subprocess_info *info;
1789
1790        if (argv == NULL) {
1791                printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
1792                       __func__, poweroff_cmd);
1793                goto out;
1794        }
1795
1796        info = call_usermodehelper_setup(argv[0], argv, envp);
1797        if (info == NULL) {
1798                argv_free(argv);
1799                goto out;
1800        }
1801
1802        call_usermodehelper_setcleanup(info, argv_cleanup);
1803
1804        ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
1805
1806  out:
1807        if (ret && force) {
1808                printk(KERN_WARNING "Failed to start orderly shutdown: "
1809                       "forcing the issue\n");
1810
1811                /* I guess this should try to kick off some daemon to
1812                 * sync and poweroff asap.  Or not even bother syncing
1813                 * if we're doing an emergency shutdown? */
1814                emergency_sync();
1815                kernel_power_off();
1816        }
1817
1818        return ret;
1819}
1820EXPORT_SYMBOL_GPL(orderly_poweroff);
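
    /*
     * Illustrative in-kernel usage (editorial addition): a hypothetical
     * driver reacting to a critical condition could request an orderly
     * shutdown and force one if the usermode helper fails:
     *
     *        static void critical_condition_shutdown(void)
     *        {
     *                orderly_poweroff(true);
     *        }
     */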
1821