linux/fs/proc/base.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/proc/base.c
   3 *
   4 *  Copyright (C) 1991, 1992 Linus Torvalds
   5 *
   6 *  proc base directory handling functions
   7 *
   8 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
   9 *  Instead of using magical inumbers to determine the kind of object
  10 *  we allocate and fill in-core inodes upon lookup. They don't even
  11 *  go into icache. We cache the reference to task_struct upon lookup too.
  12 *  Eventually it should become a filesystem in its own right. We don't use the
  13 *  rest of procfs anymore.
  14 *
  15 *
  16 *  Changelog:
  17 *  17-Jan-2005
  18 *  Allan Bezerra
  19 *  Bruna Moreira <bruna.moreira@indt.org.br>
  20 *  Edjard Mota <edjard.mota@indt.org.br>
  21 *  Ilias Biris <ilias.biris@indt.org.br>
  22 *  Mauricio Lin <mauricio.lin@indt.org.br>
  23 *
  24 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
  25 *
  26 *  A new process specific entry (smaps) included in /proc. It shows the
  27 *  size of rss for each memory area. The maps entry lacks information
  28 *  about physical memory size (rss) for each mapped file, i.e.,
  29 *  rss information for executables and library files.
  30 *  This additional information is useful for any tools that need to know
  31 *  about physical memory consumption for a process specific library.
  32 *
  33 *  Changelog:
  34 *  21-Feb-2005
  35 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
  36 *  Pud inclusion in the page table walking.
  37 *
  38 *  ChangeLog:
  39 *  10-Mar-2005
  40 *  10LE Instituto Nokia de Tecnologia - INdT:
  41 *  A better way to walk through the page table as suggested by Hugh Dickins.
  42 *
  43 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
  44 *  Smaps information related to shared, private, clean and dirty pages.
  45 *
  46 *  Paul Mundt <paul.mundt@nokia.com>:
  47 *  Overall revision about smaps.
  48 */
  49
  50#include <asm/uaccess.h>
  51
  52#include <linux/errno.h>
  53#include <linux/time.h>
  54#include <linux/proc_fs.h>
  55#include <linux/stat.h>
  56#include <linux/task_io_accounting_ops.h>
  57#include <linux/init.h>
  58#include <linux/capability.h>
  59#include <linux/file.h>
  60#include <linux/fdtable.h>
  61#include <linux/string.h>
  62#include <linux/seq_file.h>
  63#include <linux/namei.h>
  64#include <linux/mnt_namespace.h>
  65#include <linux/mm.h>
  66#include <linux/swap.h>
  67#include <linux/rcupdate.h>
  68#include <linux/kallsyms.h>
  69#include <linux/stacktrace.h>
  70#include <linux/resource.h>
  71#include <linux/module.h>
  72#include <linux/mount.h>
  73#include <linux/security.h>
  74#include <linux/ptrace.h>
  75#include <linux/tracehook.h>
  76#include <linux/cgroup.h>
  77#include <linux/cpuset.h>
  78#include <linux/audit.h>
  79#include <linux/poll.h>
  80#include <linux/nsproxy.h>
  81#include <linux/oom.h>
  82#include <linux/elf.h>
  83#include <linux/pid_namespace.h>
  84#include <linux/fs_struct.h>
  85#include <linux/slab.h>
  86#include <linux/flex_array.h>
  87#ifdef CONFIG_HARDWALL
  88#include <asm/hardwall.h>
  89#endif
  90#include <trace/events/oom.h>
  91#include "internal.h"
  92
  93/* NOTE:
  94 *      Implementing inode permission operations in /proc is almost
  95 *      certainly an error.  Permission checks need to happen during
  96 *      each system call not at open time.  The reason is that most of
  97 *      what we wish to check for permissions in /proc varies at runtime.
  98 *
  99 *      The classic example of a problem is opening file descriptors
 100 *      in /proc for a task before it execs a suid executable.
 101 */
 102
 103struct pid_entry {
 104        char *name;
 105        int len;
 106        umode_t mode;
 107        const struct inode_operations *iop;
 108        const struct file_operations *fop;
 109        union proc_op op;
 110};
 111
 112#define NOD(NAME, MODE, IOP, FOP, OP) {                 \
 113        .name = (NAME),                                 \
 114        .len  = sizeof(NAME) - 1,                       \
 115        .mode = MODE,                                   \
 116        .iop  = IOP,                                    \
 117        .fop  = FOP,                                    \
 118        .op   = OP,                                     \
 119}
 120
 121#define DIR(NAME, MODE, iops, fops)     \
 122        NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
 123#define LNK(NAME, get_link)                                     \
 124        NOD(NAME, (S_IFLNK|S_IRWXUGO),                          \
 125                &proc_pid_link_inode_operations, NULL,          \
 126                { .proc_get_link = get_link } )
 127#define REG(NAME, MODE, fops)                           \
 128        NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
 129#define INF(NAME, MODE, read)                           \
 130        NOD(NAME, (S_IFREG|(MODE)),                     \
 131                NULL, &proc_info_file_operations,       \
 132                { .proc_read = read } )
 133#define ONE(NAME, MODE, show)                           \
 134        NOD(NAME, (S_IFREG|(MODE)),                     \
 135                NULL, &proc_single_file_operations,     \
 136                { .proc_show = show } )
 137
 138static int proc_fd_permission(struct inode *inode, int mask);
 139
 140/*
 141 * Count the number of hardlinks for the pid_entry table, excluding the .
 142 * and .. links.
 143 */
 144static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
 145        unsigned int n)
 146{
 147        unsigned int i;
 148        unsigned int count;
 149
 150        count = 0;
 151        for (i = 0; i < n; ++i) {
 152                if (S_ISDIR(entries[i].mode))
 153                        ++count;
 154        }
 155
 156        return count;
 157}
 158
 159static int get_task_root(struct task_struct *task, struct path *root)
 160{
 161        int result = -ENOENT;
 162
 163        task_lock(task);
 164        if (task->fs) {
 165                get_fs_root(task->fs, root);
 166                result = 0;
 167        }
 168        task_unlock(task);
 169        return result;
 170}
 171
 172static int proc_cwd_link(struct dentry *dentry, struct path *path)
 173{
 174        struct task_struct *task = get_proc_task(dentry->d_inode);
 175        int result = -ENOENT;
 176
 177        if (task) {
 178                task_lock(task);
 179                if (task->fs) {
 180                        get_fs_pwd(task->fs, path);
 181                        result = 0;
 182                }
 183                task_unlock(task);
 184                put_task_struct(task);
 185        }
 186        return result;
 187}
 188
 189static int proc_root_link(struct dentry *dentry, struct path *path)
 190{
 191        struct task_struct *task = get_proc_task(dentry->d_inode);
 192        int result = -ENOENT;
 193
 194        if (task) {
 195                result = get_task_root(task, path);
 196                put_task_struct(task);
 197        }
 198        return result;
 199}
 200
 201struct mm_struct *mm_for_maps(struct task_struct *task)
 202{
 203        return mm_access(task, PTRACE_MODE_READ);
 204}
 205
 206static int proc_pid_cmdline(struct task_struct *task, char * buffer)
 207{
 208        int res = 0;
 209        unsigned int len;
 210        struct mm_struct *mm = get_task_mm(task);
 211        if (!mm)
 212                goto out;
 213        if (!mm->arg_end)
 214                goto out_mm;    /* Shh! No looking before we're done */
 215
 216        len = mm->arg_end - mm->arg_start;
 217 
 218        if (len > PAGE_SIZE)
 219                len = PAGE_SIZE;
 220 
 221        res = access_process_vm(task, mm->arg_start, buffer, len, 0);
 222
 223        // If the nul at the end of args has been overwritten, then
 224        // assume application is using setproctitle(3).
 225        if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
 226                len = strnlen(buffer, res);
 227                if (len < res) {
 228                    res = len;
 229                } else {
 230                        len = mm->env_end - mm->env_start;
 231                        if (len > PAGE_SIZE - res)
 232                                len = PAGE_SIZE - res;
 233                        res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
 234                        res = strnlen(buffer, res);
 235                }
 236        }
 237out_mm:
 238        mmput(mm);
 239out:
 240        return res;
 241}
 242
 243static int proc_pid_auxv(struct task_struct *task, char *buffer)
 244{
 245        struct mm_struct *mm = mm_for_maps(task);
 246        int res = PTR_ERR(mm);
 247        if (mm && !IS_ERR(mm)) {
 248                unsigned int nwords = 0;
 249                do {
 250                        nwords += 2;
 251                } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
 252                res = nwords * sizeof(mm->saved_auxv[0]);
 253                if (res > PAGE_SIZE)
 254                        res = PAGE_SIZE;
 255                memcpy(buffer, mm->saved_auxv, res);
 256                mmput(mm);
 257        }
 258        return res;
 259}
 260
 261
 262#ifdef CONFIG_KALLSYMS
 263/*
 264 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 265 * Returns the resolved symbol.  If that fails, simply return the address.
 266 */
 267static int proc_pid_wchan(struct task_struct *task, char *buffer)
 268{
 269        unsigned long wchan;
 270        char symname[KSYM_NAME_LEN];
 271
 272        wchan = get_wchan(task);
 273
 274        if (lookup_symbol_name(wchan, symname) < 0)
 275                if (!ptrace_may_access(task, PTRACE_MODE_READ))
 276                        return 0;
 277                else
 278                        return sprintf(buffer, "%lu", wchan);
 279        else
 280                return sprintf(buffer, "%s", symname);
 281}
 282#endif /* CONFIG_KALLSYMS */
 283
 284static int lock_trace(struct task_struct *task)
 285{
 286        int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
 287        if (err)
 288                return err;
 289        if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
 290                mutex_unlock(&task->signal->cred_guard_mutex);
 291                return -EPERM;
 292        }
 293        return 0;
 294}
 295
 296static void unlock_trace(struct task_struct *task)
 297{
 298        mutex_unlock(&task->signal->cred_guard_mutex);
 299}
 300
 301#ifdef CONFIG_STACKTRACE
 302
 303#define MAX_STACK_TRACE_DEPTH   64
 304
 305static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
 306                          struct pid *pid, struct task_struct *task)
 307{
 308        struct stack_trace trace;
 309        unsigned long *entries;
 310        int err;
 311        int i;
 312
 313        entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
 314        if (!entries)
 315                return -ENOMEM;
 316
 317        trace.nr_entries        = 0;
 318        trace.max_entries       = MAX_STACK_TRACE_DEPTH;
 319        trace.entries           = entries;
 320        trace.skip              = 0;
 321
 322        err = lock_trace(task);
 323        if (!err) {
 324                save_stack_trace_tsk(task, &trace);
 325
 326                for (i = 0; i < trace.nr_entries; i++) {
 327                        seq_printf(m, "[<%pK>] %pS\n",
 328                                   (void *)entries[i], (void *)entries[i]);
 329                }
 330                unlock_trace(task);
 331        }
 332        kfree(entries);
 333
 334        return err;
 335}
 336#endif
 337
 338#ifdef CONFIG_SCHEDSTATS
 339/*
 340 * Provides /proc/PID/schedstat
 341 */
 342static int proc_pid_schedstat(struct task_struct *task, char *buffer)
 343{
 344        return sprintf(buffer, "%llu %llu %lu\n",
 345                        (unsigned long long)task->se.sum_exec_runtime,
 346                        (unsigned long long)task->sched_info.run_delay,
 347                        task->sched_info.pcount);
 348}
 349#endif
 350
 351#ifdef CONFIG_LATENCYTOP
 352static int lstats_show_proc(struct seq_file *m, void *v)
 353{
 354        int i;
 355        struct inode *inode = m->private;
 356        struct task_struct *task = get_proc_task(inode);
 357
 358        if (!task)
 359                return -ESRCH;
 360        seq_puts(m, "Latency Top version : v0.1\n");
 361        for (i = 0; i < 32; i++) {
 362                struct latency_record *lr = &task->latency_record[i];
 363                if (lr->backtrace[0]) {
 364                        int q;
 365                        seq_printf(m, "%i %li %li",
 366                                   lr->count, lr->time, lr->max);
 367                        for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
 368                                unsigned long bt = lr->backtrace[q];
 369                                if (!bt)
 370                                        break;
 371                                if (bt == ULONG_MAX)
 372                                        break;
 373                                seq_printf(m, " %ps", (void *)bt);
 374                        }
 375                        seq_putc(m, '\n');
 376                }
 377
 378        }
 379        put_task_struct(task);
 380        return 0;
 381}
 382
 383static int lstats_open(struct inode *inode, struct file *file)
 384{
 385        return single_open(file, lstats_show_proc, inode);
 386}
 387
 388static ssize_t lstats_write(struct file *file, const char __user *buf,
 389                            size_t count, loff_t *offs)
 390{
 391        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
 392
 393        if (!task)
 394                return -ESRCH;
 395        clear_all_latency_tracing(task);
 396        put_task_struct(task);
 397
 398        return count;
 399}
 400
 401static const struct file_operations proc_lstats_operations = {
 402        .open           = lstats_open,
 403        .read           = seq_read,
 404        .write          = lstats_write,
 405        .llseek         = seq_lseek,
 406        .release        = single_release,
 407};
 408
 409#endif
 410
 411static int proc_oom_score(struct task_struct *task, char *buffer)
 412{
 413        unsigned long points = 0;
 414
 415        read_lock(&tasklist_lock);
 416        if (pid_alive(task))
 417                points = oom_badness(task, NULL, NULL,
 418                                        totalram_pages + total_swap_pages);
 419        read_unlock(&tasklist_lock);
 420        return sprintf(buffer, "%lu\n", points);
 421}
 422
 423struct limit_names {
 424        char *name;
 425        char *unit;
 426};
 427
 428static const struct limit_names lnames[RLIM_NLIMITS] = {
 429        [RLIMIT_CPU] = {"Max cpu time", "seconds"},
 430        [RLIMIT_FSIZE] = {"Max file size", "bytes"},
 431        [RLIMIT_DATA] = {"Max data size", "bytes"},
 432        [RLIMIT_STACK] = {"Max stack size", "bytes"},
 433        [RLIMIT_CORE] = {"Max core file size", "bytes"},
 434        [RLIMIT_RSS] = {"Max resident set", "bytes"},
 435        [RLIMIT_NPROC] = {"Max processes", "processes"},
 436        [RLIMIT_NOFILE] = {"Max open files", "files"},
 437        [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
 438        [RLIMIT_AS] = {"Max address space", "bytes"},
 439        [RLIMIT_LOCKS] = {"Max file locks", "locks"},
 440        [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
 441        [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
 442        [RLIMIT_NICE] = {"Max nice priority", NULL},
 443        [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
 444        [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
 445};
 446
 447/* Display limits for a process */
 448static int proc_pid_limits(struct task_struct *task, char *buffer)
 449{
 450        unsigned int i;
 451        int count = 0;
 452        unsigned long flags;
 453        char *bufptr = buffer;
 454
 455        struct rlimit rlim[RLIM_NLIMITS];
 456
 457        if (!lock_task_sighand(task, &flags))
 458                return 0;
 459        memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 460        unlock_task_sighand(task, &flags);
 461
 462        /*
 463         * print the file header
 464         */
 465        count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
 466                        "Limit", "Soft Limit", "Hard Limit", "Units");
 467
 468        for (i = 0; i < RLIM_NLIMITS; i++) {
 469                if (rlim[i].rlim_cur == RLIM_INFINITY)
 470                        count += sprintf(&bufptr[count], "%-25s %-20s ",
 471                                         lnames[i].name, "unlimited");
 472                else
 473                        count += sprintf(&bufptr[count], "%-25s %-20lu ",
 474                                         lnames[i].name, rlim[i].rlim_cur);
 475
 476                if (rlim[i].rlim_max == RLIM_INFINITY)
 477                        count += sprintf(&bufptr[count], "%-20s ", "unlimited");
 478                else
 479                        count += sprintf(&bufptr[count], "%-20lu ",
 480                                         rlim[i].rlim_max);
 481
 482                if (lnames[i].unit)
 483                        count += sprintf(&bufptr[count], "%-10s\n",
 484                                         lnames[i].unit);
 485                else
 486                        count += sprintf(&bufptr[count], "\n");
 487        }
 488
 489        return count;
 490}
 491
 492#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 493static int proc_pid_syscall(struct task_struct *task, char *buffer)
 494{
 495        long nr;
 496        unsigned long args[6], sp, pc;
 497        int res = lock_trace(task);
 498        if (res)
 499                return res;
 500
 501        if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
 502                res = sprintf(buffer, "running\n");
 503        else if (nr < 0)
 504                res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
 505        else
 506                res = sprintf(buffer,
 507                       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
 508                       nr,
 509                       args[0], args[1], args[2], args[3], args[4], args[5],
 510                       sp, pc);
 511        unlock_trace(task);
 512        return res;
 513}
 514#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
 515
 516/************************************************************************/
 517/*                       Here the fs part begins                        */
 518/************************************************************************/
 519
 520/* permission checks */
 521static int proc_fd_access_allowed(struct inode *inode)
 522{
 523        struct task_struct *task;
 524        int allowed = 0;
 525        /* Allow access to a task's file descriptors if it is us or we
 526         * may use ptrace attach to the process and find out that
 527         * information.
 528         */
 529        task = get_proc_task(inode);
 530        if (task) {
 531                allowed = ptrace_may_access(task, PTRACE_MODE_READ);
 532                put_task_struct(task);
 533        }
 534        return allowed;
 535}
 536
 537int proc_setattr(struct dentry *dentry, struct iattr *attr)
 538{
 539        int error;
 540        struct inode *inode = dentry->d_inode;
 541
 542        if (attr->ia_valid & ATTR_MODE)
 543                return -EPERM;
 544
 545        error = inode_change_ok(inode, attr);
 546        if (error)
 547                return error;
 548
 549        if ((attr->ia_valid & ATTR_SIZE) &&
 550            attr->ia_size != i_size_read(inode)) {
 551                error = vmtruncate(inode, attr->ia_size);
 552                if (error)
 553                        return error;
 554        }
 555
 556        setattr_copy(inode, attr);
 557        mark_inode_dirty(inode);
 558        return 0;
 559}
 560
 561/*
 562 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
 563 * or euid/egid (for hide_pid_min=2)?
 564 */
 565static bool has_pid_permissions(struct pid_namespace *pid,
 566                                 struct task_struct *task,
 567                                 int hide_pid_min)
 568{
 569        if (pid->hide_pid < hide_pid_min)
 570                return true;
 571        if (in_group_p(pid->pid_gid))
 572                return true;
 573        return ptrace_may_access(task, PTRACE_MODE_READ);
 574}
 575
 576
 577static int proc_pid_permission(struct inode *inode, int mask)
 578{
 579        struct pid_namespace *pid = inode->i_sb->s_fs_info;
 580        struct task_struct *task;
 581        bool has_perms;
 582
 583        task = get_proc_task(inode);
 584        if (!task)
 585                return -ESRCH;
 586        has_perms = has_pid_permissions(pid, task, 1);
 587        put_task_struct(task);
 588
 589        if (!has_perms) {
 590                if (pid->hide_pid == 2) {
 591                        /*
 592                         * Let's make getdents(), stat(), and open()
 593                         * consistent with each other.  If a process
 594                         * may not stat() a file, it shouldn't be seen
 595                         * in procfs at all.
 596                         */
 597                        return -ENOENT;
 598                }
 599
 600                return -EPERM;
 601        }
 602        return generic_permission(inode, mask);
 603}
 604
 605
 606
 607static const struct inode_operations proc_def_inode_operations = {
 608        .setattr        = proc_setattr,
 609};
 610
 611#define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
 612
 613static ssize_t proc_info_read(struct file * file, char __user * buf,
 614                          size_t count, loff_t *ppos)
 615{
 616        struct inode * inode = file->f_path.dentry->d_inode;
 617        unsigned long page;
 618        ssize_t length;
 619        struct task_struct *task = get_proc_task(inode);
 620
 621        length = -ESRCH;
 622        if (!task)
 623                goto out_no_task;
 624
 625        if (count > PROC_BLOCK_SIZE)
 626                count = PROC_BLOCK_SIZE;
 627
 628        length = -ENOMEM;
 629        if (!(page = __get_free_page(GFP_TEMPORARY)))
 630                goto out;
 631
 632        length = PROC_I(inode)->op.proc_read(task, (char*)page);
 633
 634        if (length >= 0)
 635                length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
 636        free_page(page);
 637out:
 638        put_task_struct(task);
 639out_no_task:
 640        return length;
 641}
 642
 643static const struct file_operations proc_info_file_operations = {
 644        .read           = proc_info_read,
 645        .llseek         = generic_file_llseek,
 646};
 647
 648static int proc_single_show(struct seq_file *m, void *v)
 649{
 650        struct inode *inode = m->private;
 651        struct pid_namespace *ns;
 652        struct pid *pid;
 653        struct task_struct *task;
 654        int ret;
 655
 656        ns = inode->i_sb->s_fs_info;
 657        pid = proc_pid(inode);
 658        task = get_pid_task(pid, PIDTYPE_PID);
 659        if (!task)
 660                return -ESRCH;
 661
 662        ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
 663
 664        put_task_struct(task);
 665        return ret;
 666}
 667
 668static int proc_single_open(struct inode *inode, struct file *filp)
 669{
 670        return single_open(filp, proc_single_show, inode);
 671}
 672
 673static const struct file_operations proc_single_file_operations = {
 674        .open           = proc_single_open,
 675        .read           = seq_read,
 676        .llseek         = seq_lseek,
 677        .release        = single_release,
 678};
 679
 680static int mem_open(struct inode* inode, struct file* file)
 681{
 682        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 683        struct mm_struct *mm;
 684
 685        if (!task)
 686                return -ESRCH;
 687
 688        mm = mm_access(task, PTRACE_MODE_ATTACH);
 689        put_task_struct(task);
 690
 691        if (IS_ERR(mm))
 692                return PTR_ERR(mm);
 693
 694        if (mm) {
 695                /* ensure this mm_struct can't be freed */
 696                atomic_inc(&mm->mm_count);
 697                /* but do not pin its memory */
 698                mmput(mm);
 699        }
 700
 701        /* OK to pass negative loff_t, we can catch out-of-range */
 702        file->f_mode |= FMODE_UNSIGNED_OFFSET;
 703        file->private_data = mm;
 704
 705        return 0;
 706}
 707
 708static ssize_t mem_rw(struct file *file, char __user *buf,
 709                        size_t count, loff_t *ppos, int write)
 710{
 711        struct mm_struct *mm = file->private_data;
 712        unsigned long addr = *ppos;
 713        ssize_t copied;
 714        char *page;
 715
 716        if (!mm)
 717                return 0;
 718
 719        page = (char *)__get_free_page(GFP_TEMPORARY);
 720        if (!page)
 721                return -ENOMEM;
 722
 723        copied = 0;
 724        if (!atomic_inc_not_zero(&mm->mm_users))
 725                goto free;
 726
 727        while (count > 0) {
 728                int this_len = min_t(int, count, PAGE_SIZE);
 729
 730                if (write && copy_from_user(page, buf, this_len)) {
 731                        copied = -EFAULT;
 732                        break;
 733                }
 734
 735                this_len = access_remote_vm(mm, addr, page, this_len, write);
 736                if (!this_len) {
 737                        if (!copied)
 738                                copied = -EIO;
 739                        break;
 740                }
 741
 742                if (!write && copy_to_user(buf, page, this_len)) {
 743                        copied = -EFAULT;
 744                        break;
 745                }
 746
 747                buf += this_len;
 748                addr += this_len;
 749                copied += this_len;
 750                count -= this_len;
 751        }
 752        *ppos = addr;
 753
 754        mmput(mm);
 755free:
 756        free_page((unsigned long) page);
 757        return copied;
 758}
 759
 760static ssize_t mem_read(struct file *file, char __user *buf,
 761                        size_t count, loff_t *ppos)
 762{
 763        return mem_rw(file, buf, count, ppos, 0);
 764}
 765
 766static ssize_t mem_write(struct file *file, const char __user *buf,
 767                         size_t count, loff_t *ppos)
 768{
 769        return mem_rw(file, (char __user*)buf, count, ppos, 1);
 770}
 771
 772loff_t mem_lseek(struct file *file, loff_t offset, int orig)
 773{
 774        switch (orig) {
 775        case 0:
 776                file->f_pos = offset;
 777                break;
 778        case 1:
 779                file->f_pos += offset;
 780                break;
 781        default:
 782                return -EINVAL;
 783        }
 784        force_successful_syscall_return();
 785        return file->f_pos;
 786}
 787
 788static int mem_release(struct inode *inode, struct file *file)
 789{
 790        struct mm_struct *mm = file->private_data;
 791        if (mm)
 792                mmdrop(mm);
 793        return 0;
 794}
 795
 796static const struct file_operations proc_mem_operations = {
 797        .llseek         = mem_lseek,
 798        .read           = mem_read,
 799        .write          = mem_write,
 800        .open           = mem_open,
 801        .release        = mem_release,
 802};
 803
 804static ssize_t environ_read(struct file *file, char __user *buf,
 805                        size_t count, loff_t *ppos)
 806{
 807        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
 808        char *page;
 809        unsigned long src = *ppos;
 810        int ret = -ESRCH;
 811        struct mm_struct *mm;
 812
 813        if (!task)
 814                goto out_no_task;
 815
 816        ret = -ENOMEM;
 817        page = (char *)__get_free_page(GFP_TEMPORARY);
 818        if (!page)
 819                goto out;
 820
 821
 822        mm = mm_for_maps(task);
 823        ret = PTR_ERR(mm);
 824        if (!mm || IS_ERR(mm))
 825                goto out_free;
 826
 827        ret = 0;
 828        while (count > 0) {
 829                int this_len, retval, max_len;
 830
 831                this_len = mm->env_end - (mm->env_start + src);
 832
 833                if (this_len <= 0)
 834                        break;
 835
 836                max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 837                this_len = (this_len > max_len) ? max_len : this_len;
 838
 839                retval = access_process_vm(task, (mm->env_start + src),
 840                        page, this_len, 0);
 841
 842                if (retval <= 0) {
 843                        ret = retval;
 844                        break;
 845                }
 846
 847                if (copy_to_user(buf, page, retval)) {
 848                        ret = -EFAULT;
 849                        break;
 850                }
 851
 852                ret += retval;
 853                src += retval;
 854                buf += retval;
 855                count -= retval;
 856        }
 857        *ppos = src;
 858
 859        mmput(mm);
 860out_free:
 861        free_page((unsigned long) page);
 862out:
 863        put_task_struct(task);
 864out_no_task:
 865        return ret;
 866}
 867
 868static const struct file_operations proc_environ_operations = {
 869        .read           = environ_read,
 870        .llseek         = generic_file_llseek,
 871};
 872
 873static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 874                                size_t count, loff_t *ppos)
 875{
 876        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 877        char buffer[PROC_NUMBUF];
 878        size_t len;
 879        int oom_adjust = OOM_DISABLE;
 880        unsigned long flags;
 881
 882        if (!task)
 883                return -ESRCH;
 884
 885        if (lock_task_sighand(task, &flags)) {
 886                oom_adjust = task->signal->oom_adj;
 887                unlock_task_sighand(task, &flags);
 888        }
 889
 890        put_task_struct(task);
 891
 892        len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
 893
 894        return simple_read_from_buffer(buf, count, ppos, buffer, len);
 895}
 896
 897static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 898                                size_t count, loff_t *ppos)
 899{
 900        struct task_struct *task;
 901        char buffer[PROC_NUMBUF];
 902        int oom_adjust;
 903        unsigned long flags;
 904        int err;
 905
 906        memset(buffer, 0, sizeof(buffer));
 907        if (count > sizeof(buffer) - 1)
 908                count = sizeof(buffer) - 1;
 909        if (copy_from_user(buffer, buf, count)) {
 910                err = -EFAULT;
 911                goto out;
 912        }
 913
 914        err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
 915        if (err)
 916                goto out;
 917        if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
 918             oom_adjust != OOM_DISABLE) {
 919                err = -EINVAL;
 920                goto out;
 921        }
 922
 923        task = get_proc_task(file->f_path.dentry->d_inode);
 924        if (!task) {
 925                err = -ESRCH;
 926                goto out;
 927        }
 928
 929        task_lock(task);
 930        if (!task->mm) {
 931                err = -EINVAL;
 932                goto err_task_lock;
 933        }
 934
 935        if (!lock_task_sighand(task, &flags)) {
 936                err = -ESRCH;
 937                goto err_task_lock;
 938        }
 939
 940        if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
 941                err = -EACCES;
 942                goto err_sighand;
 943        }
 944
 945        /*
 946         * Warn that /proc/pid/oom_adj is deprecated, see
 947         * Documentation/feature-removal-schedule.txt.
 948         */
 949        printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
 950                  current->comm, task_pid_nr(current), task_pid_nr(task),
 951                  task_pid_nr(task));
 952        task->signal->oom_adj = oom_adjust;
 953        /*
 954         * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
 955         * value is always attainable.
 956         */
 957        if (task->signal->oom_adj == OOM_ADJUST_MAX)
 958                task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
 959        else
 960                task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
 961                                                                -OOM_DISABLE;
 962        trace_oom_score_adj_update(task);
 963err_sighand:
 964        unlock_task_sighand(task, &flags);
 965err_task_lock:
 966        task_unlock(task);
 967        put_task_struct(task);
 968out:
 969        return err < 0 ? err : count;
 970}
 971
 972static const struct file_operations proc_oom_adjust_operations = {
 973        .read           = oom_adjust_read,
 974        .write          = oom_adjust_write,
 975        .llseek         = generic_file_llseek,
 976};
 977
 978static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
 979                                        size_t count, loff_t *ppos)
 980{
 981        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 982        char buffer[PROC_NUMBUF];
 983        int oom_score_adj = OOM_SCORE_ADJ_MIN;
 984        unsigned long flags;
 985        size_t len;
 986
 987        if (!task)
 988                return -ESRCH;
 989        if (lock_task_sighand(task, &flags)) {
 990                oom_score_adj = task->signal->oom_score_adj;
 991                unlock_task_sighand(task, &flags);
 992        }
 993        put_task_struct(task);
 994        len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
 995        return simple_read_from_buffer(buf, count, ppos, buffer, len);
 996}
 997
 998static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
 999                                        size_t count, loff_t *ppos)
1000{
1001        struct task_struct *task;
1002        char buffer[PROC_NUMBUF];
1003        unsigned long flags;
1004        int oom_score_adj;
1005        int err;
1006
1007        memset(buffer, 0, sizeof(buffer));
1008        if (count > sizeof(buffer) - 1)
1009                count = sizeof(buffer) - 1;
1010        if (copy_from_user(buffer, buf, count)) {
1011                err = -EFAULT;
1012                goto out;
1013        }
1014
1015        err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
1016        if (err)
1017                goto out;
1018        if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1019                        oom_score_adj > OOM_SCORE_ADJ_MAX) {
1020                err = -EINVAL;
1021                goto out;
1022        }
1023
1024        task = get_proc_task(file->f_path.dentry->d_inode);
1025        if (!task) {
1026                err = -ESRCH;
1027                goto out;
1028        }
1029
1030        task_lock(task);
1031        if (!task->mm) {
1032                err = -EINVAL;
1033                goto err_task_lock;
1034        }
1035
1036        if (!lock_task_sighand(task, &flags)) {
1037                err = -ESRCH;
1038                goto err_task_lock;
1039        }
1040
1041        if (oom_score_adj < task->signal->oom_score_adj_min &&
1042                        !capable(CAP_SYS_RESOURCE)) {
1043                err = -EACCES;
1044                goto err_sighand;
1045        }
1046
1047        task->signal->oom_score_adj = oom_score_adj;
1048        if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1049                task->signal->oom_score_adj_min = oom_score_adj;
1050        trace_oom_score_adj_update(task);
1051        /*
1052         * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1053         * always attainable.
1054         */
1055        if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1056                task->signal->oom_adj = OOM_DISABLE;
1057        else
1058                task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
1059                                                        OOM_SCORE_ADJ_MAX;
1060err_sighand:
1061        unlock_task_sighand(task, &flags);
1062err_task_lock:
1063        task_unlock(task);
1064        put_task_struct(task);
1065out:
1066        return err < 0 ? err : count;
1067}
1068
1069static const struct file_operations proc_oom_score_adj_operations = {
1070        .read           = oom_score_adj_read,
1071        .write          = oom_score_adj_write,
1072        .llseek         = default_llseek,
1073};
1074
1075#ifdef CONFIG_AUDITSYSCALL
1076#define TMPBUFLEN 21
1077static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1078                                  size_t count, loff_t *ppos)
1079{
1080        struct inode * inode = file->f_path.dentry->d_inode;
1081        struct task_struct *task = get_proc_task(inode);
1082        ssize_t length;
1083        char tmpbuf[TMPBUFLEN];
1084
1085        if (!task)
1086                return -ESRCH;
1087        length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1088                                audit_get_loginuid(task));
1089        put_task_struct(task);
1090        return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1091}
1092
1093static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1094                                   size_t count, loff_t *ppos)
1095{
1096        struct inode * inode = file->f_path.dentry->d_inode;
1097        char *page, *tmp;
1098        ssize_t length;
1099        uid_t loginuid;
1100
1101        rcu_read_lock();
1102        if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
1103                rcu_read_unlock();
1104                return -EPERM;
1105        }
1106        rcu_read_unlock();
1107
1108        if (count >= PAGE_SIZE)
1109                count = PAGE_SIZE - 1;
1110
1111        if (*ppos != 0) {
1112                /* No partial writes. */
1113                return -EINVAL;
1114        }
1115        page = (char*)__get_free_page(GFP_TEMPORARY);
1116        if (!page)
1117                return -ENOMEM;
1118        length = -EFAULT;
1119        if (copy_from_user(page, buf, count))
1120                goto out_free_page;
1121
1122        page[count] = '\0';
1123        loginuid = simple_strtoul(page, &tmp, 10);
1124        if (tmp == page) {
1125                length = -EINVAL;
1126                goto out_free_page;
1127
1128        }
1129        length = audit_set_loginuid(loginuid);
1130        if (likely(length == 0))
1131                length = count;
1132
1133out_free_page:
1134        free_page((unsigned long) page);
1135        return length;
1136}
1137
1138static const struct file_operations proc_loginuid_operations = {
1139        .read           = proc_loginuid_read,
1140        .write          = proc_loginuid_write,
1141        .llseek         = generic_file_llseek,
1142};
1143
1144static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1145                                  size_t count, loff_t *ppos)
1146{
1147        struct inode * inode = file->f_path.dentry->d_inode;
1148        struct task_struct *task = get_proc_task(inode);
1149        ssize_t length;
1150        char tmpbuf[TMPBUFLEN];
1151
1152        if (!task)
1153                return -ESRCH;
1154        length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1155                                audit_get_sessionid(task));
1156        put_task_struct(task);
1157        return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1158}
1159
1160static const struct file_operations proc_sessionid_operations = {
1161        .read           = proc_sessionid_read,
1162        .llseek         = generic_file_llseek,
1163};
1164#endif
1165
1166#ifdef CONFIG_FAULT_INJECTION
1167static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1168                                      size_t count, loff_t *ppos)
1169{
1170        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
1171        char buffer[PROC_NUMBUF];
1172        size_t len;
1173        int make_it_fail;
1174
1175        if (!task)
1176                return -ESRCH;
1177        make_it_fail = task->make_it_fail;
1178        put_task_struct(task);
1179
1180        len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
1181
1182        return simple_read_from_buffer(buf, count, ppos, buffer, len);
1183}
1184
1185static ssize_t proc_fault_inject_write(struct file * file,
1186                        const char __user * buf, size_t count, loff_t *ppos)
1187{
1188        struct task_struct *task;
1189        char buffer[PROC_NUMBUF], *end;
1190        int make_it_fail;
1191
1192        if (!capable(CAP_SYS_RESOURCE))
1193                return -EPERM;
1194        memset(buffer, 0, sizeof(buffer));
1195        if (count > sizeof(buffer) - 1)
1196                count = sizeof(buffer) - 1;
1197        if (copy_from_user(buffer, buf, count))
1198                return -EFAULT;
1199        make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1200        if (*end)
1201                return -EINVAL;
1202        task = get_proc_task(file->f_dentry->d_inode);
1203        if (!task)
1204                return -ESRCH;
1205        task->make_it_fail = make_it_fail;
1206        put_task_struct(task);
1207
1208        return count;
1209}
1210
1211static const struct file_operations proc_fault_inject_operations = {
1212        .read           = proc_fault_inject_read,
1213        .write          = proc_fault_inject_write,
1214        .llseek         = generic_file_llseek,
1215};
1216#endif
1217
1218
1219#ifdef CONFIG_SCHED_DEBUG
1220/*
1221 * Print out various scheduling related per-task fields:
1222 */
1223static int sched_show(struct seq_file *m, void *v)
1224{
1225        struct inode *inode = m->private;
1226        struct task_struct *p;
1227
1228        p = get_proc_task(inode);
1229        if (!p)
1230                return -ESRCH;
1231        proc_sched_show_task(p, m);
1232
1233        put_task_struct(p);
1234
1235        return 0;
1236}
1237
1238static ssize_t
1239sched_write(struct file *file, const char __user *buf,
1240            size_t count, loff_t *offset)
1241{
1242        struct inode *inode = file->f_path.dentry->d_inode;
1243        struct task_struct *p;
1244
1245        p = get_proc_task(inode);
1246        if (!p)
1247                return -ESRCH;
1248        proc_sched_set_task(p);
1249
1250        put_task_struct(p);
1251
1252        return count;
1253}
1254
1255static int sched_open(struct inode *inode, struct file *filp)
1256{
1257        return single_open(filp, sched_show, inode);
1258}
1259
1260static const struct file_operations proc_pid_sched_operations = {
1261        .open           = sched_open,
1262        .read           = seq_read,
1263        .write          = sched_write,
1264        .llseek         = seq_lseek,
1265        .release        = single_release,
1266};
1267
1268#endif
1269
1270#ifdef CONFIG_SCHED_AUTOGROUP
1271/*
1272 * Print out autogroup related information:
1273 */
1274static int sched_autogroup_show(struct seq_file *m, void *v)
1275{
1276        struct inode *inode = m->private;
1277        struct task_struct *p;
1278
1279        p = get_proc_task(inode);
1280        if (!p)
1281                return -ESRCH;
1282        proc_sched_autogroup_show_task(p, m);
1283
1284        put_task_struct(p);
1285
1286        return 0;
1287}
1288
1289static ssize_t
1290sched_autogroup_write(struct file *file, const char __user *buf,
1291            size_t count, loff_t *offset)
1292{
1293        struct inode *inode = file->f_path.dentry->d_inode;
1294        struct task_struct *p;
1295        char buffer[PROC_NUMBUF];
1296        int nice;
1297        int err;
1298
1299        memset(buffer, 0, sizeof(buffer));
1300        if (count > sizeof(buffer) - 1)
1301                count = sizeof(buffer) - 1;
1302        if (copy_from_user(buffer, buf, count))
1303                return -EFAULT;
1304
1305        err = kstrtoint(strstrip(buffer), 0, &nice);
1306        if (err < 0)
1307                return err;
1308
1309        p = get_proc_task(inode);
1310        if (!p)
1311                return -ESRCH;
1312
1313        err = nice;
1314        err = proc_sched_autogroup_set_nice(p, &err);
1315        if (err)
1316                count = err;
1317
1318        put_task_struct(p);
1319
1320        return count;
1321}
1322
1323static int sched_autogroup_open(struct inode *inode, struct file *filp)
1324{
1325        int ret;
1326
1327        ret = single_open(filp, sched_autogroup_show, NULL);
1328        if (!ret) {
1329                struct seq_file *m = filp->private_data;
1330
1331                m->private = inode;
1332        }
1333        return ret;
1334}
1335
1336static const struct file_operations proc_pid_sched_autogroup_operations = {
1337        .open           = sched_autogroup_open,
1338        .read           = seq_read,
1339        .write          = sched_autogroup_write,
1340        .llseek         = seq_lseek,
1341        .release        = single_release,
1342};
1343
1344#endif /* CONFIG_SCHED_AUTOGROUP */
1345
1346static ssize_t comm_write(struct file *file, const char __user *buf,
1347                                size_t count, loff_t *offset)
1348{
1349        struct inode *inode = file->f_path.dentry->d_inode;
1350        struct task_struct *p;
1351        char buffer[TASK_COMM_LEN];
1352
1353        memset(buffer, 0, sizeof(buffer));
1354        if (count > sizeof(buffer) - 1)
1355                count = sizeof(buffer) - 1;
1356        if (copy_from_user(buffer, buf, count))
1357                return -EFAULT;
1358
1359        p = get_proc_task(inode);
1360        if (!p)
1361                return -ESRCH;
1362
1363        if (same_thread_group(current, p))
1364                set_task_comm(p, buffer);
1365        else
1366                count = -EINVAL;
1367
1368        put_task_struct(p);
1369
1370        return count;
1371}
1372
1373static int comm_show(struct seq_file *m, void *v)
1374{
1375        struct inode *inode = m->private;
1376        struct task_struct *p;
1377
1378        p = get_proc_task(inode);
1379        if (!p)
1380                return -ESRCH;
1381
1382        task_lock(p);
1383        seq_printf(m, "%s\n", p->comm);
1384        task_unlock(p);
1385
1386        put_task_struct(p);
1387
1388        return 0;
1389}
1390
1391static int comm_open(struct inode *inode, struct file *filp)
1392{
1393        return single_open(filp, comm_show, inode);
1394}
1395
1396static const struct file_operations proc_pid_set_comm_operations = {
1397        .open           = comm_open,
1398        .read           = seq_read,
1399        .write          = comm_write,
1400        .llseek         = seq_lseek,
1401        .release        = single_release,
1402};
1403
1404static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1405{
1406        struct task_struct *task;
1407        struct mm_struct *mm;
1408        struct file *exe_file;
1409
1410        task = get_proc_task(dentry->d_inode);
1411        if (!task)
1412                return -ENOENT;
1413        mm = get_task_mm(task);
1414        put_task_struct(task);
1415        if (!mm)
1416                return -ENOENT;
1417        exe_file = get_mm_exe_file(mm);
1418        mmput(mm);
1419        if (exe_file) {
1420                *exe_path = exe_file->f_path;
1421                path_get(&exe_file->f_path);
1422                fput(exe_file);
1423                return 0;
1424        } else
1425                return -ENOENT;
1426}
1427
1428static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1429{
1430        struct inode *inode = dentry->d_inode;
1431        int error = -EACCES;
1432
1433        /* We don't need a base pointer in the /proc filesystem */
1434        path_put(&nd->path);
1435
1436        /* Are we allowed to snoop on the tasks file descriptors? */
1437        if (!proc_fd_access_allowed(inode))
1438                goto out;
1439
1440        error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
1441out:
1442        return ERR_PTR(error);
1443}
1444
1445static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
1446{
1447        char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
1448        char *pathname;
1449        int len;
1450
1451        if (!tmp)
1452                return -ENOMEM;
1453
1454        pathname = d_path(path, tmp, PAGE_SIZE);
1455        len = PTR_ERR(pathname);
1456        if (IS_ERR(pathname))
1457                goto out;
1458        len = tmp + PAGE_SIZE - 1 - pathname;
1459
1460        if (len > buflen)
1461                len = buflen;
1462        if (copy_to_user(buffer, pathname, len))
1463                len = -EFAULT;
1464 out:
1465        free_page((unsigned long)tmp);
1466        return len;
1467}
1468
1469static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
1470{
1471        int error = -EACCES;
1472        struct inode *inode = dentry->d_inode;
1473        struct path path;
1474
1475        /* Are we allowed to snoop on the tasks file descriptors? */
1476        if (!proc_fd_access_allowed(inode))
1477                goto out;
1478
1479        error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1480        if (error)
1481                goto out;
1482
1483        error = do_proc_readlink(&path, buffer, buflen);
1484        path_put(&path);
1485out:
1486        return error;
1487}
1488
1489static const struct inode_operations proc_pid_link_inode_operations = {
1490        .readlink       = proc_pid_readlink,
1491        .follow_link    = proc_pid_follow_link,
1492        .setattr        = proc_setattr,
1493};
1494
1495
1496/* building an inode */
1497
1498static int task_dumpable(struct task_struct *task)
1499{
1500        int dumpable = 0;
1501        struct mm_struct *mm;
1502
1503        task_lock(task);
1504        mm = task->mm;
1505        if (mm)
1506                dumpable = get_dumpable(mm);
1507        task_unlock(task);
1508        if(dumpable == 1)
1509                return 1;
1510        return 0;
1511}
1512
1513struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1514{
1515        struct inode * inode;
1516        struct proc_inode *ei;
1517        const struct cred *cred;
1518
1519        /* We need a new inode */
1520
1521        inode = new_inode(sb);
1522        if (!inode)
1523                goto out;
1524
1525        /* Common stuff */
1526        ei = PROC_I(inode);
1527        inode->i_ino = get_next_ino();
1528        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1529        inode->i_op = &proc_def_inode_operations;
1530
1531        /*
1532         * grab the reference to task.
1533         */
1534        ei->pid = get_task_pid(task, PIDTYPE_PID);
1535        if (!ei->pid)
1536                goto out_unlock;
1537
1538        if (task_dumpable(task)) {
1539                rcu_read_lock();
1540                cred = __task_cred(task);
1541                inode->i_uid = cred->euid;
1542                inode->i_gid = cred->egid;
1543                rcu_read_unlock();
1544        }
1545        security_task_to_inode(task, inode);
1546
1547out:
1548        return inode;
1549
1550out_unlock:
1551        iput(inode);
1552        return NULL;
1553}
1554
1555int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1556{
1557        struct inode *inode = dentry->d_inode;
1558        struct task_struct *task;
1559        const struct cred *cred;
1560        struct pid_namespace *pid = dentry->d_sb->s_fs_info;
1561
1562        generic_fillattr(inode, stat);
1563
1564        rcu_read_lock();
1565        stat->uid = 0;
1566        stat->gid = 0;
1567        task = pid_task(proc_pid(inode), PIDTYPE_PID);
1568        if (task) {
1569                if (!has_pid_permissions(pid, task, 2)) {
1570                        rcu_read_unlock();
1571                        /*
1572                         * This doesn't prevent learning whether PID exists,
1573                         * it only makes getattr() consistent with readdir().
1574                         */
1575                        return -ENOENT;
1576                }
1577                if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1578                    task_dumpable(task)) {
1579                        cred = __task_cred(task);
1580                        stat->uid = cred->euid;
1581                        stat->gid = cred->egid;
1582                }
1583        }
1584        rcu_read_unlock();
1585        return 0;
1586}
1587
1588/* dentry stuff */
1589
1590/*
1591 *      Exceptional case: normally we are not allowed to unhash a busy
1592 * directory. In this case, however, we can do it - no aliasing problems
1593 * due to the way we treat inodes.
1594 *
1595 * Rewrite the inode's ownerships here because the owning task may have
1596 * performed a setuid(), etc.
1597 *
1598 * Before the /proc/pid/status file was created the only way to read
1599 * the effective uid of a /process was to stat /proc/pid.  Reading
1600 * /proc/pid/status is slow enough that procps and other packages
1601 * kept stating /proc/pid.  To keep the rules in /proc simple I have
1602 * made this apply to all per process world readable and executable
1603 * directories.
1604 */
1605int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1606{
1607        struct inode *inode;
1608        struct task_struct *task;
1609        const struct cred *cred;
1610
1611        if (nd && nd->flags & LOOKUP_RCU)
1612                return -ECHILD;
1613
1614        inode = dentry->d_inode;
1615        task = get_proc_task(inode);
1616
1617        if (task) {
1618                if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1619                    task_dumpable(task)) {
1620                        rcu_read_lock();
1621                        cred = __task_cred(task);
1622                        inode->i_uid = cred->euid;
1623                        inode->i_gid = cred->egid;
1624                        rcu_read_unlock();
1625                } else {
1626                        inode->i_uid = 0;
1627                        inode->i_gid = 0;
1628                }
1629                inode->i_mode &= ~(S_ISUID | S_ISGID);
1630                security_task_to_inode(task, inode);
1631                put_task_struct(task);
1632                return 1;
1633        }
1634        d_drop(dentry);
1635        return 0;
1636}
1637
1638static int pid_delete_dentry(const struct dentry * dentry)
1639{
1640        /* Is the task we represent dead?
1641         * If so, then don't put the dentry on the lru list,
1642         * kill it immediately.
1643         */
1644        return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1645}
1646
1647const struct dentry_operations pid_dentry_operations =
1648{
1649        .d_revalidate   = pid_revalidate,
1650        .d_delete       = pid_delete_dentry,
1651};
1652
1653/* Lookups */
1654
1655/*
1656 * Fill a directory entry.
1657 *
1658 * If possible create the dcache entry and derive our inode number and
1659 * file type from dcache entry.
1660 *
1661 * Since all of the proc inode numbers are dynamically generated, the inode
1662 * numbers do not exist until the inode is cache.  This means creating the
1663 * the dcache entry in readdir is necessary to keep the inode numbers
1664 * reported by readdir in sync with the inode numbers reported
1665 * by stat.
1666 */
1667int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1668        const char *name, int len,
1669        instantiate_t instantiate, struct task_struct *task, const void *ptr)
1670{
1671        struct dentry *child, *dir = filp->f_path.dentry;
1672        struct inode *inode;
1673        struct qstr qname;
1674        ino_t ino = 0;
1675        unsigned type = DT_UNKNOWN;
1676
1677        qname.name = name;
1678        qname.len  = len;
1679        qname.hash = full_name_hash(name, len);
1680
1681        child = d_lookup(dir, &qname);
1682        if (!child) {
1683                struct dentry *new;
1684                new = d_alloc(dir, &qname);
1685                if (new) {
1686                        child = instantiate(dir->d_inode, new, task, ptr);
1687                        if (child)
1688                                dput(new);
1689                        else
1690                                child = new;
1691                }
1692        }
1693        if (!child || IS_ERR(child) || !child->d_inode)
1694                goto end_instantiate;
1695        inode = child->d_inode;
1696        if (inode) {
1697                ino = inode->i_ino;
1698                type = inode->i_mode >> 12;
1699        }
1700        dput(child);
1701end_instantiate:
1702        if (!ino)
1703                ino = find_inode_number(dir, &qname);
1704        if (!ino)
1705                ino = 1;
1706        return filldir(dirent, name, len, filp->f_pos, ino, type);
1707}
1708
1709static unsigned name_to_int(struct dentry *dentry)
1710{
1711        const char *name = dentry->d_name.name;
1712        int len = dentry->d_name.len;
1713        unsigned n = 0;
1714
1715        if (len > 1 && *name == '0')
1716                goto out;
1717        while (len-- > 0) {
1718                unsigned c = *name++ - '0';
1719                if (c > 9)
1720                        goto out;
1721                if (n >= (~0U-9)/10)
1722                        goto out;
1723                n *= 10;
1724                n += c;
1725        }
1726        return n;
1727out:
1728        return ~0U;
1729}
1730
1731#define PROC_FDINFO_MAX 64
1732
1733static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1734{
1735        struct task_struct *task = get_proc_task(inode);
1736        struct files_struct *files = NULL;
1737        struct file *file;
1738        int fd = proc_fd(inode);
1739
1740        if (task) {
1741                files = get_files_struct(task);
1742                put_task_struct(task);
1743        }
1744        if (files) {
1745                /*
1746                 * We are not taking a ref to the file structure, so we must
1747                 * hold ->file_lock.
1748                 */
1749                spin_lock(&files->file_lock);
1750                file = fcheck_files(files, fd);
1751                if (file) {
1752                        unsigned int f_flags;
1753                        struct fdtable *fdt;
1754
1755                        fdt = files_fdtable(files);
1756                        f_flags = file->f_flags & ~O_CLOEXEC;
1757                        if (FD_ISSET(fd, fdt->close_on_exec))
1758                                f_flags |= O_CLOEXEC;
1759
1760                        if (path) {
1761                                *path = file->f_path;
1762                                path_get(&file->f_path);
1763                        }
1764                        if (info)
1765                                snprintf(info, PROC_FDINFO_MAX,
1766                                         "pos:\t%lli\n"
1767                                         "flags:\t0%o\n",
1768                                         (long long) file->f_pos,
1769                                         f_flags);
1770                        spin_unlock(&files->file_lock);
1771                        put_files_struct(files);
1772                        return 0;
1773                }
1774                spin_unlock(&files->file_lock);
1775                put_files_struct(files);
1776        }
1777        return -ENOENT;
1778}
1779
1780static int proc_fd_link(struct dentry *dentry, struct path *path)
1781{
1782        return proc_fd_info(dentry->d_inode, path, NULL);
1783}
1784
1785static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1786{
1787        struct inode *inode;
1788        struct task_struct *task;
1789        int fd;
1790        struct files_struct *files;
1791        const struct cred *cred;
1792
1793        if (nd && nd->flags & LOOKUP_RCU)
1794                return -ECHILD;
1795
1796        inode = dentry->d_inode;
1797        task = get_proc_task(inode);
1798        fd = proc_fd(inode);
1799
1800        if (task) {
1801                files = get_files_struct(task);
1802                if (files) {
1803                        rcu_read_lock();
1804                        if (fcheck_files(files, fd)) {
1805                                rcu_read_unlock();
1806                                put_files_struct(files);
1807                                if (task_dumpable(task)) {
1808                                        rcu_read_lock();
1809                                        cred = __task_cred(task);
1810                                        inode->i_uid = cred->euid;
1811                                        inode->i_gid = cred->egid;
1812                                        rcu_read_unlock();
1813                                } else {
1814                                        inode->i_uid = 0;
1815                                        inode->i_gid = 0;
1816                                }
1817                                inode->i_mode &= ~(S_ISUID | S_ISGID);
1818                                security_task_to_inode(task, inode);
1819                                put_task_struct(task);
1820                                return 1;
1821                        }
1822                        rcu_read_unlock();
1823                        put_files_struct(files);
1824                }
1825                put_task_struct(task);
1826        }
1827        d_drop(dentry);
1828        return 0;
1829}
1830
1831static const struct dentry_operations tid_fd_dentry_operations =
1832{
1833        .d_revalidate   = tid_fd_revalidate,
1834        .d_delete       = pid_delete_dentry,
1835};
1836
1837static struct dentry *proc_fd_instantiate(struct inode *dir,
1838        struct dentry *dentry, struct task_struct *task, const void *ptr)
1839{
1840        unsigned fd = *(const unsigned *)ptr;
1841        struct file *file;
1842        struct files_struct *files;
1843        struct inode *inode;
1844        struct proc_inode *ei;
1845        struct dentry *error = ERR_PTR(-ENOENT);
1846
1847        inode = proc_pid_make_inode(dir->i_sb, task);
1848        if (!inode)
1849                goto out;
1850        ei = PROC_I(inode);
1851        ei->fd = fd;
1852        files = get_files_struct(task);
1853        if (!files)
1854                goto out_iput;
1855        inode->i_mode = S_IFLNK;
1856
1857        /*
1858         * We are not taking a ref to the file structure, so we must
1859         * hold ->file_lock.
1860         */
1861        spin_lock(&files->file_lock);
1862        file = fcheck_files(files, fd);
1863        if (!file)
1864                goto out_unlock;
1865        if (file->f_mode & FMODE_READ)
1866                inode->i_mode |= S_IRUSR | S_IXUSR;
1867        if (file->f_mode & FMODE_WRITE)
1868                inode->i_mode |= S_IWUSR | S_IXUSR;
1869        spin_unlock(&files->file_lock);
1870        put_files_struct(files);
1871
1872        inode->i_op = &proc_pid_link_inode_operations;
1873        inode->i_size = 64;
1874        ei->op.proc_get_link = proc_fd_link;
1875        d_set_d_op(dentry, &tid_fd_dentry_operations);
1876        d_add(dentry, inode);
1877        /* Close the race of the process dying before we return the dentry */
1878        if (tid_fd_revalidate(dentry, NULL))
1879                error = NULL;
1880
1881 out:
1882        return error;
1883out_unlock:
1884        spin_unlock(&files->file_lock);
1885        put_files_struct(files);
1886out_iput:
1887        iput(inode);
1888        goto out;
1889}
1890
1891static struct dentry *proc_lookupfd_common(struct inode *dir,
1892                                           struct dentry *dentry,
1893                                           instantiate_t instantiate)
1894{
1895        struct task_struct *task = get_proc_task(dir);
1896        unsigned fd = name_to_int(dentry);
1897        struct dentry *result = ERR_PTR(-ENOENT);
1898
1899        if (!task)
1900                goto out_no_task;
1901        if (fd == ~0U)
1902                goto out;
1903
1904        result = instantiate(dir, dentry, task, &fd);
1905out:
1906        put_task_struct(task);
1907out_no_task:
1908        return result;
1909}
1910
1911static int proc_readfd_common(struct file * filp, void * dirent,
1912                              filldir_t filldir, instantiate_t instantiate)
1913{
1914        struct dentry *dentry = filp->f_path.dentry;
1915        struct inode *inode = dentry->d_inode;
1916        struct task_struct *p = get_proc_task(inode);
1917        unsigned int fd, ino;
1918        int retval;
1919        struct files_struct * files;
1920
1921        retval = -ENOENT;
1922        if (!p)
1923                goto out_no_task;
1924        retval = 0;
1925
1926        fd = filp->f_pos;
1927        switch (fd) {
1928                case 0:
1929                        if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1930                                goto out;
1931                        filp->f_pos++;
1932                case 1:
1933                        ino = parent_ino(dentry);
1934                        if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1935                                goto out;
1936                        filp->f_pos++;
1937                default:
1938                        files = get_files_struct(p);
1939                        if (!files)
1940                                goto out;
1941                        rcu_read_lock();
1942                        for (fd = filp->f_pos-2;
1943                             fd < files_fdtable(files)->max_fds;
1944                             fd++, filp->f_pos++) {
1945                                char name[PROC_NUMBUF];
1946                                int len;
1947
1948                                if (!fcheck_files(files, fd))
1949                                        continue;
1950                                rcu_read_unlock();
1951
1952                                len = snprintf(name, sizeof(name), "%d", fd);
1953                                if (proc_fill_cache(filp, dirent, filldir,
1954                                                    name, len, instantiate,
1955                                                    p, &fd) < 0) {
1956                                        rcu_read_lock();
1957                                        break;
1958                                }
1959                                rcu_read_lock();
1960                        }
1961                        rcu_read_unlock();
1962                        put_files_struct(files);
1963        }
1964out:
1965        put_task_struct(p);
1966out_no_task:
1967        return retval;
1968}
1969
1970static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
1971                                    struct nameidata *nd)
1972{
1973        return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
1974}
1975
1976static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
1977{
1978        return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
1979}
1980
1981static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
1982                                      size_t len, loff_t *ppos)
1983{
1984        char tmp[PROC_FDINFO_MAX];
1985        int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
1986        if (!err)
1987                err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
1988        return err;
1989}
1990
1991static const struct file_operations proc_fdinfo_file_operations = {
1992        .open           = nonseekable_open,
1993        .read           = proc_fdinfo_read,
1994        .llseek         = no_llseek,
1995};
1996
1997static const struct file_operations proc_fd_operations = {
1998        .read           = generic_read_dir,
1999        .readdir        = proc_readfd,
2000        .llseek         = default_llseek,
2001};
2002
2003#ifdef CONFIG_CHECKPOINT_RESTORE
2004
2005/*
2006 * dname_to_vma_addr - maps a dentry name into two unsigned longs
2007 * which represent vma start and end addresses.
2008 */
2009static int dname_to_vma_addr(struct dentry *dentry,
2010                             unsigned long *start, unsigned long *end)
2011{
2012        if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
2013                return -EINVAL;
2014
2015        return 0;
2016}
2017
2018static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
2019{
2020        unsigned long vm_start, vm_end;
2021        bool exact_vma_exists = false;
2022        struct mm_struct *mm = NULL;
2023        struct task_struct *task;
2024        const struct cred *cred;
2025        struct inode *inode;
2026        int status = 0;
2027
2028        if (nd && nd->flags & LOOKUP_RCU)
2029                return -ECHILD;
2030
2031        if (!capable(CAP_SYS_ADMIN)) {
2032                status = -EACCES;
2033                goto out_notask;
2034        }
2035
2036        inode = dentry->d_inode;
2037        task = get_proc_task(inode);
2038        if (!task)
2039                goto out_notask;
2040
2041        if (!ptrace_may_access(task, PTRACE_MODE_READ))
2042                goto out;
2043
2044        mm = get_task_mm(task);
2045        if (!mm)
2046                goto out;
2047
2048        if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
2049                down_read(&mm->mmap_sem);
2050                exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
2051                up_read(&mm->mmap_sem);
2052        }
2053
2054        mmput(mm);
2055
2056        if (exact_vma_exists) {
2057                if (task_dumpable(task)) {
2058                        rcu_read_lock();
2059                        cred = __task_cred(task);
2060                        inode->i_uid = cred->euid;
2061                        inode->i_gid = cred->egid;
2062                        rcu_read_unlock();
2063                } else {
2064                        inode->i_uid = 0;
2065                        inode->i_gid = 0;
2066                }
2067                security_task_to_inode(task, inode);
2068                status = 1;
2069        }
2070
2071out:
2072        put_task_struct(task);
2073
2074out_notask:
2075        if (status <= 0)
2076                d_drop(dentry);
2077
2078        return status;
2079}
2080
2081static const struct dentry_operations tid_map_files_dentry_operations = {
2082        .d_revalidate   = map_files_d_revalidate,
2083        .d_delete       = pid_delete_dentry,
2084};
2085
2086static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
2087{
2088        unsigned long vm_start, vm_end;
2089        struct vm_area_struct *vma;
2090        struct task_struct *task;
2091        struct mm_struct *mm;
2092        int rc;
2093
2094        rc = -ENOENT;
2095        task = get_proc_task(dentry->d_inode);
2096        if (!task)
2097                goto out;
2098
2099        mm = get_task_mm(task);
2100        put_task_struct(task);
2101        if (!mm)
2102                goto out;
2103
2104        rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
2105        if (rc)
2106                goto out_mmput;
2107
2108        down_read(&mm->mmap_sem);
2109        vma = find_exact_vma(mm, vm_start, vm_end);
2110        if (vma && vma->vm_file) {
2111                *path = vma->vm_file->f_path;
2112                path_get(path);
2113                rc = 0;
2114        }
2115        up_read(&mm->mmap_sem);
2116
2117out_mmput:
2118        mmput(mm);
2119out:
2120        return rc;
2121}
2122
/*
 * One collected entry of a map_files listing, filled by
 * proc_map_files_readdir() while mmap_sem is held and consumed after it
 * has been released.
 */
struct map_files_info {
	struct file	*file;	/* referenced via get_file(), released with fput() */
	unsigned long	len;	/* return value of the snprintf() that filled name */
	/* two longs in hex (4*sizeof(long) digits), '-' and the terminating NUL */
	unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
2128
2129static struct dentry *
2130proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2131                           struct task_struct *task, const void *ptr)
2132{
2133        const struct file *file = ptr;
2134        struct proc_inode *ei;
2135        struct inode *inode;
2136
2137        if (!file)
2138                return ERR_PTR(-ENOENT);
2139
2140        inode = proc_pid_make_inode(dir->i_sb, task);
2141        if (!inode)
2142                return ERR_PTR(-ENOENT);
2143
2144        ei = PROC_I(inode);
2145        ei->op.proc_get_link = proc_map_files_get_link;
2146
2147        inode->i_op = &proc_pid_link_inode_operations;
2148        inode->i_size = 64;
2149        inode->i_mode = S_IFLNK;
2150
2151        if (file->f_mode & FMODE_READ)
2152                inode->i_mode |= S_IRUSR;
2153        if (file->f_mode & FMODE_WRITE)
2154                inode->i_mode |= S_IWUSR;
2155
2156        d_set_d_op(dentry, &tid_map_files_dentry_operations);
2157        d_add(dentry, inode);
2158
2159        return NULL;
2160}
2161
2162static struct dentry *proc_map_files_lookup(struct inode *dir,
2163                struct dentry *dentry, struct nameidata *nd)
2164{
2165        unsigned long vm_start, vm_end;
2166        struct vm_area_struct *vma;
2167        struct task_struct *task;
2168        struct dentry *result;
2169        struct mm_struct *mm;
2170
2171        result = ERR_PTR(-EACCES);
2172        if (!capable(CAP_SYS_ADMIN))
2173                goto out;
2174
2175        result = ERR_PTR(-ENOENT);
2176        task = get_proc_task(dir);
2177        if (!task)
2178                goto out;
2179
2180        result = ERR_PTR(-EACCES);
2181        if (lock_trace(task))
2182                goto out_put_task;
2183
2184        result = ERR_PTR(-ENOENT);
2185        if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
2186                goto out_unlock;
2187
2188        mm = get_task_mm(task);
2189        if (!mm)
2190                goto out_unlock;
2191
2192        down_read(&mm->mmap_sem);
2193        vma = find_exact_vma(mm, vm_start, vm_end);
2194        if (!vma)
2195                goto out_no_vma;
2196
2197        result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
2198
2199out_no_vma:
2200        up_read(&mm->mmap_sem);
2201        mmput(mm);
2202out_unlock:
2203        unlock_trace(task);
2204out_put_task:
2205        put_task_struct(task);
2206out:
2207        return result;
2208}
2209
2210static const struct inode_operations proc_map_files_inode_operations = {
2211        .lookup         = proc_map_files_lookup,
2212        .permission     = proc_fd_permission,
2213        .setattr        = proc_setattr,
2214};
2215
2216static int
2217proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2218{
2219        struct dentry *dentry = filp->f_path.dentry;
2220        struct inode *inode = dentry->d_inode;
2221        struct vm_area_struct *vma;
2222        struct task_struct *task;
2223        struct mm_struct *mm;
2224        ino_t ino;
2225        int ret;
2226
2227        ret = -EACCES;
2228        if (!capable(CAP_SYS_ADMIN))
2229                goto out;
2230
2231        ret = -ENOENT;
2232        task = get_proc_task(inode);
2233        if (!task)
2234                goto out;
2235
2236        ret = -EACCES;
2237        if (lock_trace(task))
2238                goto out_put_task;
2239
2240        ret = 0;
2241        switch (filp->f_pos) {
2242        case 0:
2243                ino = inode->i_ino;
2244                if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
2245                        goto out_unlock;
2246                filp->f_pos++;
2247        case 1:
2248                ino = parent_ino(dentry);
2249                if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2250                        goto out_unlock;
2251                filp->f_pos++;
2252        default:
2253        {
2254                unsigned long nr_files, pos, i;
2255                struct flex_array *fa = NULL;
2256                struct map_files_info info;
2257                struct map_files_info *p;
2258
2259                mm = get_task_mm(task);
2260                if (!mm)
2261                        goto out_unlock;
2262                down_read(&mm->mmap_sem);
2263
2264                nr_files = 0;
2265
2266                /*
2267                 * We need two passes here:
2268                 *
2269                 *  1) Collect vmas of mapped files with mmap_sem taken
2270                 *  2) Release mmap_sem and instantiate entries
2271                 *
2272                 * otherwise we get lockdep complained, since filldir()
2273                 * routine might require mmap_sem taken in might_fault().
2274                 */
2275
2276                for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
2277                        if (vma->vm_file && ++pos > filp->f_pos)
2278                                nr_files++;
2279                }
2280
2281                if (nr_files) {
2282                        fa = flex_array_alloc(sizeof(info), nr_files,
2283                                                GFP_KERNEL);
2284                        if (!fa || flex_array_prealloc(fa, 0, nr_files,
2285                                                        GFP_KERNEL)) {
2286                                ret = -ENOMEM;
2287                                if (fa)
2288                                        flex_array_free(fa);
2289                                up_read(&mm->mmap_sem);
2290                                mmput(mm);
2291                                goto out_unlock;
2292                        }
2293                        for (i = 0, vma = mm->mmap, pos = 2; vma;
2294                                        vma = vma->vm_next) {
2295                                if (!vma->vm_file)
2296                                        continue;
2297                                if (++pos <= filp->f_pos)
2298                                        continue;
2299
2300                                get_file(vma->vm_file);
2301                                info.file = vma->vm_file;
2302                                info.len = snprintf(info.name,
2303                                                sizeof(info.name), "%lx-%lx",
2304                                                vma->vm_start, vma->vm_end);
2305                                if (flex_array_put(fa, i++, &info, GFP_KERNEL))
2306                                        BUG();
2307                        }
2308                }
2309                up_read(&mm->mmap_sem);
2310
2311                for (i = 0; i < nr_files; i++) {
2312                        p = flex_array_get(fa, i);
2313                        ret = proc_fill_cache(filp, dirent, filldir,
2314                                              p->name, p->len,
2315                                              proc_map_files_instantiate,
2316                                              task, p->file);
2317                        if (ret)
2318                                break;
2319                        filp->f_pos++;
2320                        fput(p->file);
2321                }
2322                for (; i < nr_files; i++) {
2323                        /*
2324                         * In case of error don't forget
2325                         * to put rest of file refs.
2326                         */
2327                        p = flex_array_get(fa, i);
2328                        fput(p->file);
2329                }
2330                if (fa)
2331                        flex_array_free(fa);
2332                mmput(mm);
2333        }
2334        }
2335
2336out_unlock:
2337        unlock_trace(task);
2338out_put_task:
2339        put_task_struct(task);
2340out:
2341        return ret;
2342}
2343
2344static const struct file_operations proc_map_files_operations = {
2345        .read           = generic_read_dir,
2346        .readdir        = proc_map_files_readdir,
2347        .llseek         = default_llseek,
2348};
2349
2350#endif /* CONFIG_CHECKPOINT_RESTORE */
2351
2352/*
2353 * /proc/pid/fd needs a special permission handler so that a process can still
2354 * access /proc/self/fd after it has executed a setuid().
2355 */
2356static int proc_fd_permission(struct inode *inode, int mask)
2357{
2358        int rv = generic_permission(inode, mask);
2359        if (rv == 0)
2360                return 0;
2361        if (task_pid(current) == proc_pid(inode))
2362                rv = 0;
2363        return rv;
2364}
2365
2366/*
2367 * proc directories can do almost nothing..
2368 */
2369static const struct inode_operations proc_fd_inode_operations = {
2370        .lookup         = proc_lookupfd,
2371        .permission     = proc_fd_permission,
2372        .setattr        = proc_setattr,
2373};
2374
2375static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2376        struct dentry *dentry, struct task_struct *task, const void *ptr)
2377{
2378        unsigned fd = *(unsigned *)ptr;
2379        struct inode *inode;
2380        struct proc_inode *ei;
2381        struct dentry *error = ERR_PTR(-ENOENT);
2382
2383        inode = proc_pid_make_inode(dir->i_sb, task);
2384        if (!inode)
2385                goto out;
2386        ei = PROC_I(inode);
2387        ei->fd = fd;
2388        inode->i_mode = S_IFREG | S_IRUSR;
2389        inode->i_fop = &proc_fdinfo_file_operations;
2390        d_set_d_op(dentry, &tid_fd_dentry_operations);
2391        d_add(dentry, inode);
2392        /* Close the race of the process dying before we return the dentry */
2393        if (tid_fd_revalidate(dentry, NULL))
2394                error = NULL;
2395
2396 out:
2397        return error;
2398}
2399
2400static struct dentry *proc_lookupfdinfo(struct inode *dir,
2401                                        struct dentry *dentry,
2402                                        struct nameidata *nd)
2403{
2404        return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
2405}
2406
2407static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
2408{
2409        return proc_readfd_common(filp, dirent, filldir,
2410                                  proc_fdinfo_instantiate);
2411}
2412
2413static const struct file_operations proc_fdinfo_operations = {
2414        .read           = generic_read_dir,
2415        .readdir        = proc_readfdinfo,
2416        .llseek         = default_llseek,
2417};
2418
2419/*
2420 * proc directories can do almost nothing..
2421 */
2422static const struct inode_operations proc_fdinfo_inode_operations = {
2423        .lookup         = proc_lookupfdinfo,
2424        .setattr        = proc_setattr,
2425};
2426
2427
2428static struct dentry *proc_pident_instantiate(struct inode *dir,
2429        struct dentry *dentry, struct task_struct *task, const void *ptr)
2430{
2431        const struct pid_entry *p = ptr;
2432        struct inode *inode;
2433        struct proc_inode *ei;
2434        struct dentry *error = ERR_PTR(-ENOENT);
2435
2436        inode = proc_pid_make_inode(dir->i_sb, task);
2437        if (!inode)
2438                goto out;
2439
2440        ei = PROC_I(inode);
2441        inode->i_mode = p->mode;
2442        if (S_ISDIR(inode->i_mode))
2443                set_nlink(inode, 2);    /* Use getattr to fix if necessary */
2444        if (p->iop)
2445                inode->i_op = p->iop;
2446        if (p->fop)
2447                inode->i_fop = p->fop;
2448        ei->op = p->op;
2449        d_set_d_op(dentry, &pid_dentry_operations);
2450        d_add(dentry, inode);
2451        /* Close the race of the process dying before we return the dentry */
2452        if (pid_revalidate(dentry, NULL))
2453                error = NULL;
2454out:
2455        return error;
2456}
2457
2458static struct dentry *proc_pident_lookup(struct inode *dir, 
2459                                         struct dentry *dentry,
2460                                         const struct pid_entry *ents,
2461                                         unsigned int nents)
2462{
2463        struct dentry *error;
2464        struct task_struct *task = get_proc_task(dir);
2465        const struct pid_entry *p, *last;
2466
2467        error = ERR_PTR(-ENOENT);
2468
2469        if (!task)
2470                goto out_no_task;
2471
2472        /*
2473         * Yes, it does not scale. And it should not. Don't add
2474         * new entries into /proc/<tgid>/ without very good reasons.
2475         */
2476        last = &ents[nents - 1];
2477        for (p = ents; p <= last; p++) {
2478                if (p->len != dentry->d_name.len)
2479                        continue;
2480                if (!memcmp(dentry->d_name.name, p->name, p->len))
2481                        break;
2482        }
2483        if (p > last)
2484                goto out;
2485
2486        error = proc_pident_instantiate(dir, dentry, task, p);
2487out:
2488        put_task_struct(task);
2489out_no_task:
2490        return error;
2491}
2492
2493static int proc_pident_fill_cache(struct file *filp, void *dirent,
2494        filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2495{
2496        return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2497                                proc_pident_instantiate, task, p);
2498}
2499
2500static int proc_pident_readdir(struct file *filp,
2501                void *dirent, filldir_t filldir,
2502                const struct pid_entry *ents, unsigned int nents)
2503{
2504        int i;
2505        struct dentry *dentry = filp->f_path.dentry;
2506        struct inode *inode = dentry->d_inode;
2507        struct task_struct *task = get_proc_task(inode);
2508        const struct pid_entry *p, *last;
2509        ino_t ino;
2510        int ret;
2511
2512        ret = -ENOENT;
2513        if (!task)
2514                goto out_no_task;
2515
2516        ret = 0;
2517        i = filp->f_pos;
2518        switch (i) {
2519        case 0:
2520                ino = inode->i_ino;
2521                if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
2522                        goto out;
2523                i++;
2524                filp->f_pos++;
2525                /* fall through */
2526        case 1:
2527                ino = parent_ino(dentry);
2528                if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
2529                        goto out;
2530                i++;
2531                filp->f_pos++;
2532                /* fall through */
2533        default:
2534                i -= 2;
2535                if (i >= nents) {
2536                        ret = 1;
2537                        goto out;
2538                }
2539                p = ents + i;
2540                last = &ents[nents - 1];
2541                while (p <= last) {
2542                        if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
2543                                goto out;
2544                        filp->f_pos++;
2545                        p++;
2546                }
2547        }
2548
2549        ret = 1;
2550out:
2551        put_task_struct(task);
2552out_no_task:
2553        return ret;
2554}
2555
2556#ifdef CONFIG_SECURITY
2557static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2558                                  size_t count, loff_t *ppos)
2559{
2560        struct inode * inode = file->f_path.dentry->d_inode;
2561        char *p = NULL;
2562        ssize_t length;
2563        struct task_struct *task = get_proc_task(inode);
2564
2565        if (!task)
2566                return -ESRCH;
2567
2568        length = security_getprocattr(task,
2569                                      (char*)file->f_path.dentry->d_name.name,
2570                                      &p);
2571        put_task_struct(task);
2572        if (length > 0)
2573                length = simple_read_from_buffer(buf, count, ppos, p, length);
2574        kfree(p);
2575        return length;
2576}
2577
2578static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2579                                   size_t count, loff_t *ppos)
2580{
2581        struct inode * inode = file->f_path.dentry->d_inode;
2582        char *page;
2583        ssize_t length;
2584        struct task_struct *task = get_proc_task(inode);
2585
2586        length = -ESRCH;
2587        if (!task)
2588                goto out_no_task;
2589        if (count > PAGE_SIZE)
2590                count = PAGE_SIZE;
2591
2592        /* No partial writes. */
2593        length = -EINVAL;
2594        if (*ppos != 0)
2595                goto out;
2596
2597        length = -ENOMEM;
2598        page = (char*)__get_free_page(GFP_TEMPORARY);
2599        if (!page)
2600                goto out;
2601
2602        length = -EFAULT;
2603        if (copy_from_user(page, buf, count))
2604                goto out_free;
2605
2606        /* Guard against adverse ptrace interaction */
2607        length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
2608        if (length < 0)
2609                goto out_free;
2610
2611        length = security_setprocattr(task,
2612                                      (char*)file->f_path.dentry->d_name.name,
2613                                      (void*)page, count);
2614        mutex_unlock(&task->signal->cred_guard_mutex);
2615out_free:
2616        free_page((unsigned long) page);
2617out:
2618        put_task_struct(task);
2619out_no_task:
2620        return length;
2621}
2622
2623static const struct file_operations proc_pid_attr_operations = {
2624        .read           = proc_pid_attr_read,
2625        .write          = proc_pid_attr_write,
2626        .llseek         = generic_file_llseek,
2627};
2628
/*
 * Entry table served by proc_attr_dir_readdir() and proc_attr_dir_lookup().
 * Every entry uses proc_pid_attr_operations; "prev" is the only one
 * without write permission bits.
 */
static const struct pid_entry attr_dir_stuff[] = {
	REG("current",    S_IRUGO|S_IWUGO, proc_pid_attr_operations),
	REG("prev",       S_IRUGO,         proc_pid_attr_operations),
	REG("exec",       S_IRUGO|S_IWUGO, proc_pid_attr_operations),
	REG("fscreate",   S_IRUGO|S_IWUGO, proc_pid_attr_operations),
	REG("keycreate",  S_IRUGO|S_IWUGO, proc_pid_attr_operations),
	REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
};
2637
2638static int proc_attr_dir_readdir(struct file * filp,
2639                             void * dirent, filldir_t filldir)
2640{
2641        return proc_pident_readdir(filp,dirent,filldir,
2642                                   attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
2643}
2644
2645static const struct file_operations proc_attr_dir_operations = {
2646        .read           = generic_read_dir,
2647        .readdir        = proc_attr_dir_readdir,
2648        .llseek         = default_llseek,
2649};
2650
2651static struct dentry *proc_attr_dir_lookup(struct inode *dir,
2652                                struct dentry *dentry, struct nameidata *nd)
2653{
2654        return proc_pident_lookup(dir, dentry,
2655                                  attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2656}
2657
2658static const struct inode_operations proc_attr_dir_inode_operations = {
2659        .lookup         = proc_attr_dir_lookup,
2660        .getattr        = pid_getattr,
2661        .setattr        = proc_setattr,
2662};
2663
2664#endif
2665
2666#ifdef CONFIG_ELF_CORE
2667static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2668                                         size_t count, loff_t *ppos)
2669{
2670        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
2671        struct mm_struct *mm;
2672        char buffer[PROC_NUMBUF];
2673        size_t len;
2674        int ret;
2675
2676        if (!task)
2677                return -ESRCH;
2678
2679        ret = 0;
2680        mm = get_task_mm(task);
2681        if (mm) {
2682                len = snprintf(buffer, sizeof(buffer), "%08lx\n",
2683                               ((mm->flags & MMF_DUMP_FILTER_MASK) >>
2684                                MMF_DUMP_FILTER_SHIFT));
2685                mmput(mm);
2686                ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
2687        }
2688
2689        put_task_struct(task);
2690
2691        return ret;
2692}
2693
2694static ssize_t proc_coredump_filter_write(struct file *file,
2695                                          const char __user *buf,
2696                                          size_t count,
2697                                          loff_t *ppos)
2698{
2699        struct task_struct *task;
2700        struct mm_struct *mm;
2701        char buffer[PROC_NUMBUF], *end;
2702        unsigned int val;
2703        int ret;
2704        int i;
2705        unsigned long mask;
2706
2707        ret = -EFAULT;
2708        memset(buffer, 0, sizeof(buffer));
2709        if (count > sizeof(buffer) - 1)
2710                count = sizeof(buffer) - 1;
2711        if (copy_from_user(buffer, buf, count))
2712                goto out_no_task;
2713
2714        ret = -EINVAL;
2715        val = (unsigned int)simple_strtoul(buffer, &end, 0);
2716        if (*end == '\n')
2717                end++;
2718        if (end - buffer == 0)
2719                goto out_no_task;
2720
2721        ret = -ESRCH;
2722        task = get_proc_task(file->f_dentry->d_inode);
2723        if (!task)
2724                goto out_no_task;
2725
2726        ret = end - buffer;
2727        mm = get_task_mm(task);
2728        if (!mm)
2729                goto out_no_mm;
2730
2731        for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
2732                if (val & mask)
2733                        set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2734                else
2735                        clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2736        }
2737
2738        mmput(mm);
2739 out_no_mm:
2740        put_task_struct(task);
2741 out_no_task:
2742        return ret;
2743}
2744
2745static const struct file_operations proc_coredump_filter_operations = {
2746        .read           = proc_coredump_filter_read,
2747        .write          = proc_coredump_filter_write,
2748        .llseek         = generic_file_llseek,
2749};
2750#endif
2751
2752/*
2753 * /proc/self:
2754 */
2755static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
2756                              int buflen)
2757{
2758        struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2759        pid_t tgid = task_tgid_nr_ns(current, ns);
2760        char tmp[PROC_NUMBUF];
2761        if (!tgid)
2762                return -ENOENT;
2763        sprintf(tmp, "%d", tgid);
2764        return vfs_readlink(dentry,buffer,buflen,tmp);
2765}
2766
2767static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
2768{
2769        struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2770        pid_t tgid = task_tgid_nr_ns(current, ns);
2771        char *name = ERR_PTR(-ENOENT);
2772        if (tgid) {
2773                name = __getname();
2774                if (!name)
2775                        name = ERR_PTR(-ENOMEM);
2776                else
2777                        sprintf(name, "%d", tgid);
2778        }
2779        nd_set_link(nd, name);
2780        return NULL;
2781}
2782
/*
 * put_link() handler for /proc/self: give back the name buffer recorded in
 * @nd, unless what was recorded is an error pointer.
 */
static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
				void *cookie)
{
	char *link = nd_get_link(nd);

	if (IS_ERR(link))
		return;

	__putname(link);
}
2790
2791static const struct inode_operations proc_self_inode_operations = {
2792        .readlink       = proc_self_readlink,
2793        .follow_link    = proc_self_follow_link,
2794        .put_link       = proc_self_put_link,
2795};
2796
/*
 * proc base
 *
 * These are the directory entries in the root directory of /proc
 * that properly belong to the /proc filesystem, as they
 * describe something that is process related.
 */
/*
 * Table of the process-related entries living directly in the /proc root.
 * It currently holds a single entry, the "self" symbolic link.
 */
static const struct pid_entry proc_base_stuff[] = {
        NOD("self", S_IFLNK|S_IRWXUGO,
                &proc_self_inode_operations, NULL, {}),
};
2808
2809static struct dentry *proc_base_instantiate(struct inode *dir,
2810        struct dentry *dentry, struct task_struct *task, const void *ptr)
2811{
2812        const struct pid_entry *p = ptr;
2813        struct inode *inode;
2814        struct proc_inode *ei;
2815        struct dentry *error;
2816
2817        /* Allocate the inode */
2818        error = ERR_PTR(-ENOMEM);
2819        inode = new_inode(dir->i_sb);
2820        if (!inode)
2821                goto out;
2822
2823        /* Initialize the inode */
2824        ei = PROC_I(inode);
2825        inode->i_ino = get_next_ino();
2826        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2827
2828        /*
2829         * grab the reference to the task.
2830         */
2831        ei->pid = get_task_pid(task, PIDTYPE_PID);
2832        if (!ei->pid)
2833                goto out_iput;
2834
2835        inode->i_mode = p->mode;
2836        if (S_ISDIR(inode->i_mode))
2837                set_nlink(inode, 2);
2838        if (S_ISLNK(inode->i_mode))
2839                inode->i_size = 64;
2840        if (p->iop)
2841                inode->i_op = p->iop;
2842        if (p->fop)
2843                inode->i_fop = p->fop;
2844        ei->op = p->op;
2845        d_add(dentry, inode);
2846        error = NULL;
2847out:
2848        return error;
2849out_iput:
2850        iput(inode);
2851        goto out;
2852}
2853
2854static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
2855{
2856        struct dentry *error;
2857        struct task_struct *task = get_proc_task(dir);
2858        const struct pid_entry *p, *last;
2859
2860        error = ERR_PTR(-ENOENT);
2861
2862        if (!task)
2863                goto out_no_task;
2864
2865        /* Lookup the directory entry */
2866        last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
2867        for (p = proc_base_stuff; p <= last; p++) {
2868                if (p->len != dentry->d_name.len)
2869                        continue;
2870                if (!memcmp(dentry->d_name.name, p->name, p->len))
2871                        break;
2872        }
2873        if (p > last)
2874                goto out;
2875
2876        error = proc_base_instantiate(dir, dentry, task, p);
2877
2878out:
2879        put_task_struct(task);
2880out_no_task:
2881        return error;
2882}
2883
2884static int proc_base_fill_cache(struct file *filp, void *dirent,
2885        filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2886{
2887        return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2888                                proc_base_instantiate, task, p);
2889}
2890
2891#ifdef CONFIG_TASK_IO_ACCOUNTING
2892static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2893{
2894        struct task_io_accounting acct = task->ioac;
2895        unsigned long flags;
2896        int result;
2897
2898        result = mutex_lock_killable(&task->signal->cred_guard_mutex);
2899        if (result)
2900                return result;
2901
2902        if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
2903                result = -EACCES;
2904                goto out_unlock;
2905        }
2906
2907        if (whole && lock_task_sighand(task, &flags)) {
2908                struct task_struct *t = task;
2909
2910                task_io_accounting_add(&acct, &task->signal->ioac);
2911                while_each_thread(task, t)
2912                        task_io_accounting_add(&acct, &t->ioac);
2913
2914                unlock_task_sighand(task, &flags);
2915        }
2916        result = sprintf(buffer,
2917                        "rchar: %llu\n"
2918                        "wchar: %llu\n"
2919                        "syscr: %llu\n"
2920                        "syscw: %llu\n"
2921                        "read_bytes: %llu\n"
2922                        "write_bytes: %llu\n"
2923                        "cancelled_write_bytes: %llu\n",
2924                        (unsigned long long)acct.rchar,
2925                        (unsigned long long)acct.wchar,
2926                        (unsigned long long)acct.syscr,
2927                        (unsigned long long)acct.syscw,
2928                        (unsigned long long)acct.read_bytes,
2929                        (unsigned long long)acct.write_bytes,
2930                        (unsigned long long)acct.cancelled_write_bytes);
2931out_unlock:
2932        mutex_unlock(&task->signal->cred_guard_mutex);
2933        return result;
2934}
2935
/* "io" handler for a single thread: report only @task's own counters. */
static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
{
	const int whole_group = 0;

	return do_io_accounting(task, buffer, whole_group);
}
2940
/* "io" handler for a thread group: ask for the group-wide totals. */
static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
{
	const int whole_group = 1;

	return do_io_accounting(task, buffer, whole_group);
}
2945#endif /* CONFIG_TASK_IO_ACCOUNTING */
2946
2947static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2948                                struct pid *pid, struct task_struct *task)
2949{
2950        int err = lock_trace(task);
2951        if (!err) {
2952                seq_printf(m, "%08x\n", task->personality);
2953                unlock_trace(task);
2954        }
2955        return err;
2956}
2957
2958/*
2959 * Thread groups
2960 */
/*
 * Forward declarations: the "task" entry of tgid_base_stuff refers to these
 * operation tables, which are defined further down in this file.
 */
static const struct file_operations proc_task_operations;
static const struct inode_operations proc_task_inode_operations;
2963
/*
 * Entries of a thread-group directory.  The table is handed to
 * proc_pident_readdir()/proc_pident_lookup() by the tgid base handlers
 * and is also used to count subdirectories for the directory's link count.
 * Entries guarded by #ifdef exist only when that config option is set.
 */
static const struct pid_entry tgid_base_stuff[] = {
        DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
        DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
#ifdef CONFIG_CHECKPOINT_RESTORE
        DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
#endif
        DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
        DIR("ns",         S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
        DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
        REG("environ",    S_IRUSR, proc_environ_operations),
        INF("auxv",       S_IRUSR, proc_pid_auxv),
        ONE("status",     S_IRUGO, proc_pid_status),
        ONE("personality", S_IRUGO, proc_pid_personality),
        INF("limits",     S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
        REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
        REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
#endif
        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
        INF("syscall",    S_IRUGO, proc_pid_syscall),
#endif
        INF("cmdline",    S_IRUGO, proc_pid_cmdline),
        ONE("stat",       S_IRUGO, proc_tgid_stat),
        ONE("statm",      S_IRUGO, proc_pid_statm),
        REG("maps",       S_IRUGO, proc_maps_operations),
#ifdef CONFIG_NUMA
        REG("numa_maps",  S_IRUGO, proc_numa_maps_operations),
#endif
        REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
        LNK("cwd",        proc_cwd_link),
        LNK("root",       proc_root_link),
        LNK("exe",        proc_exe_link),
        REG("mounts",     S_IRUGO, proc_mounts_operations),
        REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
        REG("mountstats", S_IRUSR, proc_mountstats_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
        REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
        REG("smaps",      S_IRUGO, proc_smaps_operations),
        REG("pagemap",    S_IRUGO, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
        DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
        INF("wchan",      S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
        ONE("stack",      S_IRUGO, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
        INF("schedstat",  S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
        REG("latency",  S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
        REG("cpuset",     S_IRUGO, proc_cpuset_operations),
#endif
#ifdef CONFIG_CGROUPS
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
#endif
        INF("oom_score",  S_IRUGO, proc_oom_score),
        REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
        REG("sessionid",  S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
        REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
#endif
#ifdef CONFIG_ELF_CORE
        REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
        INF("io",       S_IRUSR, proc_tgid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
        INF("hardwall",   S_IRUGO, proc_pid_hardwall),
#endif
};
3050
3051static int proc_tgid_base_readdir(struct file * filp,
3052                             void * dirent, filldir_t filldir)
3053{
3054        return proc_pident_readdir(filp,dirent,filldir,
3055                                   tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
3056}
3057
3058static const struct file_operations proc_tgid_base_operations = {
3059        .read           = generic_read_dir,
3060        .readdir        = proc_tgid_base_readdir,
3061        .llseek         = default_llseek,
3062};
3063
3064static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
3065        return proc_pident_lookup(dir, dentry,
3066                                  tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
3067}
3068
3069static const struct inode_operations proc_tgid_base_inode_operations = {
3070        .lookup         = proc_tgid_base_lookup,
3071        .getattr        = pid_getattr,
3072        .setattr        = proc_setattr,
3073        .permission     = proc_pid_permission,
3074};
3075
3076static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
3077{
3078        struct dentry *dentry, *leader, *dir;
3079        char buf[PROC_NUMBUF];
3080        struct qstr name;
3081
3082        name.name = buf;
3083        name.len = snprintf(buf, sizeof(buf), "%d", pid);
3084        dentry = d_hash_and_lookup(mnt->mnt_root, &name);
3085        if (dentry) {
3086                shrink_dcache_parent(dentry);
3087                d_drop(dentry);
3088                dput(dentry);
3089        }
3090
3091        name.name = buf;
3092        name.len = snprintf(buf, sizeof(buf), "%d", tgid);
3093        leader = d_hash_and_lookup(mnt->mnt_root, &name);
3094        if (!leader)
3095                goto out;
3096
3097        name.name = "task";
3098        name.len = strlen(name.name);
3099        dir = d_hash_and_lookup(leader, &name);
3100        if (!dir)
3101                goto out_put_leader;
3102
3103        name.name = buf;
3104        name.len = snprintf(buf, sizeof(buf), "%d", pid);
3105        dentry = d_hash_and_lookup(dir, &name);
3106        if (dentry) {
3107                shrink_dcache_parent(dentry);
3108                d_drop(dentry);
3109                dput(dentry);
3110        }
3111
3112        dput(dir);
3113out_put_leader:
3114        dput(leader);
3115out:
3116        return;
3117}
3118
3119/**
3120 * proc_flush_task -  Remove dcache entries for @task from the /proc dcache.
3121 * @task: task that should be flushed.
3122 *
3123 * When flushing dentries from proc, one needs to flush them from global
3124 * proc (proc_mnt) and from all the namespaces' procs this task was seen
3125 * in. This call is supposed to do all of this job.
3126 *
3127 * Looks in the dcache for
3128 * /proc/@pid
3129 * /proc/@tgid/task/@pid
3130 * if either directory is present flushes it and all of it'ts children
3131 * from the dcache.
3132 *
3133 * It is safe and reasonable to cache /proc entries for a task until
3134 * that task exits.  After that they just clog up the dcache with
3135 * useless entries, possibly causing useful dcache entries to be
3136 * flushed instead.  This routine is proved to flush those useless
3137 * dcache entries at process exit time.
3138 *
3139 * NOTE: This routine is just an optimization so it does not guarantee
3140 *       that no dcache entries will exist at process exit time it
3141 *       just makes it very unlikely that any will persist.
3142 */
3143
3144void proc_flush_task(struct task_struct *task)
3145{
3146        int i;
3147        struct pid *pid, *tgid;
3148        struct upid *upid;
3149
3150        pid = task_pid(task);
3151        tgid = task_tgid(task);
3152
3153        for (i = 0; i <= pid->level; i++) {
3154                upid = &pid->numbers[i];
3155                proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
3156                                        tgid->numbers[i].nr);
3157        }
3158
3159        upid = &pid->numbers[pid->level];
3160        if (upid->nr == 1)
3161                pid_ns_release_proc(upid->ns);
3162}
3163
3164static struct dentry *proc_pid_instantiate(struct inode *dir,
3165                                           struct dentry * dentry,
3166                                           struct task_struct *task, const void *ptr)
3167{
3168        struct dentry *error = ERR_PTR(-ENOENT);
3169        struct inode *inode;
3170
3171        inode = proc_pid_make_inode(dir->i_sb, task);
3172        if (!inode)
3173                goto out;
3174
3175        inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
3176        inode->i_op = &proc_tgid_base_inode_operations;
3177        inode->i_fop = &proc_tgid_base_operations;
3178        inode->i_flags|=S_IMMUTABLE;
3179
3180        set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
3181                                                  ARRAY_SIZE(tgid_base_stuff)));
3182
3183        d_set_d_op(dentry, &pid_dentry_operations);
3184
3185        d_add(dentry, inode);
3186        /* Close the race of the process dying before we return the dentry */
3187        if (pid_revalidate(dentry, NULL))
3188                error = NULL;
3189out:
3190        return error;
3191}
3192
3193struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
3194{
3195        struct dentry *result;
3196        struct task_struct *task;
3197        unsigned tgid;
3198        struct pid_namespace *ns;
3199
3200        result = proc_base_lookup(dir, dentry);
3201        if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
3202                goto out;
3203
3204        tgid = name_to_int(dentry);
3205        if (tgid == ~0U)
3206                goto out;
3207
3208        ns = dentry->d_sb->s_fs_info;
3209        rcu_read_lock();
3210        task = find_task_by_pid_ns(tgid, ns);
3211        if (task)
3212                get_task_struct(task);
3213        rcu_read_unlock();
3214        if (!task)
3215                goto out;
3216
3217        result = proc_pid_instantiate(dir, dentry, task, NULL);
3218        put_task_struct(task);
3219out:
3220        return result;
3221}
3222
/*
 * Find the first task whose tgid is >= the requested tgid.
 *
 */
/* Cursor passed to and returned by next_tgid(). */
struct tgid_iter {
        /* in: lowest tgid to consider; out: tgid of the pid that was found */
        unsigned int tgid;
        /* task found (a reference is held on it), or NULL when none is left */
        struct task_struct *task;
};
3231static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
3232{
3233        struct pid *pid;
3234
3235        if (iter.task)
3236                put_task_struct(iter.task);
3237        rcu_read_lock();
3238retry:
3239        iter.task = NULL;
3240        pid = find_ge_pid(iter.tgid, ns);
3241        if (pid) {
3242                iter.tgid = pid_nr_ns(pid, ns);
3243                iter.task = pid_task(pid, PIDTYPE_PID);
3244                /* What we to know is if the pid we have find is the
3245                 * pid of a thread_group_leader.  Testing for task
3246                 * being a thread_group_leader is the obvious thing
3247                 * todo but there is a window when it fails, due to
3248                 * the pid transfer logic in de_thread.
3249                 *
3250                 * So we perform the straight forward test of seeing
3251                 * if the pid we have found is the pid of a thread
3252                 * group leader, and don't worry if the task we have
3253                 * found doesn't happen to be a thread group leader.
3254                 * As we don't care in the case of readdir.
3255                 */
3256                if (!iter.task || !has_group_leader_pid(iter.task)) {
3257                        iter.tgid += 1;
3258                        goto retry;
3259                }
3260                get_task_struct(iter.task);
3261        }
3262        rcu_read_unlock();
3263        return iter;
3264}
3265
/*
 * Directory position of tgid 0: the per-process entries start right after
 * the proc_base_stuff entries, which begin at FIRST_PROCESS_ENTRY.
 */
#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
3267
3268static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
3269        struct tgid_iter iter)
3270{
3271        char name[PROC_NUMBUF];
3272        int len = snprintf(name, sizeof(name), "%d", iter.tgid);
3273        return proc_fill_cache(filp, dirent, filldir, name, len,
3274                                proc_pid_instantiate, iter.task, NULL);
3275}
3276
/*
 * filldir callback that ignores all of its arguments and returns 0.
 * proc_pid_readdir() substitutes it for the real callback when
 * has_pid_permissions() fails for a task.
 */
static int fake_filldir(void *buf, const char *name, int namelen,
                        loff_t offset, u64 ino, unsigned d_type)
{
        return 0;
}
3282
3283/* for the /proc/ directory itself, after non-process stuff has been done */
3284int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3285{
3286        unsigned int nr;
3287        struct task_struct *reaper;
3288        struct tgid_iter iter;
3289        struct pid_namespace *ns;
3290        filldir_t __filldir;
3291
3292        if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3293                goto out_no_task;
3294        nr = filp->f_pos - FIRST_PROCESS_ENTRY;
3295
3296        reaper = get_proc_task(filp->f_path.dentry->d_inode);
3297        if (!reaper)
3298                goto out_no_task;
3299
3300        for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
3301                const struct pid_entry *p = &proc_base_stuff[nr];
3302                if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
3303                        goto out;
3304        }
3305
3306        ns = filp->f_dentry->d_sb->s_fs_info;
3307        iter.task = NULL;
3308        iter.tgid = filp->f_pos - TGID_OFFSET;
3309        for (iter = next_tgid(ns, iter);
3310             iter.task;
3311             iter.tgid += 1, iter = next_tgid(ns, iter)) {
3312                if (has_pid_permissions(ns, iter.task, 2))
3313                        __filldir = filldir;
3314                else
3315                        __filldir = fake_filldir;
3316
3317                filp->f_pos = iter.tgid + TGID_OFFSET;
3318                if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
3319                        put_task_struct(iter.task);
3320                        goto out;
3321                }
3322        }
3323        filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
3324out:
3325        put_task_struct(reaper);
3326out_no_task:
3327        return 0;
3328}
3329
3330/*
3331 * Tasks
3332 */
/*
 * Entries of a single thread's directory.  The table is handed to
 * proc_pident_readdir()/proc_pident_lookup() by the tid base handlers
 * and is also used to count subdirectories for the directory's link count.
 * Entries guarded by #ifdef exist only when that config option is set.
 */
static const struct pid_entry tid_base_stuff[] = {
        DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
        DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
        DIR("ns",        S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
        REG("environ",   S_IRUSR, proc_environ_operations),
        INF("auxv",      S_IRUSR, proc_pid_auxv),
        ONE("status",    S_IRUGO, proc_pid_status),
        ONE("personality", S_IRUGO, proc_pid_personality),
        INF("limits",    S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
        REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
        INF("syscall",   S_IRUGO, proc_pid_syscall),
#endif
        INF("cmdline",   S_IRUGO, proc_pid_cmdline),
        ONE("stat",      S_IRUGO, proc_tid_stat),
        ONE("statm",     S_IRUGO, proc_pid_statm),
        REG("maps",      S_IRUGO, proc_maps_operations),
#ifdef CONFIG_NUMA
        REG("numa_maps", S_IRUGO, proc_numa_maps_operations),
#endif
        REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
        LNK("cwd",       proc_cwd_link),
        LNK("root",      proc_root_link),
        LNK("exe",       proc_exe_link),
        REG("mounts",    S_IRUGO, proc_mounts_operations),
        REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
        REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
        REG("smaps",     S_IRUGO, proc_smaps_operations),
        REG("pagemap",    S_IRUGO, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
        DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
        INF("wchan",     S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
        ONE("stack",      S_IRUGO, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
        INF("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
        REG("latency",  S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
        REG("cpuset",    S_IRUGO, proc_cpuset_operations),
#endif
#ifdef CONFIG_CGROUPS
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
#endif
        INF("oom_score", S_IRUGO, proc_oom_score),
        REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
        REG("sessionid",  S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
        REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
        INF("io",       S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
        INF("hardwall",   S_IRUGO, proc_pid_hardwall),
#endif
};
3405
3406static int proc_tid_base_readdir(struct file * filp,
3407                             void * dirent, filldir_t filldir)
3408{
3409        return proc_pident_readdir(filp,dirent,filldir,
3410                                   tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
3411}
3412
3413static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
3414        return proc_pident_lookup(dir, dentry,
3415                                  tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3416}
3417
3418static const struct file_operations proc_tid_base_operations = {
3419        .read           = generic_read_dir,
3420        .readdir        = proc_tid_base_readdir,
3421        .llseek         = default_llseek,
3422};
3423
3424static const struct inode_operations proc_tid_base_inode_operations = {
3425        .lookup         = proc_tid_base_lookup,
3426        .getattr        = pid_getattr,
3427        .setattr        = proc_setattr,
3428};
3429
3430static struct dentry *proc_task_instantiate(struct inode *dir,
3431        struct dentry *dentry, struct task_struct *task, const void *ptr)
3432{
3433        struct dentry *error = ERR_PTR(-ENOENT);
3434        struct inode *inode;
3435        inode = proc_pid_make_inode(dir->i_sb, task);
3436
3437        if (!inode)
3438                goto out;
3439        inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
3440        inode->i_op = &proc_tid_base_inode_operations;
3441        inode->i_fop = &proc_tid_base_operations;
3442        inode->i_flags|=S_IMMUTABLE;
3443
3444        set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
3445                                                  ARRAY_SIZE(tid_base_stuff)));
3446
3447        d_set_d_op(dentry, &pid_dentry_operations);
3448
3449        d_add(dentry, inode);
3450        /* Close the race of the process dying before we return the dentry */
3451        if (pid_revalidate(dentry, NULL))
3452                error = NULL;
3453out:
3454        return error;
3455}
3456
3457static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
3458{
3459        struct dentry *result = ERR_PTR(-ENOENT);
3460        struct task_struct *task;
3461        struct task_struct *leader = get_proc_task(dir);
3462        unsigned tid;
3463        struct pid_namespace *ns;
3464
3465        if (!leader)
3466                goto out_no_task;
3467
3468        tid = name_to_int(dentry);
3469        if (tid == ~0U)
3470                goto out;
3471
3472        ns = dentry->d_sb->s_fs_info;
3473        rcu_read_lock();
3474        task = find_task_by_pid_ns(tid, ns);
3475        if (task)
3476                get_task_struct(task);
3477        rcu_read_unlock();
3478        if (!task)
3479                goto out;
3480        if (!same_thread_group(leader, task))
3481                goto out_drop_task;
3482
3483        result = proc_task_instantiate(dir, dentry, task, NULL);
3484out_drop_task:
3485        put_task_struct(task);
3486out:
3487        put_task_struct(leader);
3488out_no_task:
3489        return result;
3490}
3491
3492/*
3493 * Find the first tid of a thread group to return to user space.
3494 *
3495 * Usually this is just the thread group leader, but if the users
3496 * buffer was too small or there was a seek into the middle of the
3497 * directory we have more work todo.
3498 *
3499 * In the case of a short read we start with find_task_by_pid.
3500 *
3501 * In the case of a seek we start with the leader and walk nr
3502 * threads past it.
3503 */
3504static struct task_struct *first_tid(struct task_struct *leader,
3505                int tid, int nr, struct pid_namespace *ns)
3506{
3507        struct task_struct *pos;
3508
3509        rcu_read_lock();
3510        /* Attempt to start with the pid of a thread */
3511        if (tid && (nr > 0)) {
3512                pos = find_task_by_pid_ns(tid, ns);
3513                if (pos && (pos->group_leader == leader))
3514                        goto found;
3515        }
3516
3517        /* If nr exceeds the number of threads there is nothing todo */
3518        pos = NULL;
3519        if (nr && nr >= get_nr_threads(leader))
3520                goto out;
3521
3522        /* If we haven't found our starting place yet start
3523         * with the leader and walk nr threads forward.
3524         */
3525        for (pos = leader; nr > 0; --nr) {
3526                pos = next_thread(pos);
3527                if (pos == leader) {
3528                        pos = NULL;
3529                        goto out;
3530                }
3531        }
3532found:
3533        get_task_struct(pos);
3534out:
3535        rcu_read_unlock();
3536        return pos;
3537}
3538
3539/*
3540 * Find the next thread in the thread list.
3541 * Return NULL if there is an error or no next thread.
3542 *
3543 * The reference to the input task_struct is released.
3544 */
3545static struct task_struct *next_tid(struct task_struct *start)
3546{
3547        struct task_struct *pos = NULL;
3548        rcu_read_lock();
3549        if (pid_alive(start)) {
3550                pos = next_thread(start);
3551                if (thread_group_leader(pos))
3552                        pos = NULL;
3553                else
3554                        get_task_struct(pos);
3555        }
3556        rcu_read_unlock();
3557        put_task_struct(start);
3558        return pos;
3559}
3560
3561static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
3562        struct task_struct *task, int tid)
3563{
3564        char name[PROC_NUMBUF];
3565        int len = snprintf(name, sizeof(name), "%d", tid);
3566        return proc_fill_cache(filp, dirent, filldir, name, len,
3567                                proc_task_instantiate, task, NULL);
3568}
3569
3570/* for the /proc/TGID/task/ directories */
3571static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
3572{
3573        struct dentry *dentry = filp->f_path.dentry;
3574        struct inode *inode = dentry->d_inode;
3575        struct task_struct *leader = NULL;
3576        struct task_struct *task;
3577        int retval = -ENOENT;
3578        ino_t ino;
3579        int tid;
3580        struct pid_namespace *ns;
3581
3582        task = get_proc_task(inode);
3583        if (!task)
3584                goto out_no_task;
3585        rcu_read_lock();
3586        if (pid_alive(task)) {
3587                leader = task->group_leader;
3588                get_task_struct(leader);
3589        }
3590        rcu_read_unlock();
3591        put_task_struct(task);
3592        if (!leader)
3593                goto out_no_task;
3594        retval = 0;
3595
3596        switch ((unsigned long)filp->f_pos) {
3597        case 0:
3598                ino = inode->i_ino;
3599                if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
3600                        goto out;
3601                filp->f_pos++;
3602                /* fall through */
3603        case 1:
3604                ino = parent_ino(dentry);
3605                if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
3606                        goto out;
3607                filp->f_pos++;
3608                /* fall through */
3609        }
3610
3611        /* f_version caches the tgid value that the last readdir call couldn't
3612         * return. lseek aka telldir automagically resets f_version to 0.
3613         */
3614        ns = filp->f_dentry->d_sb->s_fs_info;
3615        tid = (int)filp->f_version;
3616        filp->f_version = 0;
3617        for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
3618             task;
3619             task = next_tid(task), filp->f_pos++) {
3620                tid = task_pid_nr_ns(task, ns);
3621                if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
3622                        /* returning this tgid failed, save it as the first
3623                         * pid for the next readir call */
3624                        filp->f_version = (u64)tid;
3625                        put_task_struct(task);
3626                        break;
3627                }
3628        }
3629out:
3630        put_task_struct(leader);
3631out_no_task:
3632        return retval;
3633}
3634
3635static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
3636{
3637        struct inode *inode = dentry->d_inode;
3638        struct task_struct *p = get_proc_task(inode);
3639        generic_fillattr(inode, stat);
3640
3641        if (p) {
3642                stat->nlink += get_nr_threads(p);
3643                put_task_struct(p);
3644        }
3645
3646        return 0;
3647}
3648
3649static const struct inode_operations proc_task_inode_operations = {
3650        .lookup         = proc_task_lookup,
3651        .getattr        = proc_task_getattr,
3652        .setattr        = proc_setattr,
3653        .permission     = proc_pid_permission,
3654};
3655
3656static const struct file_operations proc_task_operations = {
3657        .read           = generic_read_dir,
3658        .readdir        = proc_task_readdir,
3659        .llseek         = default_llseek,
3660};
3661