linux/fs/proc/inode.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  linux/fs/proc/inode.c
   4 *
   5 *  Copyright (C) 1991, 1992  Linus Torvalds
   6 */
   7
   8#include <linux/cache.h>
   9#include <linux/time.h>
  10#include <linux/proc_fs.h>
  11#include <linux/kernel.h>
  12#include <linux/pid_namespace.h>
  13#include <linux/mm.h>
  14#include <linux/string.h>
  15#include <linux/stat.h>
  16#include <linux/completion.h>
  17#include <linux/poll.h>
  18#include <linux/printk.h>
  19#include <linux/file.h>
  20#include <linux/limits.h>
  21#include <linux/init.h>
  22#include <linux/module.h>
  23#include <linux/sysctl.h>
  24#include <linux/seq_file.h>
  25#include <linux/slab.h>
  26#include <linux/mount.h>
  27#include <linux/magic.h>
  28
  29#include <linux/uaccess.h>
  30
  31#include "internal.h"
  32
  33static void proc_evict_inode(struct inode *inode)
  34{
  35        struct proc_dir_entry *de;
  36        struct ctl_table_header *head;
  37
  38        truncate_inode_pages_final(&inode->i_data);
  39        clear_inode(inode);
  40
  41        /* Stop tracking associated processes */
  42        put_pid(PROC_I(inode)->pid);
  43
  44        /* Let go of any associated proc directory entry */
  45        de = PDE(inode);
  46        if (de)
  47                pde_put(de);
  48
  49        head = PROC_I(inode)->sysctl;
  50        if (head) {
  51                RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
  52                proc_sys_evict_inode(inode, head);
  53        }
  54}
  55
/*
 * Slab caches for struct proc_inode and struct pde_opener objects.
 * Both pointers are assigned once, in proc_init_kmemcache().
 */
static struct kmem_cache *proc_inode_cachep __ro_after_init;
static struct kmem_cache *pde_opener_cache __ro_after_init;
  58
  59static struct inode *proc_alloc_inode(struct super_block *sb)
  60{
  61        struct proc_inode *ei;
  62        struct inode *inode;
  63
  64        ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
  65        if (!ei)
  66                return NULL;
  67        ei->pid = NULL;
  68        ei->fd = 0;
  69        ei->op.proc_get_link = NULL;
  70        ei->pde = NULL;
  71        ei->sysctl = NULL;
  72        ei->sysctl_entry = NULL;
  73        ei->ns_ops = NULL;
  74        inode = &ei->vfs_inode;
  75        return inode;
  76}
  77
  78static void proc_i_callback(struct rcu_head *head)
  79{
  80        struct inode *inode = container_of(head, struct inode, i_rcu);
  81        kmem_cache_free(proc_inode_cachep, PROC_I(inode));
  82}
  83
  84static void proc_destroy_inode(struct inode *inode)
  85{
  86        call_rcu(&inode->i_rcu, proc_i_callback);
  87}
  88
  89static void init_once(void *foo)
  90{
  91        struct proc_inode *ei = (struct proc_inode *) foo;
  92
  93        inode_init_once(&ei->vfs_inode);
  94}
  95
  96void __init proc_init_kmemcache(void)
  97{
  98        proc_inode_cachep = kmem_cache_create("proc_inode_cache",
  99                                             sizeof(struct proc_inode),
 100                                             0, (SLAB_RECLAIM_ACCOUNT|
 101                                                SLAB_MEM_SPREAD|SLAB_ACCOUNT|
 102                                                SLAB_PANIC),
 103                                             init_once);
 104        pde_opener_cache =
 105                kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
 106                                  SLAB_ACCOUNT|SLAB_PANIC, NULL);
 107        proc_dir_entry_cache = kmem_cache_create_usercopy(
 108                "proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC,
 109                offsetof(struct proc_dir_entry, inline_name),
 110                sizeof_field(struct proc_dir_entry, inline_name), NULL);
 111}
 112
 113static int proc_show_options(struct seq_file *seq, struct dentry *root)
 114{
 115        struct super_block *sb = root->d_sb;
 116        struct pid_namespace *pid = sb->s_fs_info;
 117
 118        if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
 119                seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
 120        if (pid->hide_pid != HIDEPID_OFF)
 121                seq_printf(seq, ",hidepid=%u", pid->hide_pid);
 122
 123        return 0;
 124}
 125
 126static const struct super_operations proc_sops = {
 127        .alloc_inode    = proc_alloc_inode,
 128        .destroy_inode  = proc_destroy_inode,
 129        .drop_inode     = generic_delete_inode,
 130        .evict_inode    = proc_evict_inode,
 131        .statfs         = simple_statfs,
 132        .remount_fs     = proc_remount,
 133        .show_options   = proc_show_options,
 134};
 135
 136enum {BIAS = -1U<<31};
 137
 138static inline int use_pde(struct proc_dir_entry *pde)
 139{
 140        return likely(atomic_inc_unless_negative(&pde->in_use));
 141}
 142
 143static void unuse_pde(struct proc_dir_entry *pde)
 144{
 145        if (unlikely(atomic_dec_return(&pde->in_use) == BIAS))
 146                complete(pde->pde_unload_completion);
 147}
 148
 149/* pde is locked on entry, unlocked on exit */
 150static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 151{
 152        /*
 153         * close() (proc_reg_release()) can't delete an entry and proceed:
 154         * ->release hook needs to be available at the right moment.
 155         *
 156         * rmmod (remove_proc_entry() et al) can't delete an entry and proceed:
 157         * "struct file" needs to be available at the right moment.
 158         *
 159         * Therefore, first process to enter this function does ->release() and
 160         * signals its completion to the other process which does nothing.
 161         */
 162        if (pdeo->closing) {
 163                /* somebody else is doing that, just wait */
 164                DECLARE_COMPLETION_ONSTACK(c);
 165                pdeo->c = &c;
 166                spin_unlock(&pde->pde_unload_lock);
 167                wait_for_completion(&c);
 168        } else {
 169                struct file *file;
 170                struct completion *c;
 171
 172                pdeo->closing = true;
 173                spin_unlock(&pde->pde_unload_lock);
 174                file = pdeo->file;
 175                pde->proc_fops->release(file_inode(file), file);
 176                spin_lock(&pde->pde_unload_lock);
 177                /* After ->release. */
 178                list_del(&pdeo->lh);
 179                c = pdeo->c;
 180                spin_unlock(&pde->pde_unload_lock);
 181                if (unlikely(c))
 182                        complete(c);
 183                kmem_cache_free(pde_opener_cache, pdeo);
 184        }
 185}
 186
 187void proc_entry_rundown(struct proc_dir_entry *de)
 188{
 189        DECLARE_COMPLETION_ONSTACK(c);
 190        /* Wait until all existing callers into module are done. */
 191        de->pde_unload_completion = &c;
 192        if (atomic_add_return(BIAS, &de->in_use) != BIAS)
 193                wait_for_completion(&c);
 194
 195        /* ->pde_openers list can't grow from now on. */
 196
 197        spin_lock(&de->pde_unload_lock);
 198        while (!list_empty(&de->pde_openers)) {
 199                struct pde_opener *pdeo;
 200                pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
 201                close_pdeo(de, pdeo);
 202                spin_lock(&de->pde_unload_lock);
 203        }
 204        spin_unlock(&de->pde_unload_lock);
 205}
 206
 207static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
 208{
 209        struct proc_dir_entry *pde = PDE(file_inode(file));
 210        loff_t rv = -EINVAL;
 211        if (use_pde(pde)) {
 212                loff_t (*llseek)(struct file *, loff_t, int);
 213                llseek = pde->proc_fops->llseek;
 214                if (!llseek)
 215                        llseek = default_llseek;
 216                rv = llseek(file, offset, whence);
 217                unuse_pde(pde);
 218        }
 219        return rv;
 220}
 221
 222static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 223{
 224        ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
 225        struct proc_dir_entry *pde = PDE(file_inode(file));
 226        ssize_t rv = -EIO;
 227        if (use_pde(pde)) {
 228                read = pde->proc_fops->read;
 229                if (read)
 230                        rv = read(file, buf, count, ppos);
 231                unuse_pde(pde);
 232        }
 233        return rv;
 234}
 235
 236static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 237{
 238        ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
 239        struct proc_dir_entry *pde = PDE(file_inode(file));
 240        ssize_t rv = -EIO;
 241        if (use_pde(pde)) {
 242                write = pde->proc_fops->write;
 243                if (write)
 244                        rv = write(file, buf, count, ppos);
 245                unuse_pde(pde);
 246        }
 247        return rv;
 248}
 249
 250static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts)
 251{
 252        struct proc_dir_entry *pde = PDE(file_inode(file));
 253        __poll_t rv = DEFAULT_POLLMASK;
 254        __poll_t (*poll)(struct file *, struct poll_table_struct *);
 255        if (use_pde(pde)) {
 256                poll = pde->proc_fops->poll;
 257                if (poll)
 258                        rv = poll(file, pts);
 259                unuse_pde(pde);
 260        }
 261        return rv;
 262}
 263
 264static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 265{
 266        struct proc_dir_entry *pde = PDE(file_inode(file));
 267        long rv = -ENOTTY;
 268        long (*ioctl)(struct file *, unsigned int, unsigned long);
 269        if (use_pde(pde)) {
 270                ioctl = pde->proc_fops->unlocked_ioctl;
 271                if (ioctl)
 272                        rv = ioctl(file, cmd, arg);
 273                unuse_pde(pde);
 274        }
 275        return rv;
 276}
 277
 278#ifdef CONFIG_COMPAT
 279static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 280{
 281        struct proc_dir_entry *pde = PDE(file_inode(file));
 282        long rv = -ENOTTY;
 283        long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
 284        if (use_pde(pde)) {
 285                compat_ioctl = pde->proc_fops->compat_ioctl;
 286                if (compat_ioctl)
 287                        rv = compat_ioctl(file, cmd, arg);
 288                unuse_pde(pde);
 289        }
 290        return rv;
 291}
 292#endif
 293
 294static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 295{
 296        struct proc_dir_entry *pde = PDE(file_inode(file));
 297        int rv = -EIO;
 298        int (*mmap)(struct file *, struct vm_area_struct *);
 299        if (use_pde(pde)) {
 300                mmap = pde->proc_fops->mmap;
 301                if (mmap)
 302                        rv = mmap(file, vma);
 303                unuse_pde(pde);
 304        }
 305        return rv;
 306}
 307
 308static unsigned long
 309proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
 310                           unsigned long len, unsigned long pgoff,
 311                           unsigned long flags)
 312{
 313        struct proc_dir_entry *pde = PDE(file_inode(file));
 314        unsigned long rv = -EIO;
 315
 316        if (use_pde(pde)) {
 317                typeof(proc_reg_get_unmapped_area) *get_area;
 318
 319                get_area = pde->proc_fops->get_unmapped_area;
 320#ifdef CONFIG_MMU
 321                if (!get_area)
 322                        get_area = current->mm->get_unmapped_area;
 323#endif
 324
 325                if (get_area)
 326                        rv = get_area(file, orig_addr, len, pgoff, flags);
 327                else
 328                        rv = orig_addr;
 329                unuse_pde(pde);
 330        }
 331        return rv;
 332}
 333
 334static int proc_reg_open(struct inode *inode, struct file *file)
 335{
 336        struct proc_dir_entry *pde = PDE(inode);
 337        int rv = 0;
 338        int (*open)(struct inode *, struct file *);
 339        int (*release)(struct inode *, struct file *);
 340        struct pde_opener *pdeo;
 341
 342        /*
 343         * Ensure that
 344         * 1) PDE's ->release hook will be called no matter what
 345         *    either normally by close()/->release, or forcefully by
 346         *    rmmod/remove_proc_entry.
 347         *
 348         * 2) rmmod isn't blocked by opening file in /proc and sitting on
 349         *    the descriptor (including "rmmod foo </proc/foo" scenario).
 350         *
 351         * Save every "struct file" with custom ->release hook.
 352         */
 353        if (!use_pde(pde))
 354                return -ENOENT;
 355
 356        release = pde->proc_fops->release;
 357        if (release) {
 358                pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL);
 359                if (!pdeo) {
 360                        rv = -ENOMEM;
 361                        goto out_unuse;
 362                }
 363        }
 364
 365        open = pde->proc_fops->open;
 366        if (open)
 367                rv = open(inode, file);
 368
 369        if (release) {
 370                if (rv == 0) {
 371                        /* To know what to release. */
 372                        pdeo->file = file;
 373                        pdeo->closing = false;
 374                        pdeo->c = NULL;
 375                        spin_lock(&pde->pde_unload_lock);
 376                        list_add(&pdeo->lh, &pde->pde_openers);
 377                        spin_unlock(&pde->pde_unload_lock);
 378                } else
 379                        kmem_cache_free(pde_opener_cache, pdeo);
 380        }
 381
 382out_unuse:
 383        unuse_pde(pde);
 384        return rv;
 385}
 386
 387static int proc_reg_release(struct inode *inode, struct file *file)
 388{
 389        struct proc_dir_entry *pde = PDE(inode);
 390        struct pde_opener *pdeo;
 391        spin_lock(&pde->pde_unload_lock);
 392        list_for_each_entry(pdeo, &pde->pde_openers, lh) {
 393                if (pdeo->file == file) {
 394                        close_pdeo(pde, pdeo);
 395                        return 0;
 396                }
 397        }
 398        spin_unlock(&pde->pde_unload_lock);
 399        return 0;
 400}
 401
 402static const struct file_operations proc_reg_file_ops = {
 403        .llseek         = proc_reg_llseek,
 404        .read           = proc_reg_read,
 405        .write          = proc_reg_write,
 406        .poll           = proc_reg_poll,
 407        .unlocked_ioctl = proc_reg_unlocked_ioctl,
 408#ifdef CONFIG_COMPAT
 409        .compat_ioctl   = proc_reg_compat_ioctl,
 410#endif
 411        .mmap           = proc_reg_mmap,
 412        .get_unmapped_area = proc_reg_get_unmapped_area,
 413        .open           = proc_reg_open,
 414        .release        = proc_reg_release,
 415};
 416
 417#ifdef CONFIG_COMPAT
 418static const struct file_operations proc_reg_file_ops_no_compat = {
 419        .llseek         = proc_reg_llseek,
 420        .read           = proc_reg_read,
 421        .write          = proc_reg_write,
 422        .poll           = proc_reg_poll,
 423        .unlocked_ioctl = proc_reg_unlocked_ioctl,
 424        .mmap           = proc_reg_mmap,
 425        .get_unmapped_area = proc_reg_get_unmapped_area,
 426        .open           = proc_reg_open,
 427        .release        = proc_reg_release,
 428};
 429#endif
 430
 431static void proc_put_link(void *p)
 432{
 433        unuse_pde(p);
 434}
 435
 436static const char *proc_get_link(struct dentry *dentry,
 437                                 struct inode *inode,
 438                                 struct delayed_call *done)
 439{
 440        struct proc_dir_entry *pde = PDE(inode);
 441        if (!use_pde(pde))
 442                return ERR_PTR(-EINVAL);
 443        set_delayed_call(done, proc_put_link, pde);
 444        return pde->data;
 445}
 446
/* Inode operations table whose only hook is ->get_link (proc_get_link()). */
const struct inode_operations proc_link_inode_operations = {
        .get_link       = proc_get_link,
};
 450
 451struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 452{
 453        struct inode *inode = new_inode_pseudo(sb);
 454
 455        if (inode) {
 456                inode->i_ino = de->low_ino;
 457                inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 458                PROC_I(inode)->pde = de;
 459
 460                if (is_empty_pde(de)) {
 461                        make_empty_dir_inode(inode);
 462                        return inode;
 463                }
 464                if (de->mode) {
 465                        inode->i_mode = de->mode;
 466                        inode->i_uid = de->uid;
 467                        inode->i_gid = de->gid;
 468                }
 469                if (de->size)
 470                        inode->i_size = de->size;
 471                if (de->nlink)
 472                        set_nlink(inode, de->nlink);
 473                WARN_ON(!de->proc_iops);
 474                inode->i_op = de->proc_iops;
 475                if (de->proc_fops) {
 476                        if (S_ISREG(inode->i_mode)) {
 477#ifdef CONFIG_COMPAT
 478                                if (!de->proc_fops->compat_ioctl)
 479                                        inode->i_fop =
 480                                                &proc_reg_file_ops_no_compat;
 481                                else
 482#endif
 483                                        inode->i_fop = &proc_reg_file_ops;
 484                        } else {
 485                                inode->i_fop = de->proc_fops;
 486                        }
 487                }
 488        } else
 489               pde_put(de);
 490        return inode;
 491}
 492
 493int proc_fill_super(struct super_block *s, void *data, int silent)
 494{
 495        struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
 496        struct inode *root_inode;
 497        int ret;
 498
 499        if (!proc_parse_options(data, ns))
 500                return -EINVAL;
 501
 502        /* User space would break if executables or devices appear on proc */
 503        s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
 504        s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
 505        s->s_blocksize = 1024;
 506        s->s_blocksize_bits = 10;
 507        s->s_magic = PROC_SUPER_MAGIC;
 508        s->s_op = &proc_sops;
 509        s->s_time_gran = 1;
 510
 511        /*
 512         * procfs isn't actually a stacking filesystem; however, there is
 513         * too much magic going on inside it to permit stacking things on
 514         * top of it
 515         */
 516        s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
 517        
 518        pde_get(&proc_root);
 519        root_inode = proc_get_inode(s, &proc_root);
 520        if (!root_inode) {
 521                pr_err("proc_fill_super: get root inode failed\n");
 522                return -ENOMEM;
 523        }
 524
 525        s->s_root = d_make_root(root_inode);
 526        if (!s->s_root) {
 527                pr_err("proc_fill_super: allocate dentry failed\n");
 528                return -ENOMEM;
 529        }
 530
 531        ret = proc_setup_self(s);
 532        if (ret) {
 533                return ret;
 534        }
 535        return proc_setup_thread_self(s);
 536}
 537