linux/ipc/shm.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * linux/ipc/shm.c
   4 * Copyright (C) 1992, 1993 Krishna Balasubramanian
   5 *       Many improvements/fixes by Bruno Haible.
   6 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
   7 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
   8 *
   9 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  10 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
  11 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
  12 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
  13 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
  14 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
  15 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
  16 *
  17 * support for audit of ipc object properties and permission changes
  18 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  19 *
  20 * namespaces support
  21 * OpenVZ, SWsoft Inc.
  22 * Pavel Emelianov <xemul@openvz.org>
  23 *
  24 * Better ipc lock (kern_ipc_perm.lock) handling
  25 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
  26 */
  27
  28#include <linux/slab.h>
  29#include <linux/mm.h>
  30#include <linux/hugetlb.h>
  31#include <linux/shm.h>
  32#include <linux/init.h>
  33#include <linux/file.h>
  34#include <linux/mman.h>
  35#include <linux/shmem_fs.h>
  36#include <linux/security.h>
  37#include <linux/syscalls.h>
  38#include <linux/audit.h>
  39#include <linux/capability.h>
  40#include <linux/ptrace.h>
  41#include <linux/seq_file.h>
  42#include <linux/rwsem.h>
  43#include <linux/nsproxy.h>
  44#include <linux/mount.h>
  45#include <linux/ipc_namespace.h>
  46
  47#include <linux/uaccess.h>
  48
  49#include "util.h"
  50
  51struct shmid_kernel /* private to the kernel */
  52{
  53        struct kern_ipc_perm    shm_perm;
  54        struct file             *shm_file;
  55        unsigned long           shm_nattch;
  56        unsigned long           shm_segsz;
  57        time64_t                shm_atim;
  58        time64_t                shm_dtim;
  59        time64_t                shm_ctim;
  60        struct pid              *shm_cprid;
  61        struct pid              *shm_lprid;
  62        struct user_struct      *mlock_user;
  63
  64        /* The task created the shm object.  NULL if the task is dead. */
  65        struct task_struct      *shm_creator;
  66        struct list_head        shm_clist;      /* list by creator */
  67} __randomize_layout;
  68
  69/* shm_mode upper byte flags */
  70#define SHM_DEST        01000   /* segment will be destroyed on last detach */
  71#define SHM_LOCKED      02000   /* segment will not be swapped */
  72
  73struct shm_file_data {
  74        int id;
  75        struct ipc_namespace *ns;
  76        struct file *file;
  77        const struct vm_operations_struct *vm_ops;
  78};
  79
  80#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
  81
  82static const struct file_operations shm_file_operations;
  83static const struct vm_operations_struct shm_vm_ops;
  84
  85#define shm_ids(ns)     ((ns)->ids[IPC_SHM_IDS])
  86
  87#define shm_unlock(shp)                 \
  88        ipc_unlock(&(shp)->shm_perm)
  89
  90static int newseg(struct ipc_namespace *, struct ipc_params *);
  91static void shm_open(struct vm_area_struct *vma);
  92static void shm_close(struct vm_area_struct *vma);
  93static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
  94#ifdef CONFIG_PROC_FS
  95static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
  96#endif
  97
  98int shm_init_ns(struct ipc_namespace *ns)
  99{
 100        ns->shm_ctlmax = SHMMAX;
 101        ns->shm_ctlall = SHMALL;
 102        ns->shm_ctlmni = SHMMNI;
 103        ns->shm_rmid_forced = 0;
 104        ns->shm_tot = 0;
 105        return ipc_init_ids(&shm_ids(ns));
 106}
 107
 108/*
 109 * Called with shm_ids.rwsem (writer) and the shp structure locked.
 110 * Only shm_ids.rwsem remains locked on exit.
 111 */
 112static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 113{
 114        struct shmid_kernel *shp;
 115
 116        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 117
 118        if (shp->shm_nattch) {
 119                shp->shm_perm.mode |= SHM_DEST;
 120                /* Do not find it any more */
 121                ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
 122                shm_unlock(shp);
 123        } else
 124                shm_destroy(ns, shp);
 125}
 126
 127#ifdef CONFIG_IPC_NS
 128void shm_exit_ns(struct ipc_namespace *ns)
 129{
 130        free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
 131        idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
 132        rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
 133}
 134#endif
 135
 136static int __init ipc_ns_init(void)
 137{
 138        const int err = shm_init_ns(&init_ipc_ns);
 139        WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
 140        return err;
 141}
 142
 143pure_initcall(ipc_ns_init);
 144
 145void __init shm_init(void)
 146{
 147        ipc_init_proc_interface("sysvipc/shm",
 148#if BITS_PER_LONG <= 32
 149                                "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
 150#else
 151                                "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
 152#endif
 153                                IPC_SHM_IDS, sysvipc_shm_proc_show);
 154}
 155
 156static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
 157{
 158        struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
 159
 160        if (IS_ERR(ipcp))
 161                return ERR_CAST(ipcp);
 162
 163        return container_of(ipcp, struct shmid_kernel, shm_perm);
 164}
 165
 166static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
 167{
 168        struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
 169
 170        if (IS_ERR(ipcp))
 171                return ERR_CAST(ipcp);
 172
 173        return container_of(ipcp, struct shmid_kernel, shm_perm);
 174}
 175
 176/*
 177 * shm_lock_(check_) routines are called in the paths where the rwsem
 178 * is not necessarily held.
 179 */
 180static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
 181{
 182        struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
 183
 184        /*
 185         * Callers of shm_lock() must validate the status of the returned ipc
 186         * object pointer (as returned by ipc_lock()), and error out as
 187         * appropriate.
 188         */
 189        if (IS_ERR(ipcp))
 190                return (void *)ipcp;
 191        return container_of(ipcp, struct shmid_kernel, shm_perm);
 192}
 193
 194static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 195{
 196        rcu_read_lock();
 197        ipc_lock_object(&ipcp->shm_perm);
 198}
 199
 200static void shm_rcu_free(struct rcu_head *head)
 201{
 202        struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
 203                                                        rcu);
 204        struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
 205                                                        shm_perm);
 206        security_shm_free(&shp->shm_perm);
 207        kvfree(shp);
 208}
 209
 210static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 211{
 212        list_del(&s->shm_clist);
 213        ipc_rmid(&shm_ids(ns), &s->shm_perm);
 214}
 215
 216
 217static int __shm_open(struct vm_area_struct *vma)
 218{
 219        struct file *file = vma->vm_file;
 220        struct shm_file_data *sfd = shm_file_data(file);
 221        struct shmid_kernel *shp;
 222
 223        shp = shm_lock(sfd->ns, sfd->id);
 224
 225        if (IS_ERR(shp))
 226                return PTR_ERR(shp);
 227
 228        if (shp->shm_file != sfd->file) {
 229                /* ID was reused */
 230                shm_unlock(shp);
 231                return -EINVAL;
 232        }
 233
 234        shp->shm_atim = ktime_get_real_seconds();
 235        ipc_update_pid(&shp->shm_lprid, task_tgid(current));
 236        shp->shm_nattch++;
 237        shm_unlock(shp);
 238        return 0;
 239}
 240
 241/* This is called by fork, once for every shm attach. */
 242static void shm_open(struct vm_area_struct *vma)
 243{
 244        int err = __shm_open(vma);
 245        /*
 246         * We raced in the idr lookup or with shm_destroy().
 247         * Either way, the ID is busted.
 248         */
 249        WARN_ON_ONCE(err);
 250}
 251
 252/*
 253 * shm_destroy - free the struct shmid_kernel
 254 *
 255 * @ns: namespace
 256 * @shp: struct to free
 257 *
 258 * It has to be called with shp and shm_ids.rwsem (writer) locked,
 259 * but returns with shp unlocked and freed.
 260 */
 261static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 262{
 263        struct file *shm_file;
 264
 265        shm_file = shp->shm_file;
 266        shp->shm_file = NULL;
 267        ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
 268        shm_rmid(ns, shp);
 269        shm_unlock(shp);
 270        if (!is_file_hugepages(shm_file))
 271                shmem_lock(shm_file, 0, shp->mlock_user);
 272        else if (shp->mlock_user)
 273                user_shm_unlock(i_size_read(file_inode(shm_file)),
 274                                shp->mlock_user);
 275        fput(shm_file);
 276        ipc_update_pid(&shp->shm_cprid, NULL);
 277        ipc_update_pid(&shp->shm_lprid, NULL);
 278        ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 279}
 280
 281/*
 282 * shm_may_destroy - identifies whether shm segment should be destroyed now
 283 *
 284 * Returns true if and only if there are no active users of the segment and
 285 * one of the following is true:
 286 *
 287 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
 288 *
 289 * 2) sysctl kernel.shm_rmid_forced is set to 1.
 290 */
 291static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 292{
 293        return (shp->shm_nattch == 0) &&
 294               (ns->shm_rmid_forced ||
 295                (shp->shm_perm.mode & SHM_DEST));
 296}
 297
 298/*
 299 * remove the attach descriptor vma.
 300 * free memory for segment if it is marked destroyed.
 301 * The descriptor has already been removed from the current->mm->mmap list
 302 * and will later be kfree()d.
 303 */
 304static void shm_close(struct vm_area_struct *vma)
 305{
 306        struct file *file = vma->vm_file;
 307        struct shm_file_data *sfd = shm_file_data(file);
 308        struct shmid_kernel *shp;
 309        struct ipc_namespace *ns = sfd->ns;
 310
 311        down_write(&shm_ids(ns).rwsem);
 312        /* remove from the list of attaches of the shm segment */
 313        shp = shm_lock(ns, sfd->id);
 314
 315        /*
 316         * We raced in the idr lookup or with shm_destroy().
 317         * Either way, the ID is busted.
 318         */
 319        if (WARN_ON_ONCE(IS_ERR(shp)))
 320                goto done; /* no-op */
 321
 322        ipc_update_pid(&shp->shm_lprid, task_tgid(current));
 323        shp->shm_dtim = ktime_get_real_seconds();
 324        shp->shm_nattch--;
 325        if (shm_may_destroy(ns, shp))
 326                shm_destroy(ns, shp);
 327        else
 328                shm_unlock(shp);
 329done:
 330        up_write(&shm_ids(ns).rwsem);
 331}
 332
 333/* Called with ns->shm_ids(ns).rwsem locked */
 334static int shm_try_destroy_orphaned(int id, void *p, void *data)
 335{
 336        struct ipc_namespace *ns = data;
 337        struct kern_ipc_perm *ipcp = p;
 338        struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 339
 340        /*
 341         * We want to destroy segments without users and with already
 342         * exit'ed originating process.
 343         *
 344         * As shp->* are changed under rwsem, it's safe to skip shp locking.
 345         */
 346        if (shp->shm_creator != NULL)
 347                return 0;
 348
 349        if (shm_may_destroy(ns, shp)) {
 350                shm_lock_by_ptr(shp);
 351                shm_destroy(ns, shp);
 352        }
 353        return 0;
 354}
 355
 356void shm_destroy_orphaned(struct ipc_namespace *ns)
 357{
 358        down_write(&shm_ids(ns).rwsem);
 359        if (shm_ids(ns).in_use)
 360                idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
 361        up_write(&shm_ids(ns).rwsem);
 362}
 363
 364/* Locking assumes this will only be called with task == current */
 365void exit_shm(struct task_struct *task)
 366{
 367        struct ipc_namespace *ns = task->nsproxy->ipc_ns;
 368        struct shmid_kernel *shp, *n;
 369
 370        if (list_empty(&task->sysvshm.shm_clist))
 371                return;
 372
 373        /*
 374         * If kernel.shm_rmid_forced is not set then only keep track of
 375         * which shmids are orphaned, so that a later set of the sysctl
 376         * can clean them up.
 377         */
 378        if (!ns->shm_rmid_forced) {
 379                down_read(&shm_ids(ns).rwsem);
 380                list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
 381                        shp->shm_creator = NULL;
 382                /*
 383                 * Only under read lock but we are only called on current
 384                 * so no entry on the list will be shared.
 385                 */
 386                list_del(&task->sysvshm.shm_clist);
 387                up_read(&shm_ids(ns).rwsem);
 388                return;
 389        }
 390
 391        /*
 392         * Destroy all already created segments, that were not yet mapped,
 393         * and mark any mapped as orphan to cover the sysctl toggling.
 394         * Destroy is skipped if shm_may_destroy() returns false.
 395         */
 396        down_write(&shm_ids(ns).rwsem);
 397        list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
 398                shp->shm_creator = NULL;
 399
 400                if (shm_may_destroy(ns, shp)) {
 401                        shm_lock_by_ptr(shp);
 402                        shm_destroy(ns, shp);
 403                }
 404        }
 405
 406        /* Remove the list head from any segments still attached. */
 407        list_del(&task->sysvshm.shm_clist);
 408        up_write(&shm_ids(ns).rwsem);
 409}
 410
 411static int shm_fault(struct vm_fault *vmf)
 412{
 413        struct file *file = vmf->vma->vm_file;
 414        struct shm_file_data *sfd = shm_file_data(file);
 415
 416        return sfd->vm_ops->fault(vmf);
 417}
 418
 419static int shm_split(struct vm_area_struct *vma, unsigned long addr)
 420{
 421        struct file *file = vma->vm_file;
 422        struct shm_file_data *sfd = shm_file_data(file);
 423
 424        if (sfd->vm_ops->split)
 425                return sfd->vm_ops->split(vma, addr);
 426
 427        return 0;
 428}
 429
 430#ifdef CONFIG_NUMA
 431static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
 432{
 433        struct file *file = vma->vm_file;
 434        struct shm_file_data *sfd = shm_file_data(file);
 435        int err = 0;
 436
 437        if (sfd->vm_ops->set_policy)
 438                err = sfd->vm_ops->set_policy(vma, new);
 439        return err;
 440}
 441
 442static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
 443                                        unsigned long addr)
 444{
 445        struct file *file = vma->vm_file;
 446        struct shm_file_data *sfd = shm_file_data(file);
 447        struct mempolicy *pol = NULL;
 448
 449        if (sfd->vm_ops->get_policy)
 450                pol = sfd->vm_ops->get_policy(vma, addr);
 451        else if (vma->vm_policy)
 452                pol = vma->vm_policy;
 453
 454        return pol;
 455}
 456#endif
 457
 458static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 459{
 460        struct shm_file_data *sfd = shm_file_data(file);
 461        int ret;
 462
 463        /*
 464         * In case of remap_file_pages() emulation, the file can represent an
 465         * IPC ID that was removed, and possibly even reused by another shm
 466         * segment already.  Propagate this case as an error to caller.
 467         */
 468        ret = __shm_open(vma);
 469        if (ret)
 470                return ret;
 471
 472        ret = call_mmap(sfd->file, vma);
 473        if (ret) {
 474                shm_close(vma);
 475                return ret;
 476        }
 477        sfd->vm_ops = vma->vm_ops;
 478#ifdef CONFIG_MMU
 479        WARN_ON(!sfd->vm_ops->fault);
 480#endif
 481        vma->vm_ops = &shm_vm_ops;
 482        return 0;
 483}
 484
 485static int shm_release(struct inode *ino, struct file *file)
 486{
 487        struct shm_file_data *sfd = shm_file_data(file);
 488
 489        put_ipc_ns(sfd->ns);
 490        fput(sfd->file);
 491        shm_file_data(file) = NULL;
 492        kfree(sfd);
 493        return 0;
 494}
 495
 496static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 497{
 498        struct shm_file_data *sfd = shm_file_data(file);
 499
 500        if (!sfd->file->f_op->fsync)
 501                return -EINVAL;
 502        return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
 503}
 504
 505static long shm_fallocate(struct file *file, int mode, loff_t offset,
 506                          loff_t len)
 507{
 508        struct shm_file_data *sfd = shm_file_data(file);
 509
 510        if (!sfd->file->f_op->fallocate)
 511                return -EOPNOTSUPP;
 512        return sfd->file->f_op->fallocate(file, mode, offset, len);
 513}
 514
 515static unsigned long shm_get_unmapped_area(struct file *file,
 516        unsigned long addr, unsigned long len, unsigned long pgoff,
 517        unsigned long flags)
 518{
 519        struct shm_file_data *sfd = shm_file_data(file);
 520
 521        return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
 522                                                pgoff, flags);
 523}
 524
 525static const struct file_operations shm_file_operations = {
 526        .mmap           = shm_mmap,
 527        .fsync          = shm_fsync,
 528        .release        = shm_release,
 529        .get_unmapped_area      = shm_get_unmapped_area,
 530        .llseek         = noop_llseek,
 531        .fallocate      = shm_fallocate,
 532};
 533
 534/*
 535 * shm_file_operations_huge is now identical to shm_file_operations,
 536 * but we keep it distinct for the sake of is_file_shm_hugepages().
 537 */
 538static const struct file_operations shm_file_operations_huge = {
 539        .mmap           = shm_mmap,
 540        .fsync          = shm_fsync,
 541        .release        = shm_release,
 542        .get_unmapped_area      = shm_get_unmapped_area,
 543        .llseek         = noop_llseek,
 544        .fallocate      = shm_fallocate,
 545};
 546
 547bool is_file_shm_hugepages(struct file *file)
 548{
 549        return file->f_op == &shm_file_operations_huge;
 550}
 551
 552static const struct vm_operations_struct shm_vm_ops = {
 553        .open   = shm_open,     /* callback for a new vm-area open */
 554        .close  = shm_close,    /* callback for when the vm-area is released */
 555        .fault  = shm_fault,
 556        .split  = shm_split,
 557#if defined(CONFIG_NUMA)
 558        .set_policy = shm_set_policy,
 559        .get_policy = shm_get_policy,
 560#endif
 561};
 562
 563/**
 564 * newseg - Create a new shared memory segment
 565 * @ns: namespace
 566 * @params: ptr to the structure that contains key, size and shmflg
 567 *
 568 * Called with shm_ids.rwsem held as a writer.
 569 */
 570static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 571{
 572        key_t key = params->key;
 573        int shmflg = params->flg;
 574        size_t size = params->u.size;
 575        int error;
 576        struct shmid_kernel *shp;
 577        size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 578        struct file *file;
 579        char name[13];
 580        vm_flags_t acctflag = 0;
 581
 582        if (size < SHMMIN || size > ns->shm_ctlmax)
 583                return -EINVAL;
 584
 585        if (numpages << PAGE_SHIFT < size)
 586                return -ENOSPC;
 587
 588        if (ns->shm_tot + numpages < ns->shm_tot ||
 589                        ns->shm_tot + numpages > ns->shm_ctlall)
 590                return -ENOSPC;
 591
 592        shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
 593        if (unlikely(!shp))
 594                return -ENOMEM;
 595
 596        shp->shm_perm.key = key;
 597        shp->shm_perm.mode = (shmflg & S_IRWXUGO);
 598        shp->mlock_user = NULL;
 599
 600        shp->shm_perm.security = NULL;
 601        error = security_shm_alloc(&shp->shm_perm);
 602        if (error) {
 603                kvfree(shp);
 604                return error;
 605        }
 606
 607        sprintf(name, "SYSV%08x", key);
 608        if (shmflg & SHM_HUGETLB) {
 609                struct hstate *hs;
 610                size_t hugesize;
 611
 612                hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
 613                if (!hs) {
 614                        error = -EINVAL;
 615                        goto no_file;
 616                }
 617                hugesize = ALIGN(size, huge_page_size(hs));
 618
 619                /* hugetlb_file_setup applies strict accounting */
 620                if (shmflg & SHM_NORESERVE)
 621                        acctflag = VM_NORESERVE;
 622                file = hugetlb_file_setup(name, hugesize, acctflag,
 623                                  &shp->mlock_user, HUGETLB_SHMFS_INODE,
 624                                (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
 625        } else {
 626                /*
 627                 * Do not allow no accounting for OVERCOMMIT_NEVER, even
 628                 * if it's asked for.
 629                 */
 630                if  ((shmflg & SHM_NORESERVE) &&
 631                                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
 632                        acctflag = VM_NORESERVE;
 633                file = shmem_kernel_file_setup(name, size, acctflag);
 634        }
 635        error = PTR_ERR(file);
 636        if (IS_ERR(file))
 637                goto no_file;
 638
 639        shp->shm_cprid = get_pid(task_tgid(current));
 640        shp->shm_lprid = NULL;
 641        shp->shm_atim = shp->shm_dtim = 0;
 642        shp->shm_ctim = ktime_get_real_seconds();
 643        shp->shm_segsz = size;
 644        shp->shm_nattch = 0;
 645        shp->shm_file = file;
 646        shp->shm_creator = current;
 647
 648        /* ipc_addid() locks shp upon success. */
 649        error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
 650        if (error < 0)
 651                goto no_id;
 652
 653        list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
 654
 655        /*
 656         * shmid gets reported as "inode#" in /proc/pid/maps.
 657         * proc-ps tools use this. Changing this will break them.
 658         */
 659        file_inode(file)->i_ino = shp->shm_perm.id;
 660
 661        ns->shm_tot += numpages;
 662        error = shp->shm_perm.id;
 663
 664        ipc_unlock_object(&shp->shm_perm);
 665        rcu_read_unlock();
 666        return error;
 667
 668no_id:
 669        ipc_update_pid(&shp->shm_cprid, NULL);
 670        ipc_update_pid(&shp->shm_lprid, NULL);
 671        if (is_file_hugepages(file) && shp->mlock_user)
 672                user_shm_unlock(size, shp->mlock_user);
 673        fput(file);
 674no_file:
 675        call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
 676        return error;
 677}
 678
 679/*
 680 * Called with shm_ids.rwsem and ipcp locked.
 681 */
 682static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
 683                                struct ipc_params *params)
 684{
 685        struct shmid_kernel *shp;
 686
 687        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 688        if (shp->shm_segsz < params->u.size)
 689                return -EINVAL;
 690
 691        return 0;
 692}
 693
 694long ksys_shmget(key_t key, size_t size, int shmflg)
 695{
 696        struct ipc_namespace *ns;
 697        static const struct ipc_ops shm_ops = {
 698                .getnew = newseg,
 699                .associate = security_shm_associate,
 700                .more_checks = shm_more_checks,
 701        };
 702        struct ipc_params shm_params;
 703
 704        ns = current->nsproxy->ipc_ns;
 705
 706        shm_params.key = key;
 707        shm_params.flg = shmflg;
 708        shm_params.u.size = size;
 709
 710        return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
 711}
 712
 713SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
 714{
 715        return ksys_shmget(key, size, shmflg);
 716}
 717
 718static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
 719{
 720        switch (version) {
 721        case IPC_64:
 722                return copy_to_user(buf, in, sizeof(*in));
 723        case IPC_OLD:
 724            {
 725                struct shmid_ds out;
 726
 727                memset(&out, 0, sizeof(out));
 728                ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
 729                out.shm_segsz   = in->shm_segsz;
 730                out.shm_atime   = in->shm_atime;
 731                out.shm_dtime   = in->shm_dtime;
 732                out.shm_ctime   = in->shm_ctime;
 733                out.shm_cpid    = in->shm_cpid;
 734                out.shm_lpid    = in->shm_lpid;
 735                out.shm_nattch  = in->shm_nattch;
 736
 737                return copy_to_user(buf, &out, sizeof(out));
 738            }
 739        default:
 740                return -EINVAL;
 741        }
 742}
 743
 744static inline unsigned long
 745copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 746{
 747        switch (version) {
 748        case IPC_64:
 749                if (copy_from_user(out, buf, sizeof(*out)))
 750                        return -EFAULT;
 751                return 0;
 752        case IPC_OLD:
 753            {
 754                struct shmid_ds tbuf_old;
 755
 756                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 757                        return -EFAULT;
 758
 759                out->shm_perm.uid       = tbuf_old.shm_perm.uid;
 760                out->shm_perm.gid       = tbuf_old.shm_perm.gid;
 761                out->shm_perm.mode      = tbuf_old.shm_perm.mode;
 762
 763                return 0;
 764            }
 765        default:
 766                return -EINVAL;
 767        }
 768}
 769
 770static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
 771{
 772        switch (version) {
 773        case IPC_64:
 774                return copy_to_user(buf, in, sizeof(*in));
 775        case IPC_OLD:
 776            {
 777                struct shminfo out;
 778
 779                if (in->shmmax > INT_MAX)
 780                        out.shmmax = INT_MAX;
 781                else
 782                        out.shmmax = (int)in->shmmax;
 783
 784                out.shmmin      = in->shmmin;
 785                out.shmmni      = in->shmmni;
 786                out.shmseg      = in->shmseg;
 787                out.shmall      = in->shmall;
 788
 789                return copy_to_user(buf, &out, sizeof(out));
 790            }
 791        default:
 792                return -EINVAL;
 793        }
 794}
 795
 796/*
 797 * Calculate and add used RSS and swap pages of a shm.
 798 * Called with shm_ids.rwsem held as a reader
 799 */
 800static void shm_add_rss_swap(struct shmid_kernel *shp,
 801        unsigned long *rss_add, unsigned long *swp_add)
 802{
 803        struct inode *inode;
 804
 805        inode = file_inode(shp->shm_file);
 806
 807        if (is_file_hugepages(shp->shm_file)) {
 808                struct address_space *mapping = inode->i_mapping;
 809                struct hstate *h = hstate_file(shp->shm_file);
 810                *rss_add += pages_per_huge_page(h) * mapping->nrpages;
 811        } else {
 812#ifdef CONFIG_SHMEM
 813                struct shmem_inode_info *info = SHMEM_I(inode);
 814
 815                spin_lock_irq(&info->lock);
 816                *rss_add += inode->i_mapping->nrpages;
 817                *swp_add += info->swapped;
 818                spin_unlock_irq(&info->lock);
 819#else
 820                *rss_add += inode->i_mapping->nrpages;
 821#endif
 822        }
 823}
 824
 825/*
 826 * Called with shm_ids.rwsem held as a reader
 827 */
 828static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 829                unsigned long *swp)
 830{
 831        int next_id;
 832        int total, in_use;
 833
 834        *rss = 0;
 835        *swp = 0;
 836
 837        in_use = shm_ids(ns).in_use;
 838
 839        for (total = 0, next_id = 0; total < in_use; next_id++) {
 840                struct kern_ipc_perm *ipc;
 841                struct shmid_kernel *shp;
 842
 843                ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
 844                if (ipc == NULL)
 845                        continue;
 846                shp = container_of(ipc, struct shmid_kernel, shm_perm);
 847
 848                shm_add_rss_swap(shp, rss, swp);
 849
 850                total++;
 851        }
 852}
 853
 854/*
 855 * This function handles some shmctl commands which require the rwsem
 856 * to be held in write mode.
 857 * NOTE: no locks must be held, the rwsem is taken inside this function.
 858 */
 859static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 860                       struct shmid64_ds *shmid64)
 861{
 862        struct kern_ipc_perm *ipcp;
 863        struct shmid_kernel *shp;
 864        int err;
 865
 866        down_write(&shm_ids(ns).rwsem);
 867        rcu_read_lock();
 868
 869        ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
 870                                      &shmid64->shm_perm, 0);
 871        if (IS_ERR(ipcp)) {
 872                err = PTR_ERR(ipcp);
 873                goto out_unlock1;
 874        }
 875
 876        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 877
 878        err = security_shm_shmctl(&shp->shm_perm, cmd);
 879        if (err)
 880                goto out_unlock1;
 881
 882        switch (cmd) {
 883        case IPC_RMID:
 884                ipc_lock_object(&shp->shm_perm);
 885                /* do_shm_rmid unlocks the ipc object and rcu */
 886                do_shm_rmid(ns, ipcp);
 887                goto out_up;
 888        case IPC_SET:
 889                ipc_lock_object(&shp->shm_perm);
 890                err = ipc_update_perm(&shmid64->shm_perm, ipcp);
 891                if (err)
 892                        goto out_unlock0;
 893                shp->shm_ctim = ktime_get_real_seconds();
 894                break;
 895        default:
 896                err = -EINVAL;
 897                goto out_unlock1;
 898        }
 899
 900out_unlock0:
 901        ipc_unlock_object(&shp->shm_perm);
 902out_unlock1:
 903        rcu_read_unlock();
 904out_up:
 905        up_write(&shm_ids(ns).rwsem);
 906        return err;
 907}
 908
 909static int shmctl_ipc_info(struct ipc_namespace *ns,
 910                           struct shminfo64 *shminfo)
 911{
 912        int err = security_shm_shmctl(NULL, IPC_INFO);
 913        if (!err) {
 914                memset(shminfo, 0, sizeof(*shminfo));
 915                shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
 916                shminfo->shmmax = ns->shm_ctlmax;
 917                shminfo->shmall = ns->shm_ctlall;
 918                shminfo->shmmin = SHMMIN;
 919                down_read(&shm_ids(ns).rwsem);
 920                err = ipc_get_maxid(&shm_ids(ns));
 921                up_read(&shm_ids(ns).rwsem);
 922                if (err < 0)
 923                        err = 0;
 924        }
 925        return err;
 926}
 927
 928static int shmctl_shm_info(struct ipc_namespace *ns,
 929                           struct shm_info *shm_info)
 930{
 931        int err = security_shm_shmctl(NULL, SHM_INFO);
 932        if (!err) {
 933                memset(shm_info, 0, sizeof(*shm_info));
 934                down_read(&shm_ids(ns).rwsem);
 935                shm_info->used_ids = shm_ids(ns).in_use;
 936                shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
 937                shm_info->shm_tot = ns->shm_tot;
 938                shm_info->swap_attempts = 0;
 939                shm_info->swap_successes = 0;
 940                err = ipc_get_maxid(&shm_ids(ns));
 941                up_read(&shm_ids(ns).rwsem);
 942                if (err < 0)
 943                        err = 0;
 944        }
 945        return err;
 946}
 947
 948static int shmctl_stat(struct ipc_namespace *ns, int shmid,
 949                        int cmd, struct shmid64_ds *tbuf)
 950{
 951        struct shmid_kernel *shp;
 952        int id = 0;
 953        int err;
 954
 955        memset(tbuf, 0, sizeof(*tbuf));
 956
 957        rcu_read_lock();
 958        if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
 959                shp = shm_obtain_object(ns, shmid);
 960                if (IS_ERR(shp)) {
 961                        err = PTR_ERR(shp);
 962                        goto out_unlock;
 963                }
 964                id = shp->shm_perm.id;
 965        } else { /* IPC_STAT */
 966                shp = shm_obtain_object_check(ns, shmid);
 967                if (IS_ERR(shp)) {
 968                        err = PTR_ERR(shp);
 969                        goto out_unlock;
 970                }
 971        }
 972
 973        /*
 974         * Semantically SHM_STAT_ANY ought to be identical to
 975         * that functionality provided by the /proc/sysvipc/
 976         * interface. As such, only audit these calls and
 977         * do not do traditional S_IRUGO permission checks on
 978         * the ipc object.
 979         */
 980        if (cmd == SHM_STAT_ANY)
 981                audit_ipc_obj(&shp->shm_perm);
 982        else {
 983                err = -EACCES;
 984                if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
 985                        goto out_unlock;
 986        }
 987
 988        err = security_shm_shmctl(&shp->shm_perm, cmd);
 989        if (err)
 990                goto out_unlock;
 991
 992        ipc_lock_object(&shp->shm_perm);
 993
 994        if (!ipc_valid_object(&shp->shm_perm)) {
 995                ipc_unlock_object(&shp->shm_perm);
 996                err = -EIDRM;
 997                goto out_unlock;
 998        }
 999
1000        kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
1001        tbuf->shm_segsz = shp->shm_segsz;
1002        tbuf->shm_atime = shp->shm_atim;
1003        tbuf->shm_dtime = shp->shm_dtim;
1004        tbuf->shm_ctime = shp->shm_ctim;
1005        tbuf->shm_cpid  = pid_vnr(shp->shm_cprid);
1006        tbuf->shm_lpid  = pid_vnr(shp->shm_lprid);
1007        tbuf->shm_nattch = shp->shm_nattch;
1008
1009        ipc_unlock_object(&shp->shm_perm);
1010        rcu_read_unlock();
1011        return id;
1012
1013out_unlock:
1014        rcu_read_unlock();
1015        return err;
1016}
1017
1018static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
1019{
1020        struct shmid_kernel *shp;
1021        struct file *shm_file;
1022        int err;
1023
1024        rcu_read_lock();
1025        shp = shm_obtain_object_check(ns, shmid);
1026        if (IS_ERR(shp)) {
1027                err = PTR_ERR(shp);
1028                goto out_unlock1;
1029        }
1030
1031        audit_ipc_obj(&(shp->shm_perm));
1032        err = security_shm_shmctl(&shp->shm_perm, cmd);
1033        if (err)
1034                goto out_unlock1;
1035
1036        ipc_lock_object(&shp->shm_perm);
1037
1038        /* check if shm_destroy() is tearing down shp */
1039        if (!ipc_valid_object(&shp->shm_perm)) {
1040                err = -EIDRM;
1041                goto out_unlock0;
1042        }
1043
1044        if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
1045                kuid_t euid = current_euid();
1046
1047                if (!uid_eq(euid, shp->shm_perm.uid) &&
1048                    !uid_eq(euid, shp->shm_perm.cuid)) {
1049                        err = -EPERM;
1050                        goto out_unlock0;
1051                }
1052                if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
1053                        err = -EPERM;
1054                        goto out_unlock0;
1055                }
1056        }
1057
1058        shm_file = shp->shm_file;
1059        if (is_file_hugepages(shm_file))
1060                goto out_unlock0;
1061
1062        if (cmd == SHM_LOCK) {
1063                struct user_struct *user = current_user();
1064
1065                err = shmem_lock(shm_file, 1, user);
1066                if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1067                        shp->shm_perm.mode |= SHM_LOCKED;
1068                        shp->mlock_user = user;
1069                }
1070                goto out_unlock0;
1071        }
1072
1073        /* SHM_UNLOCK */
1074        if (!(shp->shm_perm.mode & SHM_LOCKED))
1075                goto out_unlock0;
1076        shmem_lock(shm_file, 0, shp->mlock_user);
1077        shp->shm_perm.mode &= ~SHM_LOCKED;
1078        shp->mlock_user = NULL;
1079        get_file(shm_file);
1080        ipc_unlock_object(&shp->shm_perm);
1081        rcu_read_unlock();
1082        shmem_unlock_mapping(shm_file->f_mapping);
1083
1084        fput(shm_file);
1085        return err;
1086
1087out_unlock0:
1088        ipc_unlock_object(&shp->shm_perm);
1089out_unlock1:
1090        rcu_read_unlock();
1091        return err;
1092}
1093
1094long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
1095{
1096        int err, version;
1097        struct ipc_namespace *ns;
1098        struct shmid64_ds sem64;
1099
1100        if (cmd < 0 || shmid < 0)
1101                return -EINVAL;
1102
1103        version = ipc_parse_version(&cmd);
1104        ns = current->nsproxy->ipc_ns;
1105
1106        switch (cmd) {
1107        case IPC_INFO: {
1108                struct shminfo64 shminfo;
1109                err = shmctl_ipc_info(ns, &shminfo);
1110                if (err < 0)
1111                        return err;
1112                if (copy_shminfo_to_user(buf, &shminfo, version))
1113                        err = -EFAULT;
1114                return err;
1115        }
1116        case SHM_INFO: {
1117                struct shm_info shm_info;
1118                err = shmctl_shm_info(ns, &shm_info);
1119                if (err < 0)
1120                        return err;
1121                if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1122                        err = -EFAULT;
1123                return err;
1124        }
1125        case SHM_STAT:
1126        case SHM_STAT_ANY:
1127        case IPC_STAT: {
1128                err = shmctl_stat(ns, shmid, cmd, &sem64);
1129                if (err < 0)
1130                        return err;
1131                if (copy_shmid_to_user(buf, &sem64, version))
1132                        err = -EFAULT;
1133                return err;
1134        }
1135        case IPC_SET:
1136                if (copy_shmid_from_user(&sem64, buf, version))
1137                        return -EFAULT;
1138                /* fallthru */
1139        case IPC_RMID:
1140                return shmctl_down(ns, shmid, cmd, &sem64);
1141        case SHM_LOCK:
1142        case SHM_UNLOCK:
1143                return shmctl_do_lock(ns, shmid, cmd);
1144        default:
1145                return -EINVAL;
1146        }
1147}
1148
1149SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1150{
1151        return ksys_shmctl(shmid, cmd, buf);
1152}
1153
1154#ifdef CONFIG_COMPAT
1155
1156struct compat_shmid_ds {
1157        struct compat_ipc_perm shm_perm;
1158        int shm_segsz;
1159        compat_time_t shm_atime;
1160        compat_time_t shm_dtime;
1161        compat_time_t shm_ctime;
1162        compat_ipc_pid_t shm_cpid;
1163        compat_ipc_pid_t shm_lpid;
1164        unsigned short shm_nattch;
1165        unsigned short shm_unused;
1166        compat_uptr_t shm_unused2;
1167        compat_uptr_t shm_unused3;
1168};
1169
1170struct compat_shminfo64 {
1171        compat_ulong_t shmmax;
1172        compat_ulong_t shmmin;
1173        compat_ulong_t shmmni;
1174        compat_ulong_t shmseg;
1175        compat_ulong_t shmall;
1176        compat_ulong_t __unused1;
1177        compat_ulong_t __unused2;
1178        compat_ulong_t __unused3;
1179        compat_ulong_t __unused4;
1180};
1181
1182struct compat_shm_info {
1183        compat_int_t used_ids;
1184        compat_ulong_t shm_tot, shm_rss, shm_swp;
1185        compat_ulong_t swap_attempts, swap_successes;
1186};
1187
1188static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1189                                        int version)
1190{
1191        if (in->shmmax > INT_MAX)
1192                in->shmmax = INT_MAX;
1193        if (version == IPC_64) {
1194                struct compat_shminfo64 info;
1195                memset(&info, 0, sizeof(info));
1196                info.shmmax = in->shmmax;
1197                info.shmmin = in->shmmin;
1198                info.shmmni = in->shmmni;
1199                info.shmseg = in->shmseg;
1200                info.shmall = in->shmall;
1201                return copy_to_user(buf, &info, sizeof(info));
1202        } else {
1203                struct shminfo info;
1204                memset(&info, 0, sizeof(info));
1205                info.shmmax = in->shmmax;
1206                info.shmmin = in->shmmin;
1207                info.shmmni = in->shmmni;
1208                info.shmseg = in->shmseg;
1209                info.shmall = in->shmall;
1210                return copy_to_user(buf, &info, sizeof(info));
1211        }
1212}
1213
1214static int put_compat_shm_info(struct shm_info *ip,
1215                                struct compat_shm_info __user *uip)
1216{
1217        struct compat_shm_info info;
1218
1219        memset(&info, 0, sizeof(info));
1220        info.used_ids = ip->used_ids;
1221        info.shm_tot = ip->shm_tot;
1222        info.shm_rss = ip->shm_rss;
1223        info.shm_swp = ip->shm_swp;
1224        info.swap_attempts = ip->swap_attempts;
1225        info.swap_successes = ip->swap_successes;
1226        return copy_to_user(uip, &info, sizeof(info));
1227}
1228
1229static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
1230                                        int version)
1231{
1232        if (version == IPC_64) {
1233                struct compat_shmid64_ds v;
1234                memset(&v, 0, sizeof(v));
1235                to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
1236                v.shm_atime = in->shm_atime;
1237                v.shm_dtime = in->shm_dtime;
1238                v.shm_ctime = in->shm_ctime;
1239                v.shm_segsz = in->shm_segsz;
1240                v.shm_nattch = in->shm_nattch;
1241                v.shm_cpid = in->shm_cpid;
1242                v.shm_lpid = in->shm_lpid;
1243                return copy_to_user(buf, &v, sizeof(v));
1244        } else {
1245                struct compat_shmid_ds v;
1246                memset(&v, 0, sizeof(v));
1247                to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
1248                v.shm_perm.key = in->shm_perm.key;
1249                v.shm_atime = in->shm_atime;
1250                v.shm_dtime = in->shm_dtime;
1251                v.shm_ctime = in->shm_ctime;
1252                v.shm_segsz = in->shm_segsz;
1253                v.shm_nattch = in->shm_nattch;
1254                v.shm_cpid = in->shm_cpid;
1255                v.shm_lpid = in->shm_lpid;
1256                return copy_to_user(buf, &v, sizeof(v));
1257        }
1258}
1259
1260static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1261                                        int version)
1262{
1263        memset(out, 0, sizeof(*out));
1264        if (version == IPC_64) {
1265                struct compat_shmid64_ds __user *p = buf;
1266                return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1267        } else {
1268                struct compat_shmid_ds __user *p = buf;
1269                return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1270        }
1271}
1272
1273long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
1274{
1275        struct ipc_namespace *ns;
1276        struct shmid64_ds sem64;
1277        int version = compat_ipc_parse_version(&cmd);
1278        int err;
1279
1280        ns = current->nsproxy->ipc_ns;
1281
1282        if (cmd < 0 || shmid < 0)
1283                return -EINVAL;
1284
1285        switch (cmd) {
1286        case IPC_INFO: {
1287                struct shminfo64 shminfo;
1288                err = shmctl_ipc_info(ns, &shminfo);
1289                if (err < 0)
1290                        return err;
1291                if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1292                        err = -EFAULT;
1293                return err;
1294        }
1295        case SHM_INFO: {
1296                struct shm_info shm_info;
1297                err = shmctl_shm_info(ns, &shm_info);
1298                if (err < 0)
1299                        return err;
1300                if (put_compat_shm_info(&shm_info, uptr))
1301                        err = -EFAULT;
1302                return err;
1303        }
1304        case IPC_STAT:
1305        case SHM_STAT_ANY:
1306        case SHM_STAT:
1307                err = shmctl_stat(ns, shmid, cmd, &sem64);
1308                if (err < 0)
1309                        return err;
1310                if (copy_compat_shmid_to_user(uptr, &sem64, version))
1311                        err = -EFAULT;
1312                return err;
1313
1314        case IPC_SET:
1315                if (copy_compat_shmid_from_user(&sem64, uptr, version))
1316                        return -EFAULT;
1317                /* fallthru */
1318        case IPC_RMID:
1319                return shmctl_down(ns, shmid, cmd, &sem64);
1320        case SHM_LOCK:
1321        case SHM_UNLOCK:
1322                return shmctl_do_lock(ns, shmid, cmd);
1323                break;
1324        default:
1325                return -EINVAL;
1326        }
1327        return err;
1328}
1329
1330COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
1331{
1332        return compat_ksys_shmctl(shmid, cmd, uptr);
1333}
1334#endif
1335
1336/*
1337 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1338 *
1339 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1340 * "raddr" thing points to kernel space, and there has to be a wrapper around
1341 * this.
1342 */
1343long do_shmat(int shmid, char __user *shmaddr, int shmflg,
1344              ulong *raddr, unsigned long shmlba)
1345{
1346        struct shmid_kernel *shp;
1347        unsigned long addr = (unsigned long)shmaddr;
1348        unsigned long size;
1349        struct file *file;
1350        int    err;
1351        unsigned long flags = MAP_SHARED;
1352        unsigned long prot;
1353        int acc_mode;
1354        struct ipc_namespace *ns;
1355        struct shm_file_data *sfd;
1356        struct path path;
1357        fmode_t f_mode;
1358        unsigned long populate = 0;
1359
1360        err = -EINVAL;
1361        if (shmid < 0)
1362                goto out;
1363
1364        if (addr) {
1365                if (addr & (shmlba - 1)) {
1366                        if (shmflg & SHM_RND) {
1367                                addr &= ~(shmlba - 1);  /* round down */
1368
1369                                /*
1370                                 * Ensure that the round-down is non-nil
1371                                 * when remapping. This can happen for
1372                                 * cases when addr < shmlba.
1373                                 */
1374                                if (!addr && (shmflg & SHM_REMAP))
1375                                        goto out;
1376                        } else
1377#ifndef __ARCH_FORCE_SHMLBA
1378                                if (addr & ~PAGE_MASK)
1379#endif
1380                                        goto out;
1381                }
1382
1383                flags |= MAP_FIXED;
1384        } else if ((shmflg & SHM_REMAP))
1385                goto out;
1386
1387        if (shmflg & SHM_RDONLY) {
1388                prot = PROT_READ;
1389                acc_mode = S_IRUGO;
1390                f_mode = FMODE_READ;
1391        } else {
1392                prot = PROT_READ | PROT_WRITE;
1393                acc_mode = S_IRUGO | S_IWUGO;
1394                f_mode = FMODE_READ | FMODE_WRITE;
1395        }
1396        if (shmflg & SHM_EXEC) {
1397                prot |= PROT_EXEC;
1398                acc_mode |= S_IXUGO;
1399        }
1400
1401        /*
1402         * We cannot rely on the fs check since SYSV IPC does have an
1403         * additional creator id...
1404         */
1405        ns = current->nsproxy->ipc_ns;
1406        rcu_read_lock();
1407        shp = shm_obtain_object_check(ns, shmid);
1408        if (IS_ERR(shp)) {
1409                err = PTR_ERR(shp);
1410                goto out_unlock;
1411        }
1412
1413        err = -EACCES;
1414        if (ipcperms(ns, &shp->shm_perm, acc_mode))
1415                goto out_unlock;
1416
1417        err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
1418        if (err)
1419                goto out_unlock;
1420
1421        ipc_lock_object(&shp->shm_perm);
1422
1423        /* check if shm_destroy() is tearing down shp */
1424        if (!ipc_valid_object(&shp->shm_perm)) {
1425                ipc_unlock_object(&shp->shm_perm);
1426                err = -EIDRM;
1427                goto out_unlock;
1428        }
1429
1430        path = shp->shm_file->f_path;
1431        path_get(&path);
1432        shp->shm_nattch++;
1433        size = i_size_read(d_inode(path.dentry));
1434        ipc_unlock_object(&shp->shm_perm);
1435        rcu_read_unlock();
1436
1437        err = -ENOMEM;
1438        sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1439        if (!sfd) {
1440                path_put(&path);
1441                goto out_nattch;
1442        }
1443
1444        file = alloc_file(&path, f_mode,
1445                          is_file_hugepages(shp->shm_file) ?
1446                                &shm_file_operations_huge :
1447                                &shm_file_operations);
1448        err = PTR_ERR(file);
1449        if (IS_ERR(file)) {
1450                kfree(sfd);
1451                path_put(&path);
1452                goto out_nattch;
1453        }
1454
1455        file->private_data = sfd;
1456        file->f_mapping = shp->shm_file->f_mapping;
1457        sfd->id = shp->shm_perm.id;
1458        sfd->ns = get_ipc_ns(ns);
1459        /*
1460         * We need to take a reference to the real shm file to prevent the
1461         * pointer from becoming stale in cases where the lifetime of the outer
1462         * file extends beyond that of the shm segment.  It's not usually
1463         * possible, but it can happen during remap_file_pages() emulation as
1464         * that unmaps the memory, then does ->mmap() via file reference only.
1465         * We'll deny the ->mmap() if the shm segment was since removed, but to
1466         * detect shm ID reuse we need to compare the file pointers.
1467         */
1468        sfd->file = get_file(shp->shm_file);
1469        sfd->vm_ops = NULL;
1470
1471        err = security_mmap_file(file, prot, flags);
1472        if (err)
1473                goto out_fput;
1474
1475        if (down_write_killable(&current->mm->mmap_sem)) {
1476                err = -EINTR;
1477                goto out_fput;
1478        }
1479
1480        if (addr && !(shmflg & SHM_REMAP)) {
1481                err = -EINVAL;
1482                if (addr + size < addr)
1483                        goto invalid;
1484
1485                if (find_vma_intersection(current->mm, addr, addr + size))
1486                        goto invalid;
1487        }
1488
1489        addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
1490        *raddr = addr;
1491        err = 0;
1492        if (IS_ERR_VALUE(addr))
1493                err = (long)addr;
1494invalid:
1495        up_write(&current->mm->mmap_sem);
1496        if (populate)
1497                mm_populate(addr, populate);
1498
1499out_fput:
1500        fput(file);
1501
1502out_nattch:
1503        down_write(&shm_ids(ns).rwsem);
1504        shp = shm_lock(ns, shmid);
1505        shp->shm_nattch--;
1506        if (shm_may_destroy(ns, shp))
1507                shm_destroy(ns, shp);
1508        else
1509                shm_unlock(shp);
1510        up_write(&shm_ids(ns).rwsem);
1511        return err;
1512
1513out_unlock:
1514        rcu_read_unlock();
1515out:
1516        return err;
1517}
1518
1519SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1520{
1521        unsigned long ret;
1522        long err;
1523
1524        err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1525        if (err)
1526                return err;
1527        force_successful_syscall_return();
1528        return (long)ret;
1529}
1530
1531#ifdef CONFIG_COMPAT
1532
1533#ifndef COMPAT_SHMLBA
1534#define COMPAT_SHMLBA   SHMLBA
1535#endif
1536
1537COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1538{
1539        unsigned long ret;
1540        long err;
1541
1542        err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1543        if (err)
1544                return err;
1545        force_successful_syscall_return();
1546        return (long)ret;
1547}
1548#endif
1549
1550/*
1551 * detach and kill segment if marked destroyed.
1552 * The work is done in shm_close.
1553 */
1554long ksys_shmdt(char __user *shmaddr)
1555{
1556        struct mm_struct *mm = current->mm;
1557        struct vm_area_struct *vma;
1558        unsigned long addr = (unsigned long)shmaddr;
1559        int retval = -EINVAL;
1560#ifdef CONFIG_MMU
1561        loff_t size = 0;
1562        struct file *file;
1563        struct vm_area_struct *next;
1564#endif
1565
1566        if (addr & ~PAGE_MASK)
1567                return retval;
1568
1569        if (down_write_killable(&mm->mmap_sem))
1570                return -EINTR;
1571
1572        /*
1573         * This function tries to be smart and unmap shm segments that
1574         * were modified by partial mlock or munmap calls:
1575         * - It first determines the size of the shm segment that should be
1576         *   unmapped: It searches for a vma that is backed by shm and that
1577         *   started at address shmaddr. It records it's size and then unmaps
1578         *   it.
1579         * - Then it unmaps all shm vmas that started at shmaddr and that
1580         *   are within the initially determined size and that are from the
1581         *   same shm segment from which we determined the size.
1582         * Errors from do_munmap are ignored: the function only fails if
1583         * it's called with invalid parameters or if it's called to unmap
1584         * a part of a vma. Both calls in this function are for full vmas,
1585         * the parameters are directly copied from the vma itself and always
1586         * valid - therefore do_munmap cannot fail. (famous last words?)
1587         */
1588        /*
1589         * If it had been mremap()'d, the starting address would not
1590         * match the usual checks anyway. So assume all vma's are
1591         * above the starting address given.
1592         */
1593        vma = find_vma(mm, addr);
1594
1595#ifdef CONFIG_MMU
1596        while (vma) {
1597                next = vma->vm_next;
1598
1599                /*
1600                 * Check if the starting address would match, i.e. it's
1601                 * a fragment created by mprotect() and/or munmap(), or it
1602                 * otherwise it starts at this address with no hassles.
1603                 */
1604                if ((vma->vm_ops == &shm_vm_ops) &&
1605                        (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1606
1607                        /*
1608                         * Record the file of the shm segment being
1609                         * unmapped.  With mremap(), someone could place
1610                         * page from another segment but with equal offsets
1611                         * in the range we are unmapping.
1612                         */
1613                        file = vma->vm_file;
1614                        size = i_size_read(file_inode(vma->vm_file));
1615                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1616                        /*
1617                         * We discovered the size of the shm segment, so
1618                         * break out of here and fall through to the next
1619                         * loop that uses the size information to stop
1620                         * searching for matching vma's.
1621                         */
1622                        retval = 0;
1623                        vma = next;
1624                        break;
1625                }
1626                vma = next;
1627        }
1628
1629        /*
1630         * We need look no further than the maximum address a fragment
1631         * could possibly have landed at. Also cast things to loff_t to
1632         * prevent overflows and make comparisons vs. equal-width types.
1633         */
1634        size = PAGE_ALIGN(size);
1635        while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1636                next = vma->vm_next;
1637
1638                /* finding a matching vma now does not alter retval */
1639                if ((vma->vm_ops == &shm_vm_ops) &&
1640                    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
1641                    (vma->vm_file == file))
1642                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1643                vma = next;
1644        }
1645
1646#else   /* CONFIG_MMU */
1647        /* under NOMMU conditions, the exact address to be destroyed must be
1648         * given
1649         */
1650        if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1651                do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1652                retval = 0;
1653        }
1654
1655#endif
1656
1657        up_write(&mm->mmap_sem);
1658        return retval;
1659}
1660
1661SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1662{
1663        return ksys_shmdt(shmaddr);
1664}
1665
1666#ifdef CONFIG_PROC_FS
1667static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1668{
1669        struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1670        struct user_namespace *user_ns = seq_user_ns(s);
1671        struct kern_ipc_perm *ipcp = it;
1672        struct shmid_kernel *shp;
1673        unsigned long rss = 0, swp = 0;
1674
1675        shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1676        shm_add_rss_swap(shp, &rss, &swp);
1677
1678#if BITS_PER_LONG <= 32
1679#define SIZE_SPEC "%10lu"
1680#else
1681#define SIZE_SPEC "%21lu"
1682#endif
1683
1684        seq_printf(s,
1685                   "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1686                   "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
1687                   SIZE_SPEC " " SIZE_SPEC "\n",
1688                   shp->shm_perm.key,
1689                   shp->shm_perm.id,
1690                   shp->shm_perm.mode,
1691                   shp->shm_segsz,
1692                   pid_nr_ns(shp->shm_cprid, pid_ns),
1693                   pid_nr_ns(shp->shm_lprid, pid_ns),
1694                   shp->shm_nattch,
1695                   from_kuid_munged(user_ns, shp->shm_perm.uid),
1696                   from_kgid_munged(user_ns, shp->shm_perm.gid),
1697                   from_kuid_munged(user_ns, shp->shm_perm.cuid),
1698                   from_kgid_munged(user_ns, shp->shm_perm.cgid),
1699                   shp->shm_atim,
1700                   shp->shm_dtim,
1701                   shp->shm_ctim,
1702                   rss * PAGE_SIZE,
1703                   swp * PAGE_SIZE);
1704
1705        return 0;
1706}
1707#endif
1708