linux/ipc/msg.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * linux/ipc/msg.c
   4 * Copyright (C) 1992 Krishna Balasubramanian
   5 *
   6 * Removed all the remaining kerneld mess
   7 * Catch the -EFAULT stuff properly
   8 * Use GFP_KERNEL for messages as in 1.2
   9 * Fixed up the unchecked user space derefs
  10 * Copyright (C) 1998 Alan Cox & Andi Kleen
  11 *
  12 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  13 *
  14 * mostly rewritten, threaded and wake-one semantics added
  15 * MSGMAX limit removed, sysctl's added
  16 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
  17 *
  18 * support for audit of ipc object properties and permission changes
  19 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  20 *
  21 * namespaces support
  22 * OpenVZ, SWsoft Inc.
  23 * Pavel Emelianov <xemul@openvz.org>
  24 */
  25
  26#include <linux/capability.h>
  27#include <linux/msg.h>
  28#include <linux/spinlock.h>
  29#include <linux/init.h>
  30#include <linux/mm.h>
  31#include <linux/proc_fs.h>
  32#include <linux/list.h>
  33#include <linux/security.h>
  34#include <linux/sched/wake_q.h>
  35#include <linux/syscalls.h>
  36#include <linux/audit.h>
  37#include <linux/seq_file.h>
  38#include <linux/rwsem.h>
  39#include <linux/nsproxy.h>
  40#include <linux/ipc_namespace.h>
  41#include <linux/rhashtable.h>
  42
  43#include <asm/current.h>
  44#include <linux/uaccess.h>
  45#include "util.h"
  46
  47/* one msq_queue structure for each present queue on the system */
  48struct msg_queue {
  49        struct kern_ipc_perm q_perm;
  50        time64_t q_stime;               /* last msgsnd time */
  51        time64_t q_rtime;               /* last msgrcv time */
  52        time64_t q_ctime;               /* last change time */
  53        unsigned long q_cbytes;         /* current number of bytes on queue */
  54        unsigned long q_qnum;           /* number of messages in queue */
  55        unsigned long q_qbytes;         /* max number of bytes on queue */
  56        struct pid *q_lspid;            /* pid of last msgsnd */
  57        struct pid *q_lrpid;            /* last receive pid */
  58
  59        struct list_head q_messages;
  60        struct list_head q_receivers;
  61        struct list_head q_senders;
  62} __randomize_layout;
  63
  64/* one msg_receiver structure for each sleeping receiver */
  65struct msg_receiver {
  66        struct list_head        r_list;
  67        struct task_struct      *r_tsk;
  68
  69        int                     r_mode;
  70        long                    r_msgtype;
  71        long                    r_maxsize;
  72
  73        struct msg_msg          *r_msg;
  74};
  75
  76/* one msg_sender for each sleeping sender */
  77struct msg_sender {
  78        struct list_head        list;
  79        struct task_struct      *tsk;
  80        size_t                  msgsz;
  81};
  82
  83#define SEARCH_ANY              1
  84#define SEARCH_EQUAL            2
  85#define SEARCH_NOTEQUAL         3
  86#define SEARCH_LESSEQUAL        4
  87#define SEARCH_NUMBER           5
  88
  89#define msg_ids(ns)     ((ns)->ids[IPC_MSG_IDS])
  90
  91static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
  92{
  93        struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id);
  94
  95        if (IS_ERR(ipcp))
  96                return ERR_CAST(ipcp);
  97
  98        return container_of(ipcp, struct msg_queue, q_perm);
  99}
 100
 101static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
 102                                                        int id)
 103{
 104        struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);
 105
 106        if (IS_ERR(ipcp))
 107                return ERR_CAST(ipcp);
 108
 109        return container_of(ipcp, struct msg_queue, q_perm);
 110}
 111
 112static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 113{
 114        ipc_rmid(&msg_ids(ns), &s->q_perm);
 115}
 116
 117static void msg_rcu_free(struct rcu_head *head)
 118{
 119        struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
 120        struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
 121
 122        security_msg_queue_free(&msq->q_perm);
 123        kvfree(msq);
 124}
 125
 126/**
 127 * newque - Create a new msg queue
 128 * @ns: namespace
 129 * @params: ptr to the structure that contains the key and msgflg
 130 *
 131 * Called with msg_ids.rwsem held (writer)
 132 */
 133static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 134{
 135        struct msg_queue *msq;
 136        int retval;
 137        key_t key = params->key;
 138        int msgflg = params->flg;
 139
 140        msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
 141        if (unlikely(!msq))
 142                return -ENOMEM;
 143
 144        msq->q_perm.mode = msgflg & S_IRWXUGO;
 145        msq->q_perm.key = key;
 146
 147        msq->q_perm.security = NULL;
 148        retval = security_msg_queue_alloc(&msq->q_perm);
 149        if (retval) {
 150                kvfree(msq);
 151                return retval;
 152        }
 153
 154        msq->q_stime = msq->q_rtime = 0;
 155        msq->q_ctime = ktime_get_real_seconds();
 156        msq->q_cbytes = msq->q_qnum = 0;
 157        msq->q_qbytes = ns->msg_ctlmnb;
 158        msq->q_lspid = msq->q_lrpid = NULL;
 159        INIT_LIST_HEAD(&msq->q_messages);
 160        INIT_LIST_HEAD(&msq->q_receivers);
 161        INIT_LIST_HEAD(&msq->q_senders);
 162
 163        /* ipc_addid() locks msq upon success. */
 164        retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 165        if (retval < 0) {
 166                ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 167                return retval;
 168        }
 169
 170        ipc_unlock_object(&msq->q_perm);
 171        rcu_read_unlock();
 172
 173        return msq->q_perm.id;
 174}
 175
 176static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz)
 177{
 178        return msgsz + msq->q_cbytes <= msq->q_qbytes &&
 179                1 + msq->q_qnum <= msq->q_qbytes;
 180}
 181
 182static inline void ss_add(struct msg_queue *msq,
 183                          struct msg_sender *mss, size_t msgsz)
 184{
 185        mss->tsk = current;
 186        mss->msgsz = msgsz;
 187        __set_current_state(TASK_INTERRUPTIBLE);
 188        list_add_tail(&mss->list, &msq->q_senders);
 189}
 190
 191static inline void ss_del(struct msg_sender *mss)
 192{
 193        if (mss->list.next)
 194                list_del(&mss->list);
 195}
 196
 197static void ss_wakeup(struct msg_queue *msq,
 198                      struct wake_q_head *wake_q, bool kill)
 199{
 200        struct msg_sender *mss, *t;
 201        struct task_struct *stop_tsk = NULL;
 202        struct list_head *h = &msq->q_senders;
 203
 204        list_for_each_entry_safe(mss, t, h, list) {
 205                if (kill)
 206                        mss->list.next = NULL;
 207
 208                /*
 209                 * Stop at the first task we don't wakeup,
 210                 * we've already iterated the original
 211                 * sender queue.
 212                 */
 213                else if (stop_tsk == mss->tsk)
 214                        break;
 215                /*
 216                 * We are not in an EIDRM scenario here, therefore
 217                 * verify that we really need to wakeup the task.
 218                 * To maintain current semantics and wakeup order,
 219                 * move the sender to the tail on behalf of the
 220                 * blocked task.
 221                 */
 222                else if (!msg_fits_inqueue(msq, mss->msgsz)) {
 223                        if (!stop_tsk)
 224                                stop_tsk = mss->tsk;
 225
 226                        list_move_tail(&mss->list, &msq->q_senders);
 227                        continue;
 228                }
 229
 230                wake_q_add(wake_q, mss->tsk);
 231        }
 232}
 233
 234static void expunge_all(struct msg_queue *msq, int res,
 235                        struct wake_q_head *wake_q)
 236{
 237        struct msg_receiver *msr, *t;
 238
 239        list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
 240                wake_q_add(wake_q, msr->r_tsk);
 241                WRITE_ONCE(msr->r_msg, ERR_PTR(res));
 242        }
 243}
 244
 245/*
 246 * freeque() wakes up waiters on the sender and receiver waiting queue,
 247 * removes the message queue from message queue ID IDR, and cleans up all the
 248 * messages associated with this queue.
 249 *
 250 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
 251 * before freeque() is called. msg_ids.rwsem remains locked on exit.
 252 */
 253static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 254{
 255        struct msg_msg *msg, *t;
 256        struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
 257        DEFINE_WAKE_Q(wake_q);
 258
 259        expunge_all(msq, -EIDRM, &wake_q);
 260        ss_wakeup(msq, &wake_q, true);
 261        msg_rmid(ns, msq);
 262        ipc_unlock_object(&msq->q_perm);
 263        wake_up_q(&wake_q);
 264        rcu_read_unlock();
 265
 266        list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
 267                atomic_dec(&ns->msg_hdrs);
 268                free_msg(msg);
 269        }
 270        atomic_sub(msq->q_cbytes, &ns->msg_bytes);
 271        ipc_update_pid(&msq->q_lspid, NULL);
 272        ipc_update_pid(&msq->q_lrpid, NULL);
 273        ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 274}
 275
 276long ksys_msgget(key_t key, int msgflg)
 277{
 278        struct ipc_namespace *ns;
 279        static const struct ipc_ops msg_ops = {
 280                .getnew = newque,
 281                .associate = security_msg_queue_associate,
 282        };
 283        struct ipc_params msg_params;
 284
 285        ns = current->nsproxy->ipc_ns;
 286
 287        msg_params.key = key;
 288        msg_params.flg = msgflg;
 289
 290        return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
 291}
 292
 293SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
 294{
 295        return ksys_msgget(key, msgflg);
 296}
 297
 298static inline unsigned long
 299copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 300{
 301        switch (version) {
 302        case IPC_64:
 303                return copy_to_user(buf, in, sizeof(*in));
 304        case IPC_OLD:
 305        {
 306                struct msqid_ds out;
 307
 308                memset(&out, 0, sizeof(out));
 309
 310                ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
 311
 312                out.msg_stime           = in->msg_stime;
 313                out.msg_rtime           = in->msg_rtime;
 314                out.msg_ctime           = in->msg_ctime;
 315
 316                if (in->msg_cbytes > USHRT_MAX)
 317                        out.msg_cbytes  = USHRT_MAX;
 318                else
 319                        out.msg_cbytes  = in->msg_cbytes;
 320                out.msg_lcbytes         = in->msg_cbytes;
 321
 322                if (in->msg_qnum > USHRT_MAX)
 323                        out.msg_qnum    = USHRT_MAX;
 324                else
 325                        out.msg_qnum    = in->msg_qnum;
 326
 327                if (in->msg_qbytes > USHRT_MAX)
 328                        out.msg_qbytes  = USHRT_MAX;
 329                else
 330                        out.msg_qbytes  = in->msg_qbytes;
 331                out.msg_lqbytes         = in->msg_qbytes;
 332
 333                out.msg_lspid           = in->msg_lspid;
 334                out.msg_lrpid           = in->msg_lrpid;
 335
 336                return copy_to_user(buf, &out, sizeof(out));
 337        }
 338        default:
 339                return -EINVAL;
 340        }
 341}
 342
 343static inline unsigned long
 344copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 345{
 346        switch (version) {
 347        case IPC_64:
 348                if (copy_from_user(out, buf, sizeof(*out)))
 349                        return -EFAULT;
 350                return 0;
 351        case IPC_OLD:
 352        {
 353                struct msqid_ds tbuf_old;
 354
 355                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 356                        return -EFAULT;
 357
 358                out->msg_perm.uid       = tbuf_old.msg_perm.uid;
 359                out->msg_perm.gid       = tbuf_old.msg_perm.gid;
 360                out->msg_perm.mode      = tbuf_old.msg_perm.mode;
 361
 362                if (tbuf_old.msg_qbytes == 0)
 363                        out->msg_qbytes = tbuf_old.msg_lqbytes;
 364                else
 365                        out->msg_qbytes = tbuf_old.msg_qbytes;
 366
 367                return 0;
 368        }
 369        default:
 370                return -EINVAL;
 371        }
 372}
 373
 374/*
 375 * This function handles some msgctl commands which require the rwsem
 376 * to be held in write mode.
 377 * NOTE: no locks must be held, the rwsem is taken inside this function.
 378 */
 379static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 380                        struct msqid64_ds *msqid64)
 381{
 382        struct kern_ipc_perm *ipcp;
 383        struct msg_queue *msq;
 384        int err;
 385
 386        down_write(&msg_ids(ns).rwsem);
 387        rcu_read_lock();
 388
 389        ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd,
 390                                      &msqid64->msg_perm, msqid64->msg_qbytes);
 391        if (IS_ERR(ipcp)) {
 392                err = PTR_ERR(ipcp);
 393                goto out_unlock1;
 394        }
 395
 396        msq = container_of(ipcp, struct msg_queue, q_perm);
 397
 398        err = security_msg_queue_msgctl(&msq->q_perm, cmd);
 399        if (err)
 400                goto out_unlock1;
 401
 402        switch (cmd) {
 403        case IPC_RMID:
 404                ipc_lock_object(&msq->q_perm);
 405                /* freeque unlocks the ipc object and rcu */
 406                freeque(ns, ipcp);
 407                goto out_up;
 408        case IPC_SET:
 409        {
 410                DEFINE_WAKE_Q(wake_q);
 411
 412                if (msqid64->msg_qbytes > ns->msg_ctlmnb &&
 413                    !capable(CAP_SYS_RESOURCE)) {
 414                        err = -EPERM;
 415                        goto out_unlock1;
 416                }
 417
 418                ipc_lock_object(&msq->q_perm);
 419                err = ipc_update_perm(&msqid64->msg_perm, ipcp);
 420                if (err)
 421                        goto out_unlock0;
 422
 423                msq->q_qbytes = msqid64->msg_qbytes;
 424
 425                msq->q_ctime = ktime_get_real_seconds();
 426                /*
 427                 * Sleeping receivers might be excluded by
 428                 * stricter permissions.
 429                 */
 430                expunge_all(msq, -EAGAIN, &wake_q);
 431                /*
 432                 * Sleeping senders might be able to send
 433                 * due to a larger queue size.
 434                 */
 435                ss_wakeup(msq, &wake_q, false);
 436                ipc_unlock_object(&msq->q_perm);
 437                wake_up_q(&wake_q);
 438
 439                goto out_unlock1;
 440        }
 441        default:
 442                err = -EINVAL;
 443                goto out_unlock1;
 444        }
 445
 446out_unlock0:
 447        ipc_unlock_object(&msq->q_perm);
 448out_unlock1:
 449        rcu_read_unlock();
 450out_up:
 451        up_write(&msg_ids(ns).rwsem);
 452        return err;
 453}
 454
 455static int msgctl_info(struct ipc_namespace *ns, int msqid,
 456                         int cmd, struct msginfo *msginfo)
 457{
 458        int err;
 459        int max_idx;
 460
 461        /*
 462         * We must not return kernel stack data.
 463         * due to padding, it's not enough
 464         * to set all member fields.
 465         */
 466        err = security_msg_queue_msgctl(NULL, cmd);
 467        if (err)
 468                return err;
 469
 470        memset(msginfo, 0, sizeof(*msginfo));
 471        msginfo->msgmni = ns->msg_ctlmni;
 472        msginfo->msgmax = ns->msg_ctlmax;
 473        msginfo->msgmnb = ns->msg_ctlmnb;
 474        msginfo->msgssz = MSGSSZ;
 475        msginfo->msgseg = MSGSEG;
 476        down_read(&msg_ids(ns).rwsem);
 477        if (cmd == MSG_INFO) {
 478                msginfo->msgpool = msg_ids(ns).in_use;
 479                msginfo->msgmap = atomic_read(&ns->msg_hdrs);
 480                msginfo->msgtql = atomic_read(&ns->msg_bytes);
 481        } else {
 482                msginfo->msgmap = MSGMAP;
 483                msginfo->msgpool = MSGPOOL;
 484                msginfo->msgtql = MSGTQL;
 485        }
 486        max_idx = ipc_get_maxidx(&msg_ids(ns));
 487        up_read(&msg_ids(ns).rwsem);
 488        return (max_idx < 0) ? 0 : max_idx;
 489}
 490
 491static int msgctl_stat(struct ipc_namespace *ns, int msqid,
 492                         int cmd, struct msqid64_ds *p)
 493{
 494        struct msg_queue *msq;
 495        int err;
 496
 497        memset(p, 0, sizeof(*p));
 498
 499        rcu_read_lock();
 500        if (cmd == MSG_STAT || cmd == MSG_STAT_ANY) {
 501                msq = msq_obtain_object(ns, msqid);
 502                if (IS_ERR(msq)) {
 503                        err = PTR_ERR(msq);
 504                        goto out_unlock;
 505                }
 506        } else { /* IPC_STAT */
 507                msq = msq_obtain_object_check(ns, msqid);
 508                if (IS_ERR(msq)) {
 509                        err = PTR_ERR(msq);
 510                        goto out_unlock;
 511                }
 512        }
 513
 514        /* see comment for SHM_STAT_ANY */
 515        if (cmd == MSG_STAT_ANY)
 516                audit_ipc_obj(&msq->q_perm);
 517        else {
 518                err = -EACCES;
 519                if (ipcperms(ns, &msq->q_perm, S_IRUGO))
 520                        goto out_unlock;
 521        }
 522
 523        err = security_msg_queue_msgctl(&msq->q_perm, cmd);
 524        if (err)
 525                goto out_unlock;
 526
 527        ipc_lock_object(&msq->q_perm);
 528
 529        if (!ipc_valid_object(&msq->q_perm)) {
 530                ipc_unlock_object(&msq->q_perm);
 531                err = -EIDRM;
 532                goto out_unlock;
 533        }
 534
 535        kernel_to_ipc64_perm(&msq->q_perm, &p->msg_perm);
 536        p->msg_stime  = msq->q_stime;
 537        p->msg_rtime  = msq->q_rtime;
 538        p->msg_ctime  = msq->q_ctime;
 539#ifndef CONFIG_64BIT
 540        p->msg_stime_high = msq->q_stime >> 32;
 541        p->msg_rtime_high = msq->q_rtime >> 32;
 542        p->msg_ctime_high = msq->q_ctime >> 32;
 543#endif
 544        p->msg_cbytes = msq->q_cbytes;
 545        p->msg_qnum   = msq->q_qnum;
 546        p->msg_qbytes = msq->q_qbytes;
 547        p->msg_lspid  = pid_vnr(msq->q_lspid);
 548        p->msg_lrpid  = pid_vnr(msq->q_lrpid);
 549
 550        if (cmd == IPC_STAT) {
 551                /*
 552                 * As defined in SUS:
 553                 * Return 0 on success
 554                 */
 555                err = 0;
 556        } else {
 557                /*
 558                 * MSG_STAT and MSG_STAT_ANY (both Linux specific)
 559                 * Return the full id, including the sequence number
 560                 */
 561                err = msq->q_perm.id;
 562        }
 563
 564        ipc_unlock_object(&msq->q_perm);
 565out_unlock:
 566        rcu_read_unlock();
 567        return err;
 568}
 569
 570long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 571{
 572        int version;
 573        struct ipc_namespace *ns;
 574        struct msqid64_ds msqid64;
 575        int err;
 576
 577        if (msqid < 0 || cmd < 0)
 578                return -EINVAL;
 579
 580        version = ipc_parse_version(&cmd);
 581        ns = current->nsproxy->ipc_ns;
 582
 583        switch (cmd) {
 584        case IPC_INFO:
 585        case MSG_INFO: {
 586                struct msginfo msginfo;
 587                err = msgctl_info(ns, msqid, cmd, &msginfo);
 588                if (err < 0)
 589                        return err;
 590                if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 591                        err = -EFAULT;
 592                return err;
 593        }
 594        case MSG_STAT:  /* msqid is an index rather than a msg queue id */
 595        case MSG_STAT_ANY:
 596        case IPC_STAT:
 597                err = msgctl_stat(ns, msqid, cmd, &msqid64);
 598                if (err < 0)
 599                        return err;
 600                if (copy_msqid_to_user(buf, &msqid64, version))
 601                        err = -EFAULT;
 602                return err;
 603        case IPC_SET:
 604                if (copy_msqid_from_user(&msqid64, buf, version))
 605                        return -EFAULT;
 606                /* fallthru */
 607        case IPC_RMID:
 608                return msgctl_down(ns, msqid, cmd, &msqid64);
 609        default:
 610                return  -EINVAL;
 611        }
 612}
 613
 614SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
 615{
 616        return ksys_msgctl(msqid, cmd, buf);
 617}
 618
 619#ifdef CONFIG_COMPAT
 620
 621struct compat_msqid_ds {
 622        struct compat_ipc_perm msg_perm;
 623        compat_uptr_t msg_first;
 624        compat_uptr_t msg_last;
 625        compat_time_t msg_stime;
 626        compat_time_t msg_rtime;
 627        compat_time_t msg_ctime;
 628        compat_ulong_t msg_lcbytes;
 629        compat_ulong_t msg_lqbytes;
 630        unsigned short msg_cbytes;
 631        unsigned short msg_qnum;
 632        unsigned short msg_qbytes;
 633        compat_ipc_pid_t msg_lspid;
 634        compat_ipc_pid_t msg_lrpid;
 635};
 636
 637static int copy_compat_msqid_from_user(struct msqid64_ds *out, void __user *buf,
 638                                        int version)
 639{
 640        memset(out, 0, sizeof(*out));
 641        if (version == IPC_64) {
 642                struct compat_msqid64_ds __user *p = buf;
 643                if (get_compat_ipc64_perm(&out->msg_perm, &p->msg_perm))
 644                        return -EFAULT;
 645                if (get_user(out->msg_qbytes, &p->msg_qbytes))
 646                        return -EFAULT;
 647        } else {
 648                struct compat_msqid_ds __user *p = buf;
 649                if (get_compat_ipc_perm(&out->msg_perm, &p->msg_perm))
 650                        return -EFAULT;
 651                if (get_user(out->msg_qbytes, &p->msg_qbytes))
 652                        return -EFAULT;
 653        }
 654        return 0;
 655}
 656
 657static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in,
 658                                        int version)
 659{
 660        if (version == IPC_64) {
 661                struct compat_msqid64_ds v;
 662                memset(&v, 0, sizeof(v));
 663                to_compat_ipc64_perm(&v.msg_perm, &in->msg_perm);
 664                v.msg_stime      = lower_32_bits(in->msg_stime);
 665                v.msg_stime_high = upper_32_bits(in->msg_stime);
 666                v.msg_rtime      = lower_32_bits(in->msg_rtime);
 667                v.msg_rtime_high = upper_32_bits(in->msg_rtime);
 668                v.msg_ctime      = lower_32_bits(in->msg_ctime);
 669                v.msg_ctime_high = upper_32_bits(in->msg_ctime);
 670                v.msg_cbytes = in->msg_cbytes;
 671                v.msg_qnum = in->msg_qnum;
 672                v.msg_qbytes = in->msg_qbytes;
 673                v.msg_lspid = in->msg_lspid;
 674                v.msg_lrpid = in->msg_lrpid;
 675                return copy_to_user(buf, &v, sizeof(v));
 676        } else {
 677                struct compat_msqid_ds v;
 678                memset(&v, 0, sizeof(v));
 679                to_compat_ipc_perm(&v.msg_perm, &in->msg_perm);
 680                v.msg_stime = in->msg_stime;
 681                v.msg_rtime = in->msg_rtime;
 682                v.msg_ctime = in->msg_ctime;
 683                v.msg_cbytes = in->msg_cbytes;
 684                v.msg_qnum = in->msg_qnum;
 685                v.msg_qbytes = in->msg_qbytes;
 686                v.msg_lspid = in->msg_lspid;
 687                v.msg_lrpid = in->msg_lrpid;
 688                return copy_to_user(buf, &v, sizeof(v));
 689        }
 690}
 691
 692long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
 693{
 694        struct ipc_namespace *ns;
 695        int err;
 696        struct msqid64_ds msqid64;
 697        int version = compat_ipc_parse_version(&cmd);
 698
 699        ns = current->nsproxy->ipc_ns;
 700
 701        if (msqid < 0 || cmd < 0)
 702                return -EINVAL;
 703
 704        switch (cmd & (~IPC_64)) {
 705        case IPC_INFO:
 706        case MSG_INFO: {
 707                struct msginfo msginfo;
 708                err = msgctl_info(ns, msqid, cmd, &msginfo);
 709                if (err < 0)
 710                        return err;
 711                if (copy_to_user(uptr, &msginfo, sizeof(struct msginfo)))
 712                        err = -EFAULT;
 713                return err;
 714        }
 715        case IPC_STAT:
 716        case MSG_STAT:
 717        case MSG_STAT_ANY:
 718                err = msgctl_stat(ns, msqid, cmd, &msqid64);
 719                if (err < 0)
 720                        return err;
 721                if (copy_compat_msqid_to_user(uptr, &msqid64, version))
 722                        err = -EFAULT;
 723                return err;
 724        case IPC_SET:
 725                if (copy_compat_msqid_from_user(&msqid64, uptr, version))
 726                        return -EFAULT;
 727                /* fallthru */
 728        case IPC_RMID:
 729                return msgctl_down(ns, msqid, cmd, &msqid64);
 730        default:
 731                return -EINVAL;
 732        }
 733}
 734
 735COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr)
 736{
 737        return compat_ksys_msgctl(msqid, cmd, uptr);
 738}
 739#endif
 740
 741static int testmsg(struct msg_msg *msg, long type, int mode)
 742{
 743        switch (mode) {
 744        case SEARCH_ANY:
 745        case SEARCH_NUMBER:
 746                return 1;
 747        case SEARCH_LESSEQUAL:
 748                if (msg->m_type <= type)
 749                        return 1;
 750                break;
 751        case SEARCH_EQUAL:
 752                if (msg->m_type == type)
 753                        return 1;
 754                break;
 755        case SEARCH_NOTEQUAL:
 756                if (msg->m_type != type)
 757                        return 1;
 758                break;
 759        }
 760        return 0;
 761}
 762
 763static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
 764                                 struct wake_q_head *wake_q)
 765{
 766        struct msg_receiver *msr, *t;
 767
 768        list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
 769                if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
 770                    !security_msg_queue_msgrcv(&msq->q_perm, msg, msr->r_tsk,
 771                                               msr->r_msgtype, msr->r_mode)) {
 772
 773                        list_del(&msr->r_list);
 774                        if (msr->r_maxsize < msg->m_ts) {
 775                                wake_q_add(wake_q, msr->r_tsk);
 776                                WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
 777                        } else {
 778                                ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk));
 779                                msq->q_rtime = ktime_get_real_seconds();
 780
 781                                wake_q_add(wake_q, msr->r_tsk);
 782                                WRITE_ONCE(msr->r_msg, msg);
 783                                return 1;
 784                        }
 785                }
 786        }
 787
 788        return 0;
 789}
 790
 791static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 792                size_t msgsz, int msgflg)
 793{
 794        struct msg_queue *msq;
 795        struct msg_msg *msg;
 796        int err;
 797        struct ipc_namespace *ns;
 798        DEFINE_WAKE_Q(wake_q);
 799
 800        ns = current->nsproxy->ipc_ns;
 801
 802        if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
 803                return -EINVAL;
 804        if (mtype < 1)
 805                return -EINVAL;
 806
 807        msg = load_msg(mtext, msgsz);
 808        if (IS_ERR(msg))
 809                return PTR_ERR(msg);
 810
 811        msg->m_type = mtype;
 812        msg->m_ts = msgsz;
 813
 814        rcu_read_lock();
 815        msq = msq_obtain_object_check(ns, msqid);
 816        if (IS_ERR(msq)) {
 817                err = PTR_ERR(msq);
 818                goto out_unlock1;
 819        }
 820
 821        ipc_lock_object(&msq->q_perm);
 822
 823        for (;;) {
 824                struct msg_sender s;
 825
 826                err = -EACCES;
 827                if (ipcperms(ns, &msq->q_perm, S_IWUGO))
 828                        goto out_unlock0;
 829
 830                /* raced with RMID? */
 831                if (!ipc_valid_object(&msq->q_perm)) {
 832                        err = -EIDRM;
 833                        goto out_unlock0;
 834                }
 835
 836                err = security_msg_queue_msgsnd(&msq->q_perm, msg, msgflg);
 837                if (err)
 838                        goto out_unlock0;
 839
 840                if (msg_fits_inqueue(msq, msgsz))
 841                        break;
 842
 843                /* queue full, wait: */
 844                if (msgflg & IPC_NOWAIT) {
 845                        err = -EAGAIN;
 846                        goto out_unlock0;
 847                }
 848
 849                /* enqueue the sender and prepare to block */
 850                ss_add(msq, &s, msgsz);
 851
 852                if (!ipc_rcu_getref(&msq->q_perm)) {
 853                        err = -EIDRM;
 854                        goto out_unlock0;
 855                }
 856
 857                ipc_unlock_object(&msq->q_perm);
 858                rcu_read_unlock();
 859                schedule();
 860
 861                rcu_read_lock();
 862                ipc_lock_object(&msq->q_perm);
 863
 864                ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 865                /* raced with RMID? */
 866                if (!ipc_valid_object(&msq->q_perm)) {
 867                        err = -EIDRM;
 868                        goto out_unlock0;
 869                }
 870                ss_del(&s);
 871
 872                if (signal_pending(current)) {
 873                        err = -ERESTARTNOHAND;
 874                        goto out_unlock0;
 875                }
 876
 877        }
 878
 879        ipc_update_pid(&msq->q_lspid, task_tgid(current));
 880        msq->q_stime = ktime_get_real_seconds();
 881
 882        if (!pipelined_send(msq, msg, &wake_q)) {
 883                /* no one is waiting for this message, enqueue it */
 884                list_add_tail(&msg->m_list, &msq->q_messages);
 885                msq->q_cbytes += msgsz;
 886                msq->q_qnum++;
 887                atomic_add(msgsz, &ns->msg_bytes);
 888                atomic_inc(&ns->msg_hdrs);
 889        }
 890
 891        err = 0;
 892        msg = NULL;
 893
 894out_unlock0:
 895        ipc_unlock_object(&msq->q_perm);
 896        wake_up_q(&wake_q);
 897out_unlock1:
 898        rcu_read_unlock();
 899        if (msg != NULL)
 900                free_msg(msg);
 901        return err;
 902}
 903
 904long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz,
 905                 int msgflg)
 906{
 907        long mtype;
 908
 909        if (get_user(mtype, &msgp->mtype))
 910                return -EFAULT;
 911        return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
 912}
 913
 914SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
 915                int, msgflg)
 916{
 917        return ksys_msgsnd(msqid, msgp, msgsz, msgflg);
 918}
 919
 920#ifdef CONFIG_COMPAT
 921
 922struct compat_msgbuf {
 923        compat_long_t mtype;
 924        char mtext[1];
 925};
 926
 927long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp,
 928                       compat_ssize_t msgsz, int msgflg)
 929{
 930        struct compat_msgbuf __user *up = compat_ptr(msgp);
 931        compat_long_t mtype;
 932
 933        if (get_user(mtype, &up->mtype))
 934                return -EFAULT;
 935        return do_msgsnd(msqid, mtype, up->mtext, (ssize_t)msgsz, msgflg);
 936}
 937
 938COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp,
 939                       compat_ssize_t, msgsz, int, msgflg)
 940{
 941        return compat_ksys_msgsnd(msqid, msgp, msgsz, msgflg);
 942}
 943#endif
 944
 945static inline int convert_mode(long *msgtyp, int msgflg)
 946{
 947        if (msgflg & MSG_COPY)
 948                return SEARCH_NUMBER;
 949        /*
 950         *  find message of correct type.
 951         *  msgtyp = 0 => get first.
 952         *  msgtyp > 0 => get first message of matching type.
 953         *  msgtyp < 0 => get message with least type must be < abs(msgtype).
 954         */
 955        if (*msgtyp == 0)
 956                return SEARCH_ANY;
 957        if (*msgtyp < 0) {
 958                if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
 959                        *msgtyp = LONG_MAX;
 960                else
 961                        *msgtyp = -*msgtyp;
 962                return SEARCH_LESSEQUAL;
 963        }
 964        if (msgflg & MSG_EXCEPT)
 965                return SEARCH_NOTEQUAL;
 966        return SEARCH_EQUAL;
 967}
 968
 969static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
 970{
 971        struct msgbuf __user *msgp = dest;
 972        size_t msgsz;
 973
 974        if (put_user(msg->m_type, &msgp->mtype))
 975                return -EFAULT;
 976
 977        msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
 978        if (store_msg(msgp->mtext, msg, msgsz))
 979                return -EFAULT;
 980        return msgsz;
 981}
 982
 983#ifdef CONFIG_CHECKPOINT_RESTORE
 984/*
 985 * This function creates new kernel message structure, large enough to store
 986 * bufsz message bytes.
 987 */
 988static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
 989{
 990        struct msg_msg *copy;
 991
 992        /*
 993         * Create dummy message to copy real message to.
 994         */
 995        copy = load_msg(buf, bufsz);
 996        if (!IS_ERR(copy))
 997                copy->m_ts = bufsz;
 998        return copy;
 999}
1000
/* Release a message obtained from prepare_copy(); a NULL @copy is ignored. */
static inline void free_copy(struct msg_msg *copy)
{
        if (!copy)
                return;

        free_msg(copy);
}
1006#else
1007static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
1008{
1009        return ERR_PTR(-ENOSYS);
1010}
1011
/*
 * Stub used when CONFIG_CHECKPOINT_RESTORE is not set: the matching
 * prepare_copy() never returns a message, so there is nothing to release.
 */
static inline void free_copy(struct msg_msg *copy)
{
}
1015#endif
1016
1017static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
1018{
1019        struct msg_msg *msg, *found = NULL;
1020        long count = 0;
1021
1022        list_for_each_entry(msg, &msq->q_messages, m_list) {
1023                if (testmsg(msg, *msgtyp, mode) &&
1024                    !security_msg_queue_msgrcv(&msq->q_perm, msg, current,
1025                                               *msgtyp, mode)) {
1026                        if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
1027                                *msgtyp = msg->m_type - 1;
1028                                found = msg;
1029                        } else if (mode == SEARCH_NUMBER) {
1030                                if (*msgtyp == count)
1031                                        return msg;
1032                        } else
1033                                return msg;
1034                        count++;
1035                }
1036        }
1037
1038        return found ?: ERR_PTR(-EAGAIN);
1039}
1040
/*
 * do_msgrcv - common receive path used by ksys_msgrcv() and
 * compat_ksys_msgrcv().
 * @msqid:       queue identifier; negative values are rejected with -EINVAL
 * @buf:         user buffer, passed to @msg_handler and, for MSG_COPY, to
 *               prepare_copy()
 * @bufsz:       room for the message text; values that are negative when
 *               viewed as long are rejected with -EINVAL
 * @msgtyp:      requested type, interpreted by convert_mode() together with
 *               @msgflg
 * @msgflg:      receive flags; IPC_NOWAIT, MSG_NOERROR, MSG_EXCEPT and
 *               MSG_COPY are examined here
 * @msg_handler: copies the selected message to @buf; its return value becomes
 *               the return value of this function
 *
 * Returns the value produced by @msg_handler once a message was obtained.
 * Otherwise returns a negative errno: -EINVAL for bad arguments, -EACCES if
 * ipcperms() denies access, -EIDRM if the queue is no longer valid, -E2BIG
 * if the message is larger than @bufsz and MSG_NOERROR is not set, -ENOMSG
 * if nothing matches and IPC_NOWAIT is set, -ERESTARTNOHAND if a signal is
 * pending after waiting, or an error pointer propagated from prepare_copy(),
 * msq_obtain_object_check(), copy_msg() or the r_msg field filled in while
 * this task was waiting.
 */
1041static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
1042               long (*msg_handler)(void __user *, struct msg_msg *, size_t))
1043{
1044        int mode;
1045        struct msg_queue *msq;
1046        struct ipc_namespace *ns;
1047        struct msg_msg *msg, *copy = NULL;
1048        DEFINE_WAKE_Q(wake_q);
1049
1050        ns = current->nsproxy->ipc_ns;
1051
            /* The cast makes any bufsz above LONG_MAX fail as well. */
1052        if (msqid < 0 || (long) bufsz < 0)
1053                return -EINVAL;
1054
            /*
             * MSG_COPY cannot be combined with MSG_EXCEPT and requires
             * IPC_NOWAIT. The placeholder message is prepared before any lock
             * is taken; its size is capped at ns->msg_ctlmax.
             */
1055        if (msgflg & MSG_COPY) {
1056                if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
1057                        return -EINVAL;
1058                copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
1059                if (IS_ERR(copy))
1060                        return PTR_ERR(copy);
1061        }
            /* May rewrite msgtyp: a negative type is replaced by its magnitude. */
1062        mode = convert_mode(&msgtyp, msgflg);
1063
1064        rcu_read_lock();
1065        msq = msq_obtain_object_check(ns, msqid);
1066        if (IS_ERR(msq)) {
1067                rcu_read_unlock();
1068                free_copy(copy);
1069                return PTR_ERR(msq);
1070        }
1071
            /* Each pass: check permission, lock, look for a message, else wait. */
1072        for (;;) {
                    /* On-stack wait record, linked into q_receivers while sleeping. */
1073                struct msg_receiver msr_d;
1074
                    /* Permission is checked again on every pass, before locking. */
1075                msg = ERR_PTR(-EACCES);
1076                if (ipcperms(ns, &msq->q_perm, S_IRUGO))
1077                        goto out_unlock1;
1078
1079                ipc_lock_object(&msq->q_perm);
1080
1081                /* raced with RMID? */
1082                if (!ipc_valid_object(&msq->q_perm)) {
1083                        msg = ERR_PTR(-EIDRM);
1084                        goto out_unlock0;
1085                }
1086
1087                msg = find_msg(msq, &msgtyp, mode);
1088                if (!IS_ERR(msg)) {
1089                        /*
1090                         * Found a suitable message.
1091                         * Unlink it from the queue.
1092                         */
1093                        if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
1094                                msg = ERR_PTR(-E2BIG);
1095                                goto out_unlock0;
1096                        }
1097                        /*
1098                         * If we are copying, then do not unlink message and do
1099                         * not update queue parameters.
1100                         */
                            /*
                             * NOTE(review): copy_msg() is defined outside this
                             * chunk; presumably it fills 'copy' and returns it,
                             * or returns an ERR_PTR() - confirm. Either way the
                             * result is handled at out_unlock1 below.
                             */
1101                        if (msgflg & MSG_COPY) {
1102                                msg = copy_msg(msg, copy);
1103                                goto out_unlock0;
1104                        }
1105
                            /* Dequeue and update the per-queue and per-namespace counters. */
1106                        list_del(&msg->m_list);
1107                        msq->q_qnum--;
1108                        msq->q_rtime = ktime_get_real_seconds();
1109                        ipc_update_pid(&msq->q_lrpid, task_tgid(current));
1110                        msq->q_cbytes -= msg->m_ts;
1111                        atomic_sub(msg->m_ts, &ns->msg_bytes);
1112                        atomic_dec(&ns->msg_hdrs);
                            /*
                             * NOTE(review): ss_wakeup() is defined outside this
                             * chunk; presumably it queues blocked senders on
                             * wake_q now that space was freed - confirm.
                             */
1113                        ss_wakeup(msq, &wake_q, false);
1114
1115                        goto out_unlock0;
1116                }
1117
1118                /* No message waiting. Wait for a message */
1119                if (msgflg & IPC_NOWAIT) {
1120                        msg = ERR_PTR(-ENOMSG);
1121                        goto out_unlock0;
1122                }
1123
                    /*
                     * Register as a waiting receiver. Every field is filled in
                     * while the queue lock is still held.
                     */
1124                list_add_tail(&msr_d.r_list, &msq->q_receivers);
1125                msr_d.r_tsk = current;
1126                msr_d.r_msgtype = msgtyp;
1127                msr_d.r_mode = mode;
1128                if (msgflg & MSG_NOERROR)
1129                        msr_d.r_maxsize = INT_MAX;
1130                else
1131                        msr_d.r_maxsize = bufsz;
                    /* -EAGAIN marks "nothing delivered yet"; it is tested after wakeup. */
1132                msr_d.r_msg = ERR_PTR(-EAGAIN);
1133                __set_current_state(TASK_INTERRUPTIBLE);
1134
1135                ipc_unlock_object(&msq->q_perm);
1136                rcu_read_unlock();
1137                schedule();
1138
1139                /*
1140                 * Lockless receive, part 1:
1141                 * We don't hold a reference to the queue and getting a
1142                 * reference would defeat the idea of a lockless operation,
1143                 * thus the code relies on rcu to guarantee the existence of
1144                 * msq:
1145                 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
1146                 * Thus if r_msg is -EAGAIN, then the queue is not yet destroyed.
1147                 */
1148                rcu_read_lock();
1149
1150                /*
1151                 * Lockless receive, part 2:
1152                 * The work in pipelined_send() and expunge_all():
1153                 * - Set pointer to message
1154                 * - Queue the receiver task for later wakeup
1155                 * - Wake up the process after the lock is dropped.
1156                 *
1157                 * Should the process wake up before this wakeup (due to a
1158                 * signal) it will either see the message and continue ...
1159                 */
1160                msg = READ_ONCE(msr_d.r_msg);
                    /* Something was delivered: leave without taking the queue lock. */
1161                if (msg != ERR_PTR(-EAGAIN))
1162                        goto out_unlock1;
1163
1164                 /*
1165                  * ... or see -EAGAIN, acquire the lock to check the message
1166                  * again.
1167                  */
1168                ipc_lock_object(&msq->q_perm);
1169
1170                msg = msr_d.r_msg;
1171                if (msg != ERR_PTR(-EAGAIN))
1172                        goto out_unlock0;
1173
                    /*
                     * Still -EAGAIN with the lock held: take the wait record
                     * off q_receivers before bailing out or retrying.
                     */
1174                list_del(&msr_d.r_list);
1175                if (signal_pending(current)) {
1176                        msg = ERR_PTR(-ERESTARTNOHAND);
1177                        goto out_unlock0;
1178                }
1179
                    /* Retry; the RCU read section stays open for the next pass. */
1180                ipc_unlock_object(&msq->q_perm);
1181        }
1182
            /* out_unlock0: queue lock held. Queued wakeups run after it is dropped. */
1183out_unlock0:
1184        ipc_unlock_object(&msq->q_perm);
1185        wake_up_q(&wake_q);
            /* out_unlock1: only the RCU read section is held. */
1186out_unlock1:
1187        rcu_read_unlock();
1188        if (IS_ERR(msg)) {
1189                free_copy(copy);
1190                return PTR_ERR(msg);
1191        }
1192
            /*
             * Hand the message to the caller-supplied copy-out routine, then
             * free it. The handler's long result is stored in bufsz (size_t)
             * and converted back to long on return.
             */
1193        bufsz = msg_handler(buf, msg, bufsz);
1194        free_msg(msg);
1195
1196        return bufsz;
1197}
1198
1199long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
1200                 long msgtyp, int msgflg)
1201{
1202        return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
1203}
1204
1205SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
1206                long, msgtyp, int, msgflg)
1207{
1208        return ksys_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg);
1209}
1210
1211#ifdef CONFIG_COMPAT
1212static long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
1213{
1214        struct compat_msgbuf __user *msgp = dest;
1215        size_t msgsz;
1216
1217        if (put_user(msg->m_type, &msgp->mtype))
1218                return -EFAULT;
1219
1220        msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
1221        if (store_msg(msgp->mtext, msg, msgsz))
1222                return -EFAULT;
1223        return msgsz;
1224}
1225
1226long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz,
1227                        compat_long_t msgtyp, int msgflg)
1228{
1229        return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, (long)msgtyp,
1230                         msgflg, compat_do_msg_fill);
1231}
1232
1233COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
1234                       compat_ssize_t, msgsz, compat_long_t, msgtyp,
1235                       int, msgflg)
1236{
1237        return compat_ksys_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg);
1238}
1239#endif
1240
1241void msg_init_ns(struct ipc_namespace *ns)
1242{
1243        ns->msg_ctlmax = MSGMAX;
1244        ns->msg_ctlmnb = MSGMNB;
1245        ns->msg_ctlmni = MSGMNI;
1246
1247        atomic_set(&ns->msg_bytes, 0);
1248        atomic_set(&ns->msg_hdrs, 0);
1249        ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
1250}
1251
1252#ifdef CONFIG_IPC_NS
1253void msg_exit_ns(struct ipc_namespace *ns)
1254{
1255        free_ipcs(ns, &msg_ids(ns), freeque);
1256        idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
1257        rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
1258}
1259#endif
1260
1261#ifdef CONFIG_PROC_FS
1262static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
1263{
1264        struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1265        struct user_namespace *user_ns = seq_user_ns(s);
1266        struct kern_ipc_perm *ipcp = it;
1267        struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
1268
1269        seq_printf(s,
1270                   "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10llu %10llu %10llu\n",
1271                   msq->q_perm.key,
1272                   msq->q_perm.id,
1273                   msq->q_perm.mode,
1274                   msq->q_cbytes,
1275                   msq->q_qnum,
1276                   pid_nr_ns(msq->q_lspid, pid_ns),
1277                   pid_nr_ns(msq->q_lrpid, pid_ns),
1278                   from_kuid_munged(user_ns, msq->q_perm.uid),
1279                   from_kgid_munged(user_ns, msq->q_perm.gid),
1280                   from_kuid_munged(user_ns, msq->q_perm.cuid),
1281                   from_kgid_munged(user_ns, msq->q_perm.cgid),
1282                   msq->q_stime,
1283                   msq->q_rtime,
1284                   msq->q_ctime);
1285
1286        return 0;
1287}
1288#endif
1289
1290void __init msg_init(void)
1291{
1292        msg_init_ns(&init_ipc_ns);
1293
1294        ipc_init_proc_interface("sysvipc/msg",
1295                                "       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
1296                                IPC_MSG_IDS, sysvipc_msg_proc_show);
1297}
1298