linux/ipc/msg.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * linux/ipc/msg.c
   4 * Copyright (C) 1992 Krishna Balasubramanian
   5 *
   6 * Removed all the remaining kerneld mess
   7 * Catch the -EFAULT stuff properly
   8 * Use GFP_KERNEL for messages as in 1.2
   9 * Fixed up the unchecked user space derefs
  10 * Copyright (C) 1998 Alan Cox & Andi Kleen
  11 *
  12 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  13 *
  14 * mostly rewritten, threaded and wake-one semantics added
  15 * MSGMAX limit removed, sysctl's added
  16 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
  17 *
  18 * support for audit of ipc object properties and permission changes
  19 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  20 *
  21 * namespaces support
  22 * OpenVZ, SWsoft Inc.
  23 * Pavel Emelianov <xemul@openvz.org>
  24 */
  25
  26#include <linux/capability.h>
  27#include <linux/msg.h>
  28#include <linux/spinlock.h>
  29#include <linux/init.h>
  30#include <linux/mm.h>
  31#include <linux/proc_fs.h>
  32#include <linux/list.h>
  33#include <linux/security.h>
  34#include <linux/sched/wake_q.h>
  35#include <linux/syscalls.h>
  36#include <linux/audit.h>
  37#include <linux/seq_file.h>
  38#include <linux/rwsem.h>
  39#include <linux/nsproxy.h>
  40#include <linux/ipc_namespace.h>
  41
  42#include <asm/current.h>
  43#include <linux/uaccess.h>
  44#include "util.h"
  45
  46/* one msg_receiver structure for each sleeping receiver */
  47struct msg_receiver {
            /* Link used to walk the owning queue's q_receivers list. */
  48        struct list_head        r_list;
            /* Task handed to wake_q_add() when this receiver is woken. */
  49        struct task_struct      *r_tsk;
  50
            /* SEARCH_* mode and message type, passed to testmsg() by senders. */
  51        int                     r_mode;
  52        long                    r_msgtype;
            /* pipelined_send() reports -E2BIG when a message's m_ts exceeds this. */
  53        long                    r_maxsize;
  54
            /*
             * Result slot written with WRITE_ONCE(): either the delivered
             * message or an ERR_PTR() code (-E2BIG, -EIDRM, -EAGAIN).
             */
  55        struct msg_msg          *r_msg;
  56};
  57
  58/* one msg_sender for each sleeping sender */
  59struct msg_sender {
            /*
             * Link on the queue's q_senders list. ss_wakeup(kill=true) sets
             * list.next to NULL, which makes ss_del() skip list_del().
             */
  60        struct list_head        list;
            /* The blocked task; set to current by ss_add(). */
  61        struct task_struct      *tsk;
            /* Size of the pending message, re-checked by ss_wakeup(). */
  62        size_t                  msgsz;
  63};
  64
  65#define SEARCH_ANY              1       /* msgtyp == 0: testmsg() accepts every message */
  66#define SEARCH_EQUAL            2       /* msgtyp > 0: m_type must equal the requested type */
  67#define SEARCH_NOTEQUAL         3       /* msgtyp > 0 with MSG_EXCEPT: m_type must differ */
  68#define SEARCH_LESSEQUAL        4       /* msgtyp < 0: m_type must be <= the negated msgtyp */
  69#define SEARCH_NUMBER           5       /* MSG_COPY: find_msg() picks by position, msgtyp is the index */
  70
    /* The message-queue slot (index IPC_MSG_IDS) of a namespace's ids[] array. */
  71#define msg_ids(ns)     ((ns)->ids[IPC_MSG_IDS])
  72
  73static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
  74{
  75        struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id);
  76
  77        if (IS_ERR(ipcp))
  78                return ERR_CAST(ipcp);
  79
  80        return container_of(ipcp, struct msg_queue, q_perm);
  81}
  82
  83static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
  84                                                        int id)
  85{
  86        struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);
  87
  88        if (IS_ERR(ipcp))
  89                return ERR_CAST(ipcp);
  90
  91        return container_of(ipcp, struct msg_queue, q_perm);
  92}
  93
  94static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
  95{
  96        ipc_rmid(&msg_ids(ns), &s->q_perm);
  97}
  98
  99static void msg_rcu_free(struct rcu_head *head)
 100{
 101        struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
 102        struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
 103
 104        security_msg_queue_free(msq);
 105        kvfree(msq);
 106}
 107
 108/**
 109 * newque - Create a new msg queue
 110 * @ns: namespace
 111 * @params: ptr to the structure that contains the key and msgflg
 112 *
 113 * Called with msg_ids.rwsem held (writer)
 114 */
 115static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 116{
 117        struct msg_queue *msq;
 118        int retval;
 119        key_t key = params->key;
 120        int msgflg = params->flg;
 121
 122        msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
 123        if (unlikely(!msq))
 124                return -ENOMEM;
 125
 126        msq->q_perm.mode = msgflg & S_IRWXUGO;
 127        msq->q_perm.key = key;
 128
 129        msq->q_perm.security = NULL;
 130        retval = security_msg_queue_alloc(msq);
 131        if (retval) {
 132                kvfree(msq);
 133                return retval;
 134        }
 135
 136        msq->q_stime = msq->q_rtime = 0;
 137        msq->q_ctime = ktime_get_real_seconds();
 138        msq->q_cbytes = msq->q_qnum = 0;
 139        msq->q_qbytes = ns->msg_ctlmnb;
 140        msq->q_lspid = msq->q_lrpid = 0;
 141        INIT_LIST_HEAD(&msq->q_messages);
 142        INIT_LIST_HEAD(&msq->q_receivers);
 143        INIT_LIST_HEAD(&msq->q_senders);
 144
 145        /* ipc_addid() locks msq upon success. */
 146        retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 147        if (retval < 0) {
 148                call_rcu(&msq->q_perm.rcu, msg_rcu_free);
 149                return retval;
 150        }
 151
 152        ipc_unlock_object(&msq->q_perm);
 153        rcu_read_unlock();
 154
 155        return msq->q_perm.id;
 156}
 157
 158static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz)
 159{
 160        return msgsz + msq->q_cbytes <= msq->q_qbytes &&
 161                1 + msq->q_qnum <= msq->q_qbytes;
 162}
 163
 164static inline void ss_add(struct msg_queue *msq,
 165                          struct msg_sender *mss, size_t msgsz)
 166{
 167        mss->tsk = current;
 168        mss->msgsz = msgsz;
 169        __set_current_state(TASK_INTERRUPTIBLE);
 170        list_add_tail(&mss->list, &msq->q_senders);
 171}
 172
 173static inline void ss_del(struct msg_sender *mss)
 174{
 175        if (mss->list.next)
 176                list_del(&mss->list);
 177}
 178
 179static void ss_wakeup(struct msg_queue *msq,
 180                      struct wake_q_head *wake_q, bool kill)
 181{
 182        struct msg_sender *mss, *t;
 183        struct task_struct *stop_tsk = NULL;
 184        struct list_head *h = &msq->q_senders;
 185
 186        list_for_each_entry_safe(mss, t, h, list) {
 187                if (kill)
 188                        mss->list.next = NULL;
 189
 190                /*
 191                 * Stop at the first task we don't wakeup,
 192                 * we've already iterated the original
 193                 * sender queue.
 194                 */
 195                else if (stop_tsk == mss->tsk)
 196                        break;
 197                /*
 198                 * We are not in an EIDRM scenario here, therefore
 199                 * verify that we really need to wakeup the task.
 200                 * To maintain current semantics and wakeup order,
 201                 * move the sender to the tail on behalf of the
 202                 * blocked task.
 203                 */
 204                else if (!msg_fits_inqueue(msq, mss->msgsz)) {
 205                        if (!stop_tsk)
 206                                stop_tsk = mss->tsk;
 207
 208                        list_move_tail(&mss->list, &msq->q_senders);
 209                        continue;
 210                }
 211
 212                wake_q_add(wake_q, mss->tsk);
 213        }
 214}
 215
 216static void expunge_all(struct msg_queue *msq, int res,
 217                        struct wake_q_head *wake_q)
 218{
 219        struct msg_receiver *msr, *t;
 220
 221        list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
 222                wake_q_add(wake_q, msr->r_tsk);
 223                WRITE_ONCE(msr->r_msg, ERR_PTR(res));
 224        }
 225}
 226
 227/*
 228 * freeque() wakes up waiters on the sender and receiver waiting queue,
 229 * removes the message queue from message queue ID IDR, and cleans up all the
 230 * messages associated with this queue.
 231 *
 232 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
 233 * before freeque() is called. msg_ids.rwsem remains locked on exit.
 234 */
 235static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 236{
 237        struct msg_msg *msg, *t;
 238        struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
 239        DEFINE_WAKE_Q(wake_q);
 240
            /* Sleeping receivers get ERR_PTR(-EIDRM) as their result. */
 241        expunge_all(msq, -EIDRM, &wake_q);
            /* kill == true: wake every sender and clear its list.next. */
 242        ss_wakeup(msq, &wake_q, true);
 243        msg_rmid(ns, msq);
            /* The collected wakeups are issued only after the object lock is dropped. */
 244        ipc_unlock_object(&msq->q_perm);
 245        wake_up_q(&wake_q);
 246        rcu_read_unlock();
 247
            /* Free each queued message and drop it from the namespace header count. */
 248        list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
 249                atomic_dec(&ns->msg_hdrs);
 250                free_msg(msg);
 251        }
            /* Remove this queue's bytes from the namespace total. */
 252        atomic_sub(msq->q_cbytes, &ns->msg_bytes);
            /* Drop a reference; msg_rcu_free is the release callback. */
 253        ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 254}
 255
 256/*
 257 * Called with msg_ids.rwsem and ipcp locked.
 258 */
 259static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
 260{
 261        struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
 262
 263        return security_msg_queue_associate(msq, msgflg);
 264}
 265
 266SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
 267{
 268        struct ipc_namespace *ns;
 269        static const struct ipc_ops msg_ops = {
 270                .getnew = newque,
 271                .associate = msg_security,
 272        };
 273        struct ipc_params msg_params;
 274
 275        ns = current->nsproxy->ipc_ns;
 276
 277        msg_params.key = key;
 278        msg_params.flg = msgflg;
 279
 280        return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
 281}
 282
 283static inline unsigned long
 284copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 285{
 286        switch (version) {
 287        case IPC_64:
 288                return copy_to_user(buf, in, sizeof(*in));
 289        case IPC_OLD:
 290        {
 291                struct msqid_ds out;
 292
 293                memset(&out, 0, sizeof(out));
 294
 295                ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
 296
 297                out.msg_stime           = in->msg_stime;
 298                out.msg_rtime           = in->msg_rtime;
 299                out.msg_ctime           = in->msg_ctime;
 300
 301                if (in->msg_cbytes > USHRT_MAX)
 302                        out.msg_cbytes  = USHRT_MAX;
 303                else
 304                        out.msg_cbytes  = in->msg_cbytes;
 305                out.msg_lcbytes         = in->msg_cbytes;
 306
 307                if (in->msg_qnum > USHRT_MAX)
 308                        out.msg_qnum    = USHRT_MAX;
 309                else
 310                        out.msg_qnum    = in->msg_qnum;
 311
 312                if (in->msg_qbytes > USHRT_MAX)
 313                        out.msg_qbytes  = USHRT_MAX;
 314                else
 315                        out.msg_qbytes  = in->msg_qbytes;
 316                out.msg_lqbytes         = in->msg_qbytes;
 317
 318                out.msg_lspid           = in->msg_lspid;
 319                out.msg_lrpid           = in->msg_lrpid;
 320
 321                return copy_to_user(buf, &out, sizeof(out));
 322        }
 323        default:
 324                return -EINVAL;
 325        }
 326}
 327
 328static inline unsigned long
 329copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 330{
 331        switch (version) {
 332        case IPC_64:
 333                if (copy_from_user(out, buf, sizeof(*out)))
 334                        return -EFAULT;
 335                return 0;
 336        case IPC_OLD:
 337        {
 338                struct msqid_ds tbuf_old;
 339
 340                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 341                        return -EFAULT;
 342
 343                out->msg_perm.uid       = tbuf_old.msg_perm.uid;
 344                out->msg_perm.gid       = tbuf_old.msg_perm.gid;
 345                out->msg_perm.mode      = tbuf_old.msg_perm.mode;
 346
 347                if (tbuf_old.msg_qbytes == 0)
 348                        out->msg_qbytes = tbuf_old.msg_lqbytes;
 349                else
 350                        out->msg_qbytes = tbuf_old.msg_qbytes;
 351
 352                return 0;
 353        }
 354        default:
 355                return -EINVAL;
 356        }
 357}
 358
 359/*
 360 * This function handles some msgctl commands which require the rwsem
 361 * to be held in write mode.
 362 * NOTE: no locks must be held, the rwsem is taken inside this function.
 363 */
 364static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 365                        struct msqid64_ds *msqid64)
 366{
            /*
             * Handles IPC_RMID and IPC_SET; any other cmd that gets past the
             * checks below ends with -EINVAL.  Returns 0 on success.
             */
 367        struct kern_ipc_perm *ipcp;
 368        struct msg_queue *msq;
 369        int err;
 370
 371        down_write(&msg_ids(ns).rwsem);
 372        rcu_read_lock();
 373
            /* Failure is reported as an ERR_PTR() and returned to the caller. */
 374        ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
 375                                      &msqid64->msg_perm, msqid64->msg_qbytes);
 376        if (IS_ERR(ipcp)) {
 377                err = PTR_ERR(ipcp);
 378                goto out_unlock1;
 379        }
 380
 381        msq = container_of(ipcp, struct msg_queue, q_perm);
 382
 383        err = security_msg_queue_msgctl(msq, cmd);
 384        if (err)
 385                goto out_unlock1;
 386
 387        switch (cmd) {
 388        case IPC_RMID:
 389                ipc_lock_object(&msq->q_perm);
 390                /* freeque unlocks the ipc object and rcu */
 391                freeque(ns, ipcp);
                    /* err is still 0 from the security check above. */
 392                goto out_up;
 393        case IPC_SET:
 394        {
 395                DEFINE_WAKE_Q(wake_q);
 396
                    /* A byte limit above ns->msg_ctlmnb needs CAP_SYS_RESOURCE. */
 397                if (msqid64->msg_qbytes > ns->msg_ctlmnb &&
 398                    !capable(CAP_SYS_RESOURCE)) {
 399                        err = -EPERM;
 400                        goto out_unlock1;
 401                }
 402
 403                ipc_lock_object(&msq->q_perm);
 404                err = ipc_update_perm(&msqid64->msg_perm, ipcp);
 405                if (err)
 406                        goto out_unlock0;
 407
 408                msq->q_qbytes = msqid64->msg_qbytes;
 409
 410                msq->q_ctime = ktime_get_real_seconds();
 411                /*
 412                 * Sleeping receivers might be excluded by
 413                 * stricter permissions.
 414                 */
 415                expunge_all(msq, -EAGAIN, &wake_q);
 416                /*
 417                 * Sleeping senders might be able to send
 418                 * due to a larger queue size.
 419                 */
 420                ss_wakeup(msq, &wake_q, false);
                    /* Wakeups are issued only after the object lock is dropped. */
 421                ipc_unlock_object(&msq->q_perm);
 422                wake_up_q(&wake_q);
 423
 424                goto out_unlock1;
 425        }
 426        default:
 427                err = -EINVAL;
 428                goto out_unlock1;
 429        }
 430
    /* out_unlock0: object lock, RCU and rwsem are all still held. */
 431out_unlock0:
 432        ipc_unlock_object(&msq->q_perm);
    /* out_unlock1: only RCU and the rwsem are held. */
 433out_unlock1:
 434        rcu_read_unlock();
    /* out_up: only the rwsem is held (freeque() already left the RCU section). */
 435out_up:
 436        up_write(&msg_ids(ns).rwsem);
 437        return err;
 438}
 439
 440static int msgctl_info(struct ipc_namespace *ns, int msqid,
 441                         int cmd, struct msginfo *msginfo)
 442{
 443        int err;
 444        int max_id;
 445
 446        /*
 447         * We must not return kernel stack data.
 448         * due to padding, it's not enough
 449         * to set all member fields.
 450         */
 451        err = security_msg_queue_msgctl(NULL, cmd);
 452        if (err)
 453                return err;
 454
 455        memset(msginfo, 0, sizeof(*msginfo));
 456        msginfo->msgmni = ns->msg_ctlmni;
 457        msginfo->msgmax = ns->msg_ctlmax;
 458        msginfo->msgmnb = ns->msg_ctlmnb;
 459        msginfo->msgssz = MSGSSZ;
 460        msginfo->msgseg = MSGSEG;
 461        down_read(&msg_ids(ns).rwsem);
 462        if (cmd == MSG_INFO) {
 463                msginfo->msgpool = msg_ids(ns).in_use;
 464                msginfo->msgmap = atomic_read(&ns->msg_hdrs);
 465                msginfo->msgtql = atomic_read(&ns->msg_bytes);
 466        } else {
 467                msginfo->msgmap = MSGMAP;
 468                msginfo->msgpool = MSGPOOL;
 469                msginfo->msgtql = MSGTQL;
 470        }
 471        max_id = ipc_get_maxid(&msg_ids(ns));
 472        up_read(&msg_ids(ns).rwsem);
 473        return (max_id < 0) ? 0 : max_id;
 474}
 475
 476static int msgctl_stat(struct ipc_namespace *ns, int msqid,
 477                         int cmd, struct msqid64_ds *p)
 478{
 479        int err;
 480        struct msg_queue *msq;
 481        int success_return;
 482
 483        memset(p, 0, sizeof(*p));
 484
 485        rcu_read_lock();
 486        if (cmd == MSG_STAT) {
 487                msq = msq_obtain_object(ns, msqid);
 488                if (IS_ERR(msq)) {
 489                        err = PTR_ERR(msq);
 490                        goto out_unlock;
 491                }
 492                success_return = msq->q_perm.id;
 493        } else {
 494                msq = msq_obtain_object_check(ns, msqid);
 495                if (IS_ERR(msq)) {
 496                        err = PTR_ERR(msq);
 497                        goto out_unlock;
 498                }
 499                success_return = 0;
 500        }
 501
 502        err = -EACCES;
 503        if (ipcperms(ns, &msq->q_perm, S_IRUGO))
 504                goto out_unlock;
 505
 506        err = security_msg_queue_msgctl(msq, cmd);
 507        if (err)
 508                goto out_unlock;
 509
 510        kernel_to_ipc64_perm(&msq->q_perm, &p->msg_perm);
 511        p->msg_stime  = msq->q_stime;
 512        p->msg_rtime  = msq->q_rtime;
 513        p->msg_ctime  = msq->q_ctime;
 514        p->msg_cbytes = msq->q_cbytes;
 515        p->msg_qnum   = msq->q_qnum;
 516        p->msg_qbytes = msq->q_qbytes;
 517        p->msg_lspid  = msq->q_lspid;
 518        p->msg_lrpid  = msq->q_lrpid;
 519        rcu_read_unlock();
 520
 521        return success_return;
 522
 523out_unlock:
 524        rcu_read_unlock();
 525        return err;
 526}
 527
 528SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
 529{
 530        int version;
 531        struct ipc_namespace *ns;
 532        struct msqid64_ds msqid64;
 533        int err;
 534
 535        if (msqid < 0 || cmd < 0)
 536                return -EINVAL;
 537
 538        version = ipc_parse_version(&cmd);
 539        ns = current->nsproxy->ipc_ns;
 540
 541        switch (cmd) {
 542        case IPC_INFO:
 543        case MSG_INFO: {
 544                struct msginfo msginfo;
 545                err = msgctl_info(ns, msqid, cmd, &msginfo);
 546                if (err < 0)
 547                        return err;
 548                if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 549                        err = -EFAULT;
 550                return err;
 551        }
 552        case MSG_STAT:  /* msqid is an index rather than a msg queue id */
 553        case IPC_STAT:
 554                err = msgctl_stat(ns, msqid, cmd, &msqid64);
 555                if (err < 0)
 556                        return err;
 557                if (copy_msqid_to_user(buf, &msqid64, version))
 558                        err = -EFAULT;
 559                return err;
 560        case IPC_SET:
 561                if (copy_msqid_from_user(&msqid64, buf, version))
 562                        return -EFAULT;
 563                /* fallthru */
 564        case IPC_RMID:
 565                return msgctl_down(ns, msqid, cmd, &msqid64);
 566        default:
 567                return  -EINVAL;
 568        }
 569}
 570
 571#ifdef CONFIG_COMPAT
 572
 573struct compat_msqid_ds {
            /*
             * Layout used by the compat copy helpers when the version is not
             * IPC_64.  copy_compat_msqid_from_user() reads only msg_perm and
             * msg_qbytes from it.
             */
 574        struct compat_ipc_perm msg_perm;
            /*
             * msg_first, msg_last, msg_lcbytes and msg_lqbytes are never
             * assigned by copy_compat_msqid_to_user(); they stay zero from
             * its memset().
             */
 575        compat_uptr_t msg_first;
 576        compat_uptr_t msg_last;
 577        compat_time_t msg_stime;
 578        compat_time_t msg_rtime;
 579        compat_time_t msg_ctime;
 580        compat_ulong_t msg_lcbytes;
 581        compat_ulong_t msg_lqbytes;
 582        unsigned short msg_cbytes;
 583        unsigned short msg_qnum;
 584        unsigned short msg_qbytes;
 585        compat_ipc_pid_t msg_lspid;
 586        compat_ipc_pid_t msg_lrpid;
 587};
 588
 589static int copy_compat_msqid_from_user(struct msqid64_ds *out, void __user *buf,
 590                                        int version)
 591{
 592        memset(out, 0, sizeof(*out));
 593        if (version == IPC_64) {
 594                struct compat_msqid64_ds *p = buf;
 595                if (get_compat_ipc64_perm(&out->msg_perm, &p->msg_perm))
 596                        return -EFAULT;
 597                if (get_user(out->msg_qbytes, &p->msg_qbytes))
 598                        return -EFAULT;
 599        } else {
 600                struct compat_msqid_ds *p = buf;
 601                if (get_compat_ipc_perm(&out->msg_perm, &p->msg_perm))
 602                        return -EFAULT;
 603                if (get_user(out->msg_qbytes, &p->msg_qbytes))
 604                        return -EFAULT;
 605        }
 606        return 0;
 607}
 608
 609static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in,
 610                                        int version)
 611{
 612        if (version == IPC_64) {
 613                struct compat_msqid64_ds v;
 614                memset(&v, 0, sizeof(v));
 615                to_compat_ipc64_perm(&v.msg_perm, &in->msg_perm);
 616                v.msg_stime = in->msg_stime;
 617                v.msg_rtime = in->msg_rtime;
 618                v.msg_ctime = in->msg_ctime;
 619                v.msg_cbytes = in->msg_cbytes;
 620                v.msg_qnum = in->msg_qnum;
 621                v.msg_qbytes = in->msg_qbytes;
 622                v.msg_lspid = in->msg_lspid;
 623                v.msg_lrpid = in->msg_lrpid;
 624                return copy_to_user(buf, &v, sizeof(v));
 625        } else {
 626                struct compat_msqid_ds v;
 627                memset(&v, 0, sizeof(v));
 628                to_compat_ipc_perm(&v.msg_perm, &in->msg_perm);
 629                v.msg_stime = in->msg_stime;
 630                v.msg_rtime = in->msg_rtime;
 631                v.msg_ctime = in->msg_ctime;
 632                v.msg_cbytes = in->msg_cbytes;
 633                v.msg_qnum = in->msg_qnum;
 634                v.msg_qbytes = in->msg_qbytes;
 635                v.msg_lspid = in->msg_lspid;
 636                v.msg_lrpid = in->msg_lrpid;
 637                return copy_to_user(buf, &v, sizeof(v));
 638        }
 639}
 640
 641COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr)
 642{
 643        struct ipc_namespace *ns;
 644        int err;
 645        struct msqid64_ds msqid64;
 646        int version = compat_ipc_parse_version(&cmd);
 647
 648        ns = current->nsproxy->ipc_ns;
 649
 650        if (msqid < 0 || cmd < 0)
 651                return -EINVAL;
 652
 653        switch (cmd & (~IPC_64)) {
 654        case IPC_INFO:
 655        case MSG_INFO: {
 656                struct msginfo msginfo;
 657                err = msgctl_info(ns, msqid, cmd, &msginfo);
 658                if (err < 0)
 659                        return err;
 660                if (copy_to_user(uptr, &msginfo, sizeof(struct msginfo)))
 661                        err = -EFAULT;
 662                return err;
 663        }
 664        case IPC_STAT:
 665        case MSG_STAT:
 666                err = msgctl_stat(ns, msqid, cmd, &msqid64);
 667                if (err < 0)
 668                        return err;
 669                if (copy_compat_msqid_to_user(uptr, &msqid64, version))
 670                        err = -EFAULT;
 671                return err;
 672        case IPC_SET:
 673                if (copy_compat_msqid_from_user(&msqid64, uptr, version))
 674                        return -EFAULT;
 675                /* fallthru */
 676        case IPC_RMID:
 677                return msgctl_down(ns, msqid, cmd, &msqid64);
 678        default:
 679                return -EINVAL;
 680        }
 681}
 682#endif
 683
 684static int testmsg(struct msg_msg *msg, long type, int mode)
 685{
 686        switch (mode) {
 687        case SEARCH_ANY:
 688        case SEARCH_NUMBER:
 689                return 1;
 690        case SEARCH_LESSEQUAL:
 691                if (msg->m_type <= type)
 692                        return 1;
 693                break;
 694        case SEARCH_EQUAL:
 695                if (msg->m_type == type)
 696                        return 1;
 697                break;
 698        case SEARCH_NOTEQUAL:
 699                if (msg->m_type != type)
 700                        return 1;
 701                break;
 702        }
 703        return 0;
 704}
 705
 706static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
 707                                 struct wake_q_head *wake_q)
 708{
 709        struct msg_receiver *msr, *t;
 710
 711        list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
 712                if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
 713                    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
 714                                               msr->r_msgtype, msr->r_mode)) {
 715
 716                        list_del(&msr->r_list);
 717                        if (msr->r_maxsize < msg->m_ts) {
 718                                wake_q_add(wake_q, msr->r_tsk);
 719                                WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
 720                        } else {
 721                                msq->q_lrpid = task_pid_vnr(msr->r_tsk);
 722                                msq->q_rtime = get_seconds();
 723
 724                                wake_q_add(wake_q, msr->r_tsk);
 725                                WRITE_ONCE(msr->r_msg, msg);
 726                                return 1;
 727                        }
 728                }
 729        }
 730
 731        return 0;
 732}
 733
 734static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 735                size_t msgsz, int msgflg)
 736{
 737        struct msg_queue *msq;
 738        struct msg_msg *msg;
 739        int err;
 740        struct ipc_namespace *ns;
 741        DEFINE_WAKE_Q(wake_q);
 742
 743        ns = current->nsproxy->ipc_ns;
 744
 745        if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
 746                return -EINVAL;
 747        if (mtype < 1)
 748                return -EINVAL;
 749
 750        msg = load_msg(mtext, msgsz);
 751        if (IS_ERR(msg))
 752                return PTR_ERR(msg);
 753
 754        msg->m_type = mtype;
 755        msg->m_ts = msgsz;
 756
 757        rcu_read_lock();
 758        msq = msq_obtain_object_check(ns, msqid);
 759        if (IS_ERR(msq)) {
 760                err = PTR_ERR(msq);
 761                goto out_unlock1;
 762        }
 763
 764        ipc_lock_object(&msq->q_perm);
 765
 766        for (;;) {
 767                struct msg_sender s;
 768
 769                err = -EACCES;
 770                if (ipcperms(ns, &msq->q_perm, S_IWUGO))
 771                        goto out_unlock0;
 772
 773                /* raced with RMID? */
 774                if (!ipc_valid_object(&msq->q_perm)) {
 775                        err = -EIDRM;
 776                        goto out_unlock0;
 777                }
 778
 779                err = security_msg_queue_msgsnd(msq, msg, msgflg);
 780                if (err)
 781                        goto out_unlock0;
 782
 783                if (msg_fits_inqueue(msq, msgsz))
 784                        break;
 785
 786                /* queue full, wait: */
 787                if (msgflg & IPC_NOWAIT) {
 788                        err = -EAGAIN;
 789                        goto out_unlock0;
 790                }
 791
 792                /* enqueue the sender and prepare to block */
 793                ss_add(msq, &s, msgsz);
 794
 795                if (!ipc_rcu_getref(&msq->q_perm)) {
 796                        err = -EIDRM;
 797                        goto out_unlock0;
 798                }
 799
 800                ipc_unlock_object(&msq->q_perm);
 801                rcu_read_unlock();
 802                schedule();
 803
 804                rcu_read_lock();
 805                ipc_lock_object(&msq->q_perm);
 806
 807                ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 808                /* raced with RMID? */
 809                if (!ipc_valid_object(&msq->q_perm)) {
 810                        err = -EIDRM;
 811                        goto out_unlock0;
 812                }
 813                ss_del(&s);
 814
 815                if (signal_pending(current)) {
 816                        err = -ERESTARTNOHAND;
 817                        goto out_unlock0;
 818                }
 819
 820        }
 821
 822        msq->q_lspid = task_tgid_vnr(current);
 823        msq->q_stime = get_seconds();
 824
 825        if (!pipelined_send(msq, msg, &wake_q)) {
 826                /* no one is waiting for this message, enqueue it */
 827                list_add_tail(&msg->m_list, &msq->q_messages);
 828                msq->q_cbytes += msgsz;
 829                msq->q_qnum++;
 830                atomic_add(msgsz, &ns->msg_bytes);
 831                atomic_inc(&ns->msg_hdrs);
 832        }
 833
 834        err = 0;
 835        msg = NULL;
 836
 837out_unlock0:
 838        ipc_unlock_object(&msq->q_perm);
 839        wake_up_q(&wake_q);
 840out_unlock1:
 841        rcu_read_unlock();
 842        if (msg != NULL)
 843                free_msg(msg);
 844        return err;
 845}
 846
 847SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
 848                int, msgflg)
 849{
 850        long mtype;
 851
 852        if (get_user(mtype, &msgp->mtype))
 853                return -EFAULT;
 854        return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
 855}
 856
 857#ifdef CONFIG_COMPAT
 858
 859struct compat_msgbuf {
            /* Message type; the compat msgsnd() entry reads it with get_user(). */
 860        compat_long_t mtype;
            /*
             * Start of the message text.  Only its address is used: the real
             * length is the msgsz argument passed to do_msgsnd().
             */
 861        char mtext[1];
 862};
 863
 864COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp,
 865                       compat_ssize_t, msgsz, int, msgflg)
 866{
 867        struct compat_msgbuf __user *up = compat_ptr(msgp);
 868        compat_long_t mtype;
 869
 870        if (get_user(mtype, &up->mtype))
 871                return -EFAULT;
 872        return do_msgsnd(msqid, mtype, up->mtext, (ssize_t)msgsz, msgflg);
 873}
 874#endif
 875
 876static inline int convert_mode(long *msgtyp, int msgflg)
 877{
 878        if (msgflg & MSG_COPY)
 879                return SEARCH_NUMBER;
 880        /*
 881         *  find message of correct type.
 882         *  msgtyp = 0 => get first.
 883         *  msgtyp > 0 => get first message of matching type.
 884         *  msgtyp < 0 => get message with least type must be < abs(msgtype).
 885         */
 886        if (*msgtyp == 0)
 887                return SEARCH_ANY;
 888        if (*msgtyp < 0) {
 889                if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
 890                        *msgtyp = LONG_MAX;
 891                else
 892                        *msgtyp = -*msgtyp;
 893                return SEARCH_LESSEQUAL;
 894        }
 895        if (msgflg & MSG_EXCEPT)
 896                return SEARCH_NOTEQUAL;
 897        return SEARCH_EQUAL;
 898}
 899
 900static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
 901{
 902        struct msgbuf __user *msgp = dest;
 903        size_t msgsz;
 904
 905        if (put_user(msg->m_type, &msgp->mtype))
 906                return -EFAULT;
 907
 908        msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
 909        if (store_msg(msgp->mtext, msg, msgsz))
 910                return -EFAULT;
 911        return msgsz;
 912}
 913
 914#ifdef CONFIG_CHECKPOINT_RESTORE
 915/*
 916 * This function creates new kernel message structure, large enough to store
 917 * bufsz message bytes.
 918 */
 919static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
 920{
 921        struct msg_msg *copy;
 922
 923        /*
 924         * Create dummy message to copy real message to.
 925         */
 926        copy = load_msg(buf, bufsz);
 927        if (!IS_ERR(copy))
 928                copy->m_ts = bufsz;
 929        return copy;
 930}
 931
 932static inline void free_copy(struct msg_msg *copy)
 933{
 934        if (copy)
 935                free_msg(copy);
 936}
 937#else
 938static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
 939{
 940        return ERR_PTR(-ENOSYS);
 941}
 942
 943static inline void free_copy(struct msg_msg *copy)
 944{
 945}
 946#endif
 947
 948static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
 949{
 950        struct msg_msg *msg, *found = NULL;
 951        long count = 0;
 952
 953        list_for_each_entry(msg, &msq->q_messages, m_list) {
 954                if (testmsg(msg, *msgtyp, mode) &&
 955                    !security_msg_queue_msgrcv(msq, msg, current,
 956                                               *msgtyp, mode)) {
 957                        if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
 958                                *msgtyp = msg->m_type - 1;
 959                                found = msg;
 960                        } else if (mode == SEARCH_NUMBER) {
 961                                if (*msgtyp == count)
 962                                        return msg;
 963                        } else
 964                                return msg;
 965                        count++;
 966                }
 967        }
 968
 969        return found ?: ERR_PTR(-EAGAIN);
 970}
 971
 972static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
 973               long (*msg_handler)(void __user *, struct msg_msg *, size_t))
 974{
 975        int mode;
 976        struct msg_queue *msq;
 977        struct ipc_namespace *ns;
 978        struct msg_msg *msg, *copy = NULL;
 979        DEFINE_WAKE_Q(wake_q);
 980
 981        ns = current->nsproxy->ipc_ns;
 982
 983        if (msqid < 0 || (long) bufsz < 0)
 984                return -EINVAL;
 985
 986        if (msgflg & MSG_COPY) {
 987                if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
 988                        return -EINVAL;
 989                copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
 990                if (IS_ERR(copy))
 991                        return PTR_ERR(copy);
 992        }
 993        mode = convert_mode(&msgtyp, msgflg);
 994
 995        rcu_read_lock();
 996        msq = msq_obtain_object_check(ns, msqid);
 997        if (IS_ERR(msq)) {
 998                rcu_read_unlock();
 999                free_copy(copy);
1000                return PTR_ERR(msq);
1001        }
1002
1003        for (;;) {
1004                struct msg_receiver msr_d;
1005
1006                msg = ERR_PTR(-EACCES);
1007                if (ipcperms(ns, &msq->q_perm, S_IRUGO))
1008                        goto out_unlock1;
1009
1010                ipc_lock_object(&msq->q_perm);
1011
1012                /* raced with RMID? */
1013                if (!ipc_valid_object(&msq->q_perm)) {
1014                        msg = ERR_PTR(-EIDRM);
1015                        goto out_unlock0;
1016                }
1017
1018                msg = find_msg(msq, &msgtyp, mode);
1019                if (!IS_ERR(msg)) {
1020                        /*
1021                         * Found a suitable message.
1022                         * Unlink it from the queue.
1023                         */
1024                        if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
1025                                msg = ERR_PTR(-E2BIG);
1026                                goto out_unlock0;
1027                        }
1028                        /*
1029                         * If we are copying, then do not unlink message and do
1030                         * not update queue parameters.
1031                         */
1032                        if (msgflg & MSG_COPY) {
1033                                msg = copy_msg(msg, copy);
1034                                goto out_unlock0;
1035                        }
1036
1037                        list_del(&msg->m_list);
1038                        msq->q_qnum--;
1039                        msq->q_rtime = get_seconds();
1040                        msq->q_lrpid = task_tgid_vnr(current);
1041                        msq->q_cbytes -= msg->m_ts;
1042                        atomic_sub(msg->m_ts, &ns->msg_bytes);
1043                        atomic_dec(&ns->msg_hdrs);
1044                        ss_wakeup(msq, &wake_q, false);
1045
1046                        goto out_unlock0;
1047                }
1048
1049                /* No message waiting. Wait for a message */
1050                if (msgflg & IPC_NOWAIT) {
1051                        msg = ERR_PTR(-ENOMSG);
1052                        goto out_unlock0;
1053                }
1054
1055                list_add_tail(&msr_d.r_list, &msq->q_receivers);
1056                msr_d.r_tsk = current;
1057                msr_d.r_msgtype = msgtyp;
1058                msr_d.r_mode = mode;
1059                if (msgflg & MSG_NOERROR)
1060                        msr_d.r_maxsize = INT_MAX;
1061                else
1062                        msr_d.r_maxsize = bufsz;
1063                msr_d.r_msg = ERR_PTR(-EAGAIN);
1064                __set_current_state(TASK_INTERRUPTIBLE);
1065
1066                ipc_unlock_object(&msq->q_perm);
1067                rcu_read_unlock();
1068                schedule();
1069
1070                /*
1071                 * Lockless receive, part 1:
1072                 * We don't hold a reference to the queue and getting a
1073                 * reference would defeat the idea of a lockless operation,
1074                 * thus the code relies on rcu to guarantee the existence of
1075                 * msq:
1076                 * Prior to destruction, expunge_all(-EIRDM) changes r_msg.
1077                 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
1078                 */
1079                rcu_read_lock();
1080
1081                /*
1082                 * Lockless receive, part 2:
1083                 * The work in pipelined_send() and expunge_all():
1084                 * - Set pointer to message
1085                 * - Queue the receiver task for later wakeup
1086                 * - Wake up the process after the lock is dropped.
1087                 *
1088                 * Should the process wake up before this wakeup (due to a
1089                 * signal) it will either see the message and continue ...
1090                 */
1091                msg = READ_ONCE(msr_d.r_msg);
1092                if (msg != ERR_PTR(-EAGAIN))
1093                        goto out_unlock1;
1094
1095                 /*
1096                  * ... or see -EAGAIN, acquire the lock to check the message
1097                  * again.
1098                  */
1099                ipc_lock_object(&msq->q_perm);
1100
1101                msg = msr_d.r_msg;
1102                if (msg != ERR_PTR(-EAGAIN))
1103                        goto out_unlock0;
1104
1105                list_del(&msr_d.r_list);
1106                if (signal_pending(current)) {
1107                        msg = ERR_PTR(-ERESTARTNOHAND);
1108                        goto out_unlock0;
1109                }
1110
1111                ipc_unlock_object(&msq->q_perm);
1112        }
1113
1114out_unlock0:
1115        ipc_unlock_object(&msq->q_perm);
1116        wake_up_q(&wake_q);
1117out_unlock1:
1118        rcu_read_unlock();
1119        if (IS_ERR(msg)) {
1120                free_copy(copy);
1121                return PTR_ERR(msg);
1122        }
1123
1124        bufsz = msg_handler(buf, msg, bufsz);
1125        free_msg(msg);
1126
1127        return bufsz;
1128}
1129
1130SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
1131                long, msgtyp, int, msgflg)
1132{
1133        return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
1134}
1135
1136#ifdef CONFIG_COMPAT
1137static long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
1138{
1139        struct compat_msgbuf __user *msgp = dest;
1140        size_t msgsz;
1141
1142        if (put_user(msg->m_type, &msgp->mtype))
1143                return -EFAULT;
1144
1145        msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
1146        if (store_msg(msgp->mtext, msg, msgsz))
1147                return -EFAULT;
1148        return msgsz;
1149}
1150
1151COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
1152                       compat_ssize_t, msgsz, compat_long_t, msgtyp, int, msgflg)
1153{
1154        return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, (long)msgtyp,
1155                         msgflg, compat_do_msg_fill);
1156}
1157#endif
1158
1159int msg_init_ns(struct ipc_namespace *ns)
1160{
1161        ns->msg_ctlmax = MSGMAX;
1162        ns->msg_ctlmnb = MSGMNB;
1163        ns->msg_ctlmni = MSGMNI;
1164
1165        atomic_set(&ns->msg_bytes, 0);
1166        atomic_set(&ns->msg_hdrs, 0);
1167        return ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
1168}
1169
1170#ifdef CONFIG_IPC_NS
1171void msg_exit_ns(struct ipc_namespace *ns)
1172{
1173        free_ipcs(ns, &msg_ids(ns), freeque);
1174        idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
1175        rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
1176}
1177#endif
1178
1179#ifdef CONFIG_PROC_FS
1180static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
1181{
1182        struct user_namespace *user_ns = seq_user_ns(s);
1183        struct kern_ipc_perm *ipcp = it;
1184        struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
1185
1186        seq_printf(s,
1187                   "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10llu %10llu %10llu\n",
1188                   msq->q_perm.key,
1189                   msq->q_perm.id,
1190                   msq->q_perm.mode,
1191                   msq->q_cbytes,
1192                   msq->q_qnum,
1193                   msq->q_lspid,
1194                   msq->q_lrpid,
1195                   from_kuid_munged(user_ns, msq->q_perm.uid),
1196                   from_kgid_munged(user_ns, msq->q_perm.gid),
1197                   from_kuid_munged(user_ns, msq->q_perm.cuid),
1198                   from_kgid_munged(user_ns, msq->q_perm.cgid),
1199                   msq->q_stime,
1200                   msq->q_rtime,
1201                   msq->q_ctime);
1202
1203        return 0;
1204}
1205#endif
1206
1207int __init msg_init(void)
1208{
1209        const int err = msg_init_ns(&init_ipc_ns);
1210
1211        ipc_init_proc_interface("sysvipc/msg",
1212                                "       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
1213                                IPC_MSG_IDS, sysvipc_msg_proc_show);
1214        return err;
1215}
1216