linux/ipc/msg.c
<<
>>
Prefs
   1/*
   2 * linux/ipc/msg.c
   3 * Copyright (C) 1992 Krishna Balasubramanian
   4 *
   5 * Removed all the remaining kerneld mess
   6 * Catch the -EFAULT stuff properly
   7 * Use GFP_KERNEL for messages as in 1.2
   8 * Fixed up the unchecked user space derefs
   9 * Copyright (C) 1998 Alan Cox & Andi Kleen
  10 *
  11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  12 *
  13 * mostly rewritten, threaded and wake-one semantics added
  14 * MSGMAX limit removed, sysctl's added
  15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
  16 *
  17 * support for audit of ipc object properties and permission changes
  18 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  19 *
  20 * namespaces support
  21 * OpenVZ, SWsoft Inc.
  22 * Pavel Emelianov <xemul@openvz.org>
  23 */
  24
  25#include <linux/capability.h>
  26#include <linux/msg.h>
  27#include <linux/spinlock.h>
  28#include <linux/init.h>
  29#include <linux/mm.h>
  30#include <linux/proc_fs.h>
  31#include <linux/list.h>
  32#include <linux/security.h>
  33#include <linux/sched.h>
  34#include <linux/syscalls.h>
  35#include <linux/audit.h>
  36#include <linux/seq_file.h>
  37#include <linux/rwsem.h>
  38#include <linux/nsproxy.h>
  39#include <linux/ipc_namespace.h>
  40
  41#include <asm/current.h>
  42#include <linux/uaccess.h>
  43#include "util.h"
  44
  45/* one msg_receiver structure for each sleeping receiver */
  46struct msg_receiver {
  47        struct list_head        r_list;
  48        struct task_struct      *r_tsk;
  49
  50        int                     r_mode;
  51        long                    r_msgtype;
  52        long                    r_maxsize;
  53
  54        struct msg_msg          *r_msg;
  55};
  56
  57/* one msg_sender for each sleeping sender */
  58struct msg_sender {
  59        struct list_head        list;
  60        struct task_struct      *tsk;
  61        size_t                  msgsz;
  62};
  63
  64#define SEARCH_ANY              1
  65#define SEARCH_EQUAL            2
  66#define SEARCH_NOTEQUAL         3
  67#define SEARCH_LESSEQUAL        4
  68#define SEARCH_NUMBER           5
  69
  70#define msg_ids(ns)     ((ns)->ids[IPC_MSG_IDS])
  71
  72static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
  73{
  74        struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id);
  75
  76        if (IS_ERR(ipcp))
  77                return ERR_CAST(ipcp);
  78
  79        return container_of(ipcp, struct msg_queue, q_perm);
  80}
  81
  82static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
  83                                                        int id)
  84{
  85        struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);
  86
  87        if (IS_ERR(ipcp))
  88                return ERR_CAST(ipcp);
  89
  90        return container_of(ipcp, struct msg_queue, q_perm);
  91}
  92
  93static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
  94{
  95        ipc_rmid(&msg_ids(ns), &s->q_perm);
  96}
  97
  98static void msg_rcu_free(struct rcu_head *head)
  99{
 100        struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
 101        struct msg_queue *msq = ipc_rcu_to_struct(p);
 102
 103        security_msg_queue_free(msq);
 104        ipc_rcu_free(head);
 105}
 106
 107/**
 108 * newque - Create a new msg queue
 109 * @ns: namespace
 110 * @params: ptr to the structure that contains the key and msgflg
 111 *
 112 * Called with msg_ids.rwsem held (writer)
 113 */
 114static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 115{
 116        struct msg_queue *msq;
 117        int id, retval;
 118        key_t key = params->key;
 119        int msgflg = params->flg;
 120
 121        msq = ipc_rcu_alloc(sizeof(*msq));
 122        if (!msq)
 123                return -ENOMEM;
 124
 125        msq->q_perm.mode = msgflg & S_IRWXUGO;
 126        msq->q_perm.key = key;
 127
 128        msq->q_perm.security = NULL;
 129        retval = security_msg_queue_alloc(msq);
 130        if (retval) {
 131                ipc_rcu_putref(msq, ipc_rcu_free);
 132                return retval;
 133        }
 134
 135        msq->q_stime = msq->q_rtime = 0;
 136        msq->q_ctime = get_seconds();
 137        msq->q_cbytes = msq->q_qnum = 0;
 138        msq->q_qbytes = ns->msg_ctlmnb;
 139        msq->q_lspid = msq->q_lrpid = 0;
 140        INIT_LIST_HEAD(&msq->q_messages);
 141        INIT_LIST_HEAD(&msq->q_receivers);
 142        INIT_LIST_HEAD(&msq->q_senders);
 143
 144        /* ipc_addid() locks msq upon success. */
 145        id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 146        if (id < 0) {
 147                ipc_rcu_putref(msq, msg_rcu_free);
 148                return id;
 149        }
 150
 151        ipc_unlock_object(&msq->q_perm);
 152        rcu_read_unlock();
 153
 154        return msq->q_perm.id;
 155}
 156
 157static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz)
 158{
 159        return msgsz + msq->q_cbytes <= msq->q_qbytes &&
 160                1 + msq->q_qnum <= msq->q_qbytes;
 161}
 162
 163static inline void ss_add(struct msg_queue *msq,
 164                          struct msg_sender *mss, size_t msgsz)
 165{
 166        mss->tsk = current;
 167        mss->msgsz = msgsz;
 168        __set_current_state(TASK_INTERRUPTIBLE);
 169        list_add_tail(&mss->list, &msq->q_senders);
 170}
 171
 172static inline void ss_del(struct msg_sender *mss)
 173{
 174        if (mss->list.next)
 175                list_del(&mss->list);
 176}
 177
 178static void ss_wakeup(struct msg_queue *msq,
 179                      struct wake_q_head *wake_q, bool kill)
 180{
 181        struct msg_sender *mss, *t;
 182        struct task_struct *stop_tsk = NULL;
 183        struct list_head *h = &msq->q_senders;
 184
 185        list_for_each_entry_safe(mss, t, h, list) {
 186                if (kill)
 187                        mss->list.next = NULL;
 188
 189                /*
 190                 * Stop at the first task we don't wakeup,
 191                 * we've already iterated the original
 192                 * sender queue.
 193                 */
 194                else if (stop_tsk == mss->tsk)
 195                        break;
 196                /*
 197                 * We are not in an EIDRM scenario here, therefore
 198                 * verify that we really need to wakeup the task.
 199                 * To maintain current semantics and wakeup order,
 200                 * move the sender to the tail on behalf of the
 201                 * blocked task.
 202                 */
 203                else if (!msg_fits_inqueue(msq, mss->msgsz)) {
 204                        if (!stop_tsk)
 205                                stop_tsk = mss->tsk;
 206
 207                        list_move_tail(&mss->list, &msq->q_senders);
 208                        continue;
 209                }
 210
 211                wake_q_add(wake_q, mss->tsk);
 212        }
 213}
 214
 215static void expunge_all(struct msg_queue *msq, int res,
 216                        struct wake_q_head *wake_q)
 217{
 218        struct msg_receiver *msr, *t;
 219
 220        list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
 221                wake_q_add(wake_q, msr->r_tsk);
 222                WRITE_ONCE(msr->r_msg, ERR_PTR(res));
 223        }
 224}
 225
 226/*
 227 * freeque() wakes up waiters on the sender and receiver waiting queue,
 228 * removes the message queue from message queue ID IDR, and cleans up all the
 229 * messages associated with this queue.
 230 *
 231 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
 232 * before freeque() is called. msg_ids.rwsem remains locked on exit.
 233 */
 234static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 235{
 236        struct msg_msg *msg, *t;
 237        struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
 238        DEFINE_WAKE_Q(wake_q);
 239
 240        expunge_all(msq, -EIDRM, &wake_q);
 241        ss_wakeup(msq, &wake_q, true);
 242        msg_rmid(ns, msq);
 243        ipc_unlock_object(&msq->q_perm);
 244        wake_up_q(&wake_q);
 245        rcu_read_unlock();
 246
 247        list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
 248                atomic_dec(&ns->msg_hdrs);
 249                free_msg(msg);
 250        }
 251        atomic_sub(msq->q_cbytes, &ns->msg_bytes);
 252        ipc_rcu_putref(msq, msg_rcu_free);
 253}
 254
 255/*
 256 * Called with msg_ids.rwsem and ipcp locked.
 257 */
 258static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
 259{
 260        struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
 261
 262        return security_msg_queue_associate(msq, msgflg);
 263}
 264
 265SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
 266{
 267        struct ipc_namespace *ns;
 268        static const struct ipc_ops msg_ops = {
 269                .getnew = newque,
 270                .associate = msg_security,
 271        };
 272        struct ipc_params msg_params;
 273
 274        ns = current->nsproxy->ipc_ns;
 275
 276        msg_params.key = key;
 277        msg_params.flg = msgflg;
 278
 279        return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
 280}
 281
 282static inline unsigned long
 283copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 284{
 285        switch (version) {
 286        case IPC_64:
 287                return copy_to_user(buf, in, sizeof(*in));
 288        case IPC_OLD:
 289        {
 290                struct msqid_ds out;
 291
 292                memset(&out, 0, sizeof(out));
 293
 294                ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
 295
 296                out.msg_stime           = in->msg_stime;
 297                out.msg_rtime           = in->msg_rtime;
 298                out.msg_ctime           = in->msg_ctime;
 299
 300                if (in->msg_cbytes > USHRT_MAX)
 301                        out.msg_cbytes  = USHRT_MAX;
 302                else
 303                        out.msg_cbytes  = in->msg_cbytes;
 304                out.msg_lcbytes         = in->msg_cbytes;
 305
 306                if (in->msg_qnum > USHRT_MAX)
 307                        out.msg_qnum    = USHRT_MAX;
 308                else
 309                        out.msg_qnum    = in->msg_qnum;
 310
 311                if (in->msg_qbytes > USHRT_MAX)
 312                        out.msg_qbytes  = USHRT_MAX;
 313                else
 314                        out.msg_qbytes  = in->msg_qbytes;
 315                out.msg_lqbytes         = in->msg_qbytes;
 316
 317                out.msg_lspid           = in->msg_lspid;
 318                out.msg_lrpid           = in->msg_lrpid;
 319
 320                return copy_to_user(buf, &out, sizeof(out));
 321        }
 322        default:
 323                return -EINVAL;
 324        }
 325}
 326
 327static inline unsigned long
 328copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 329{
 330        switch (version) {
 331        case IPC_64:
 332                if (copy_from_user(out, buf, sizeof(*out)))
 333                        return -EFAULT;
 334                return 0;
 335        case IPC_OLD:
 336        {
 337                struct msqid_ds tbuf_old;
 338
 339                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 340                        return -EFAULT;
 341
 342                out->msg_perm.uid       = tbuf_old.msg_perm.uid;
 343                out->msg_perm.gid       = tbuf_old.msg_perm.gid;
 344                out->msg_perm.mode      = tbuf_old.msg_perm.mode;
 345
 346                if (tbuf_old.msg_qbytes == 0)
 347                        out->msg_qbytes = tbuf_old.msg_lqbytes;
 348                else
 349                        out->msg_qbytes = tbuf_old.msg_qbytes;
 350
 351                return 0;
 352        }
 353        default:
 354                return -EINVAL;
 355        }
 356}
 357
 358/*
 359 * This function handles some msgctl commands which require the rwsem
 360 * to be held in write mode.
 361 * NOTE: no locks must be held, the rwsem is taken inside this function.
 362 */
 363static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 364                       struct msqid_ds __user *buf, int version)
 365{
 366        struct kern_ipc_perm *ipcp;
 367        struct msqid64_ds uninitialized_var(msqid64);
 368        struct msg_queue *msq;
 369        int err;
 370
 371        if (cmd == IPC_SET) {
 372                if (copy_msqid_from_user(&msqid64, buf, version))
 373                        return -EFAULT;
 374        }
 375
 376        down_write(&msg_ids(ns).rwsem);
 377        rcu_read_lock();
 378
 379        ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
 380                                      &msqid64.msg_perm, msqid64.msg_qbytes);
 381        if (IS_ERR(ipcp)) {
 382                err = PTR_ERR(ipcp);
 383                goto out_unlock1;
 384        }
 385
 386        msq = container_of(ipcp, struct msg_queue, q_perm);
 387
 388        err = security_msg_queue_msgctl(msq, cmd);
 389        if (err)
 390                goto out_unlock1;
 391
 392        switch (cmd) {
 393        case IPC_RMID:
 394                ipc_lock_object(&msq->q_perm);
 395                /* freeque unlocks the ipc object and rcu */
 396                freeque(ns, ipcp);
 397                goto out_up;
 398        case IPC_SET:
 399        {
 400                DEFINE_WAKE_Q(wake_q);
 401
 402                if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
 403                    !capable(CAP_SYS_RESOURCE)) {
 404                        err = -EPERM;
 405                        goto out_unlock1;
 406                }
 407
 408                ipc_lock_object(&msq->q_perm);
 409                err = ipc_update_perm(&msqid64.msg_perm, ipcp);
 410                if (err)
 411                        goto out_unlock0;
 412
 413                msq->q_qbytes = msqid64.msg_qbytes;
 414
 415                msq->q_ctime = get_seconds();
 416                /*
 417                 * Sleeping receivers might be excluded by
 418                 * stricter permissions.
 419                 */
 420                expunge_all(msq, -EAGAIN, &wake_q);
 421                /*
 422                 * Sleeping senders might be able to send
 423                 * due to a larger queue size.
 424                 */
 425                ss_wakeup(msq, &wake_q, false);
 426                ipc_unlock_object(&msq->q_perm);
 427                wake_up_q(&wake_q);
 428
 429                goto out_unlock1;
 430        }
 431        default:
 432                err = -EINVAL;
 433                goto out_unlock1;
 434        }
 435
 436out_unlock0:
 437        ipc_unlock_object(&msq->q_perm);
 438out_unlock1:
 439        rcu_read_unlock();
 440out_up:
 441        up_write(&msg_ids(ns).rwsem);
 442        return err;
 443}
 444
 445static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
 446                         int cmd, int version, void __user *buf)
 447{
 448        int err;
 449        struct msg_queue *msq;
 450
 451        switch (cmd) {
 452        case IPC_INFO:
 453        case MSG_INFO:
 454        {
 455                struct msginfo msginfo;
 456                int max_id;
 457
 458                if (!buf)
 459                        return -EFAULT;
 460
 461                /*
 462                 * We must not return kernel stack data.
 463                 * due to padding, it's not enough
 464                 * to set all member fields.
 465                 */
 466                err = security_msg_queue_msgctl(NULL, cmd);
 467                if (err)
 468                        return err;
 469
 470                memset(&msginfo, 0, sizeof(msginfo));
 471                msginfo.msgmni = ns->msg_ctlmni;
 472                msginfo.msgmax = ns->msg_ctlmax;
 473                msginfo.msgmnb = ns->msg_ctlmnb;
 474                msginfo.msgssz = MSGSSZ;
 475                msginfo.msgseg = MSGSEG;
 476                down_read(&msg_ids(ns).rwsem);
 477                if (cmd == MSG_INFO) {
 478                        msginfo.msgpool = msg_ids(ns).in_use;
 479                        msginfo.msgmap = atomic_read(&ns->msg_hdrs);
 480                        msginfo.msgtql = atomic_read(&ns->msg_bytes);
 481                } else {
 482                        msginfo.msgmap = MSGMAP;
 483                        msginfo.msgpool = MSGPOOL;
 484                        msginfo.msgtql = MSGTQL;
 485                }
 486                max_id = ipc_get_maxid(&msg_ids(ns));
 487                up_read(&msg_ids(ns).rwsem);
 488                if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 489                        return -EFAULT;
 490                return (max_id < 0) ? 0 : max_id;
 491        }
 492
 493        case MSG_STAT:
 494        case IPC_STAT:
 495        {
 496                struct msqid64_ds tbuf;
 497                int success_return;
 498
 499                if (!buf)
 500                        return -EFAULT;
 501
 502                memset(&tbuf, 0, sizeof(tbuf));
 503
 504                rcu_read_lock();
 505                if (cmd == MSG_STAT) {
 506                        msq = msq_obtain_object(ns, msqid);
 507                        if (IS_ERR(msq)) {
 508                                err = PTR_ERR(msq);
 509                                goto out_unlock;
 510                        }
 511                        success_return = msq->q_perm.id;
 512                } else {
 513                        msq = msq_obtain_object_check(ns, msqid);
 514                        if (IS_ERR(msq)) {
 515                                err = PTR_ERR(msq);
 516                                goto out_unlock;
 517                        }
 518                        success_return = 0;
 519                }
 520
 521                err = -EACCES;
 522                if (ipcperms(ns, &msq->q_perm, S_IRUGO))
 523                        goto out_unlock;
 524
 525                err = security_msg_queue_msgctl(msq, cmd);
 526                if (err)
 527                        goto out_unlock;
 528
 529                kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
 530                tbuf.msg_stime  = msq->q_stime;
 531                tbuf.msg_rtime  = msq->q_rtime;
 532                tbuf.msg_ctime  = msq->q_ctime;
 533                tbuf.msg_cbytes = msq->q_cbytes;
 534                tbuf.msg_qnum   = msq->q_qnum;
 535                tbuf.msg_qbytes = msq->q_qbytes;
 536                tbuf.msg_lspid  = msq->q_lspid;
 537                tbuf.msg_lrpid  = msq->q_lrpid;
 538                rcu_read_unlock();
 539
 540                if (copy_msqid_to_user(buf, &tbuf, version))
 541                        return -EFAULT;
 542                return success_return;
 543        }
 544
 545        default:
 546                return -EINVAL;
 547        }
 548
 549        return err;
 550out_unlock:
 551        rcu_read_unlock();
 552        return err;
 553}
 554
 555SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
 556{
 557        int version;
 558        struct ipc_namespace *ns;
 559
 560        if (msqid < 0 || cmd < 0)
 561                return -EINVAL;
 562
 563        version = ipc_parse_version(&cmd);
 564        ns = current->nsproxy->ipc_ns;
 565
 566        switch (cmd) {
 567        case IPC_INFO:
 568        case MSG_INFO:
 569        case MSG_STAT:  /* msqid is an index rather than a msg queue id */
 570        case IPC_STAT:
 571                return msgctl_nolock(ns, msqid, cmd, version, buf);
 572        case IPC_SET:
 573        case IPC_RMID:
 574                return msgctl_down(ns, msqid, cmd, buf, version);
 575        default:
 576                return  -EINVAL;
 577        }
 578}
 579
 580static int testmsg(struct msg_msg *msg, long type, int mode)
 581{
 582        switch (mode) {
 583        case SEARCH_ANY:
 584        case SEARCH_NUMBER:
 585                return 1;
 586        case SEARCH_LESSEQUAL:
 587                if (msg->m_type <= type)
 588                        return 1;
 589                break;
 590        case SEARCH_EQUAL:
 591                if (msg->m_type == type)
 592                        return 1;
 593                break;
 594        case SEARCH_NOTEQUAL:
 595                if (msg->m_type != type)
 596                        return 1;
 597                break;
 598        }
 599        return 0;
 600}
 601
 602static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
 603                                 struct wake_q_head *wake_q)
 604{
 605        struct msg_receiver *msr, *t;
 606
 607        list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
 608                if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
 609                    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
 610                                               msr->r_msgtype, msr->r_mode)) {
 611
 612                        list_del(&msr->r_list);
 613                        if (msr->r_maxsize < msg->m_ts) {
 614                                wake_q_add(wake_q, msr->r_tsk);
 615                                WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
 616                        } else {
 617                                msq->q_lrpid = task_pid_vnr(msr->r_tsk);
 618                                msq->q_rtime = get_seconds();
 619
 620                                wake_q_add(wake_q, msr->r_tsk);
 621                                WRITE_ONCE(msr->r_msg, msg);
 622                                return 1;
 623                        }
 624                }
 625        }
 626
 627        return 0;
 628}
 629
 630long do_msgsnd(int msqid, long mtype, void __user *mtext,
 631                size_t msgsz, int msgflg)
 632{
 633        struct msg_queue *msq;
 634        struct msg_msg *msg;
 635        int err;
 636        struct ipc_namespace *ns;
 637        DEFINE_WAKE_Q(wake_q);
 638
 639        ns = current->nsproxy->ipc_ns;
 640
 641        if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
 642                return -EINVAL;
 643        if (mtype < 1)
 644                return -EINVAL;
 645
 646        msg = load_msg(mtext, msgsz);
 647        if (IS_ERR(msg))
 648                return PTR_ERR(msg);
 649
 650        msg->m_type = mtype;
 651        msg->m_ts = msgsz;
 652
 653        rcu_read_lock();
 654        msq = msq_obtain_object_check(ns, msqid);
 655        if (IS_ERR(msq)) {
 656                err = PTR_ERR(msq);
 657                goto out_unlock1;
 658        }
 659
 660        ipc_lock_object(&msq->q_perm);
 661
 662        for (;;) {
 663                struct msg_sender s;
 664
 665                err = -EACCES;
 666                if (ipcperms(ns, &msq->q_perm, S_IWUGO))
 667                        goto out_unlock0;
 668
 669                /* raced with RMID? */
 670                if (!ipc_valid_object(&msq->q_perm)) {
 671                        err = -EIDRM;
 672                        goto out_unlock0;
 673                }
 674
 675                err = security_msg_queue_msgsnd(msq, msg, msgflg);
 676                if (err)
 677                        goto out_unlock0;
 678
 679                if (msg_fits_inqueue(msq, msgsz))
 680                        break;
 681
 682                /* queue full, wait: */
 683                if (msgflg & IPC_NOWAIT) {
 684                        err = -EAGAIN;
 685                        goto out_unlock0;
 686                }
 687
 688                /* enqueue the sender and prepare to block */
 689                ss_add(msq, &s, msgsz);
 690
 691                if (!ipc_rcu_getref(msq)) {
 692                        err = -EIDRM;
 693                        goto out_unlock0;
 694                }
 695
 696                ipc_unlock_object(&msq->q_perm);
 697                rcu_read_unlock();
 698                schedule();
 699
 700                rcu_read_lock();
 701                ipc_lock_object(&msq->q_perm);
 702
 703                ipc_rcu_putref(msq, msg_rcu_free);
 704                /* raced with RMID? */
 705                if (!ipc_valid_object(&msq->q_perm)) {
 706                        err = -EIDRM;
 707                        goto out_unlock0;
 708                }
 709                ss_del(&s);
 710
 711                if (signal_pending(current)) {
 712                        err = -ERESTARTNOHAND;
 713                        goto out_unlock0;
 714                }
 715
 716        }
 717
 718        msq->q_lspid = task_tgid_vnr(current);
 719        msq->q_stime = get_seconds();
 720
 721        if (!pipelined_send(msq, msg, &wake_q)) {
 722                /* no one is waiting for this message, enqueue it */
 723                list_add_tail(&msg->m_list, &msq->q_messages);
 724                msq->q_cbytes += msgsz;
 725                msq->q_qnum++;
 726                atomic_add(msgsz, &ns->msg_bytes);
 727                atomic_inc(&ns->msg_hdrs);
 728        }
 729
 730        err = 0;
 731        msg = NULL;
 732
 733out_unlock0:
 734        ipc_unlock_object(&msq->q_perm);
 735        wake_up_q(&wake_q);
 736out_unlock1:
 737        rcu_read_unlock();
 738        if (msg != NULL)
 739                free_msg(msg);
 740        return err;
 741}
 742
 743SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
 744                int, msgflg)
 745{
 746        long mtype;
 747
 748        if (get_user(mtype, &msgp->mtype))
 749                return -EFAULT;
 750        return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
 751}
 752
 753static inline int convert_mode(long *msgtyp, int msgflg)
 754{
 755        if (msgflg & MSG_COPY)
 756                return SEARCH_NUMBER;
 757        /*
 758         *  find message of correct type.
 759         *  msgtyp = 0 => get first.
 760         *  msgtyp > 0 => get first message of matching type.
 761         *  msgtyp < 0 => get message with least type must be < abs(msgtype).
 762         */
 763        if (*msgtyp == 0)
 764                return SEARCH_ANY;
 765        if (*msgtyp < 0) {
 766                if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
 767                        *msgtyp = LONG_MAX;
 768                else
 769                        *msgtyp = -*msgtyp;
 770                return SEARCH_LESSEQUAL;
 771        }
 772        if (msgflg & MSG_EXCEPT)
 773                return SEARCH_NOTEQUAL;
 774        return SEARCH_EQUAL;
 775}
 776
 777static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
 778{
 779        struct msgbuf __user *msgp = dest;
 780        size_t msgsz;
 781
 782        if (put_user(msg->m_type, &msgp->mtype))
 783                return -EFAULT;
 784
 785        msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
 786        if (store_msg(msgp->mtext, msg, msgsz))
 787                return -EFAULT;
 788        return msgsz;
 789}
 790
 791#ifdef CONFIG_CHECKPOINT_RESTORE
 792/*
 793 * This function creates new kernel message structure, large enough to store
 794 * bufsz message bytes.
 795 */
 796static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
 797{
 798        struct msg_msg *copy;
 799
 800        /*
 801         * Create dummy message to copy real message to.
 802         */
 803        copy = load_msg(buf, bufsz);
 804        if (!IS_ERR(copy))
 805                copy->m_ts = bufsz;
 806        return copy;
 807}
 808
 809static inline void free_copy(struct msg_msg *copy)
 810{
 811        if (copy)
 812                free_msg(copy);
 813}
 814#else
 815static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
 816{
 817        return ERR_PTR(-ENOSYS);
 818}
 819
 820static inline void free_copy(struct msg_msg *copy)
 821{
 822}
 823#endif
 824
 825static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
 826{
 827        struct msg_msg *msg, *found = NULL;
 828        long count = 0;
 829
 830        list_for_each_entry(msg, &msq->q_messages, m_list) {
 831                if (testmsg(msg, *msgtyp, mode) &&
 832                    !security_msg_queue_msgrcv(msq, msg, current,
 833                                               *msgtyp, mode)) {
 834                        if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
 835                                *msgtyp = msg->m_type - 1;
 836                                found = msg;
 837                        } else if (mode == SEARCH_NUMBER) {
 838                                if (*msgtyp == count)
 839                                        return msg;
 840                        } else
 841                                return msg;
 842                        count++;
 843                }
 844        }
 845
 846        return found ?: ERR_PTR(-EAGAIN);
 847}
 848
 849long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
 850               long (*msg_handler)(void __user *, struct msg_msg *, size_t))
 851{
 852        int mode;
 853        struct msg_queue *msq;
 854        struct ipc_namespace *ns;
 855        struct msg_msg *msg, *copy = NULL;
 856        DEFINE_WAKE_Q(wake_q);
 857
 858        ns = current->nsproxy->ipc_ns;
 859
 860        if (msqid < 0 || (long) bufsz < 0)
 861                return -EINVAL;
 862
 863        if (msgflg & MSG_COPY) {
 864                if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
 865                        return -EINVAL;
 866                copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
 867                if (IS_ERR(copy))
 868                        return PTR_ERR(copy);
 869        }
 870        mode = convert_mode(&msgtyp, msgflg);
 871
 872        rcu_read_lock();
 873        msq = msq_obtain_object_check(ns, msqid);
 874        if (IS_ERR(msq)) {
 875                rcu_read_unlock();
 876                free_copy(copy);
 877                return PTR_ERR(msq);
 878        }
 879
 880        for (;;) {
 881                struct msg_receiver msr_d;
 882
 883                msg = ERR_PTR(-EACCES);
 884                if (ipcperms(ns, &msq->q_perm, S_IRUGO))
 885                        goto out_unlock1;
 886
 887                ipc_lock_object(&msq->q_perm);
 888
 889                /* raced with RMID? */
 890                if (!ipc_valid_object(&msq->q_perm)) {
 891                        msg = ERR_PTR(-EIDRM);
 892                        goto out_unlock0;
 893                }
 894
 895                msg = find_msg(msq, &msgtyp, mode);
 896                if (!IS_ERR(msg)) {
 897                        /*
 898                         * Found a suitable message.
 899                         * Unlink it from the queue.
 900                         */
 901                        if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
 902                                msg = ERR_PTR(-E2BIG);
 903                                goto out_unlock0;
 904                        }
 905                        /*
 906                         * If we are copying, then do not unlink message and do
 907                         * not update queue parameters.
 908                         */
 909                        if (msgflg & MSG_COPY) {
 910                                msg = copy_msg(msg, copy);
 911                                goto out_unlock0;
 912                        }
 913
 914                        list_del(&msg->m_list);
 915                        msq->q_qnum--;
 916                        msq->q_rtime = get_seconds();
 917                        msq->q_lrpid = task_tgid_vnr(current);
 918                        msq->q_cbytes -= msg->m_ts;
 919                        atomic_sub(msg->m_ts, &ns->msg_bytes);
 920                        atomic_dec(&ns->msg_hdrs);
 921                        ss_wakeup(msq, &wake_q, false);
 922
 923                        goto out_unlock0;
 924                }
 925
 926                /* No message waiting. Wait for a message */
 927                if (msgflg & IPC_NOWAIT) {
 928                        msg = ERR_PTR(-ENOMSG);
 929                        goto out_unlock0;
 930                }
 931
 932                list_add_tail(&msr_d.r_list, &msq->q_receivers);
 933                msr_d.r_tsk = current;
 934                msr_d.r_msgtype = msgtyp;
 935                msr_d.r_mode = mode;
 936                if (msgflg & MSG_NOERROR)
 937                        msr_d.r_maxsize = INT_MAX;
 938                else
 939                        msr_d.r_maxsize = bufsz;
 940                msr_d.r_msg = ERR_PTR(-EAGAIN);
 941                __set_current_state(TASK_INTERRUPTIBLE);
 942
 943                ipc_unlock_object(&msq->q_perm);
 944                rcu_read_unlock();
 945                schedule();
 946
 947                /*
 948                 * Lockless receive, part 1:
 949                 * We don't hold a reference to the queue and getting a
 950                 * reference would defeat the idea of a lockless operation,
 951                 * thus the code relies on rcu to guarantee the existence of
 952                 * msq:
 953                 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
 954                 * Thus if r_msg is -EAGAIN, then the queue is not yet destroyed.
 955                 */
 956                rcu_read_lock();
 957
 958                /*
 959                 * Lockless receive, part 2:
 960                 * The work in pipelined_send() and expunge_all():
 961                 * - Set pointer to message
 962                 * - Queue the receiver task for later wakeup
 963                 * - Wake up the process after the lock is dropped.
 964                 *
 965                 * Should the process wake up before this wakeup (due to a
 966                 * signal) it will either see the message and continue ...
 967                 */
 968                msg = READ_ONCE(msr_d.r_msg);
 969                if (msg != ERR_PTR(-EAGAIN))
 970                        goto out_unlock1;
 971
 972                 /*
 973                  * ... or see -EAGAIN, acquire the lock to check the message
 974                  * again.
 975                  */
 976                ipc_lock_object(&msq->q_perm);
 977
 978                msg = msr_d.r_msg;
 979                if (msg != ERR_PTR(-EAGAIN))
 980                        goto out_unlock0;
 981
 982                list_del(&msr_d.r_list);
 983                if (signal_pending(current)) {
 984                        msg = ERR_PTR(-ERESTARTNOHAND);
 985                        goto out_unlock0;
 986                }
 987
 988                ipc_unlock_object(&msq->q_perm);
 989        }
 990
 991out_unlock0:
 992        ipc_unlock_object(&msq->q_perm);
 993        wake_up_q(&wake_q);
 994out_unlock1:
 995        rcu_read_unlock();
 996        if (IS_ERR(msg)) {
 997                free_copy(copy);
 998                return PTR_ERR(msg);
 999        }
1000
1001        bufsz = msg_handler(buf, msg, bufsz);
1002        free_msg(msg);
1003
1004        return bufsz;
1005}
1006
1007SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
1008                long, msgtyp, int, msgflg)
1009{
1010        return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
1011}
1012
1013
1014void msg_init_ns(struct ipc_namespace *ns)
1015{
1016        ns->msg_ctlmax = MSGMAX;
1017        ns->msg_ctlmnb = MSGMNB;
1018        ns->msg_ctlmni = MSGMNI;
1019
1020        atomic_set(&ns->msg_bytes, 0);
1021        atomic_set(&ns->msg_hdrs, 0);
1022        ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
1023}
1024
1025#ifdef CONFIG_IPC_NS
1026void msg_exit_ns(struct ipc_namespace *ns)
1027{
1028        free_ipcs(ns, &msg_ids(ns), freeque);
1029        idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
1030}
1031#endif
1032
1033#ifdef CONFIG_PROC_FS
1034static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
1035{
1036        struct user_namespace *user_ns = seq_user_ns(s);
1037        struct msg_queue *msq = it;
1038
1039        seq_printf(s,
1040                   "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
1041                   msq->q_perm.key,
1042                   msq->q_perm.id,
1043                   msq->q_perm.mode,
1044                   msq->q_cbytes,
1045                   msq->q_qnum,
1046                   msq->q_lspid,
1047                   msq->q_lrpid,
1048                   from_kuid_munged(user_ns, msq->q_perm.uid),
1049                   from_kgid_munged(user_ns, msq->q_perm.gid),
1050                   from_kuid_munged(user_ns, msq->q_perm.cuid),
1051                   from_kgid_munged(user_ns, msq->q_perm.cgid),
1052                   msq->q_stime,
1053                   msq->q_rtime,
1054                   msq->q_ctime);
1055
1056        return 0;
1057}
1058#endif
1059
1060void __init msg_init(void)
1061{
1062        msg_init_ns(&init_ipc_ns);
1063
1064        ipc_init_proc_interface("sysvipc/msg",
1065                                "       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
1066                                IPC_MSG_IDS, sysvipc_msg_proc_show);
1067}
1068