linux/net/tipc/socket.c
<<
>>
Prefs
   1/*
   2 * net/tipc/socket.c: TIPC socket API
   3 *
   4 * Copyright (c) 2001-2007, 2012 Ericsson AB
   5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
   6 * All rights reserved.
   7 *
   8 * Redistribution and use in source and binary forms, with or without
   9 * modification, are permitted provided that the following conditions are met:
  10 *
  11 * 1. Redistributions of source code must retain the above copyright
  12 *    notice, this list of conditions and the following disclaimer.
  13 * 2. Redistributions in binary form must reproduce the above copyright
  14 *    notice, this list of conditions and the following disclaimer in the
  15 *    documentation and/or other materials provided with the distribution.
  16 * 3. Neither the names of the copyright holders nor the names of its
  17 *    contributors may be used to endorse or promote products derived from
  18 *    this software without specific prior written permission.
  19 *
  20 * Alternatively, this software may be distributed under the terms of the
  21 * GNU General Public License ("GPL") version 2 as published by the Free
  22 * Software Foundation.
  23 *
  24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  34 * POSSIBILITY OF SUCH DAMAGE.
  35 */
  36
  37#include "core.h"
  38#include "port.h"
  39
  40#include <linux/export.h>
  41#include <net/sock.h>
  42
  43#define SS_LISTENING    -1      /* socket is listening */
  44#define SS_READY        -2      /* socket is connectionless */
  45
  46#define CONN_TIMEOUT_DEFAULT    8000    /* default connect timeout = 8s */
  47
/**
 * struct tipc_sock - TIPC socket structure
 * @sk: generic socket structure; it is the first member, and tipc_sk()
 *      converts a 'struct sock' pointer by a plain cast
 * @p: TIPC port associated with the socket (set in tipc_sk_create())
 * @peer_name: port id (ref/node) of the peer, as reported by get_name()
 * @conn_timeout: connect timeout; initialized to CONN_TIMEOUT_DEFAULT
 */
struct tipc_sock {
        struct sock sk;
        struct tipc_port *p;
        struct tipc_portid peer_name;
        unsigned int conn_timeout;
};

/* Convert a 'struct sock' pointer to its enclosing TIPC socket */
#define tipc_sk(sk) ((struct tipc_sock *)(sk))
/* Fetch the TIPC port belonging to a 'struct sock' */
#define tipc_sk_port(sk) (tipc_sk(sk)->p)
  57
  58static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
  59static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
  60static void wakeupdispatch(struct tipc_port *tport);
  61static void tipc_data_ready(struct sock *sk, int len);
  62static void tipc_write_space(struct sock *sk);
  63static int release(struct socket *sock);
  64static int accept(struct socket *sock, struct socket *new_sock, int flags);
  65
  66static const struct proto_ops packet_ops;
  67static const struct proto_ops stream_ops;
  68static const struct proto_ops msg_ops;
  69
  70static struct proto tipc_proto;
  71static struct proto tipc_proto_kern;
  72
  73/*
  74 * Revised TIPC socket locking policy:
  75 *
  76 * Most socket operations take the standard socket lock when they start
  77 * and hold it until they finish (or until they need to sleep).  Acquiring
  78 * this lock grants the owner exclusive access to the fields of the socket
  79 * data structures, with the exception of the backlog queue.  A few socket
  80 * operations can be done without taking the socket lock because they only
  81 * read socket information that never changes during the life of the socket.
  82 *
  83 * Socket operations may acquire the lock for the associated TIPC port if they
  84 * need to perform an operation on the port.  If any routine needs to acquire
  85 * both the socket lock and the port lock it must take the socket lock first
  86 * to avoid the risk of deadlock.
  87 *
  88 * The dispatcher handling incoming messages cannot grab the socket lock in
  89 * the standard fashion, since it is invoked at the BH level and cannot block.
  90 * Instead, it checks to see if the socket lock is currently owned by someone,
  91 * and either handles the message itself or adds it to the socket's backlog
  92 * queue; in the latter case the queued message is processed once the process
  93 * owning the socket lock releases it.
  94 *
  95 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
  96 * the problem of a blocked socket operation preventing any other operations
  97 * from occurring.  However, applications must be careful if they have
  98 * multiple threads trying to send (or receive) on the same socket, as these
  99 * operations might interfere with each other.  For example, doing a connect
 100 * and a receive at the same time might allow the receive to consume the
 101 * ACK message meant for the connect.  While additional work could be done
 102 * to try and overcome this, it doesn't seem to be worthwhile at the present.
 103 *
 104 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
 105 * that another operation that must be performed in a non-blocking manner is
 106 * not delayed for very long because the lock has already been taken.
 107 *
 108 * NOTE: This code assumes that certain fields of a port/socket pair are
 109 * constant over its lifetime; such fields can be examined without taking
 110 * the socket lock and/or port lock, and do not need to be re-read even
 111 * after resuming processing after waiting.  These fields include:
 112 *   - socket type
 113 *   - pointer to socket sk structure (aka tipc_sock structure)
 114 *   - pointer to port structure
 115 *   - port reference
 116 */
 117
 118/**
 119 * advance_rx_queue - discard first buffer in socket receive queue
 120 *
 121 * Caller must hold socket lock
 122 */
 123static void advance_rx_queue(struct sock *sk)
 124{
 125        kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
 126}
 127
 128/**
 129 * reject_rx_queue - reject all buffers in socket receive queue
 130 *
 131 * Caller must hold socket lock
 132 */
 133static void reject_rx_queue(struct sock *sk)
 134{
 135        struct sk_buff *buf;
 136
 137        while ((buf = __skb_dequeue(&sk->sk_receive_queue)))
 138                tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
 139}
 140
 141/**
 142 * tipc_sk_create - create a TIPC socket
 143 * @net: network namespace (must be default network)
 144 * @sock: pre-allocated socket structure
 145 * @protocol: protocol indicator (must be 0)
 146 * @kern: caused by kernel or by userspace?
 147 *
 148 * This routine creates additional data structures used by the TIPC socket,
 149 * initializes them, and links them together.
 150 *
 151 * Returns 0 on success, errno otherwise
 152 */
 153static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
 154                          int kern)
 155{
 156        const struct proto_ops *ops;
 157        socket_state state;
 158        struct sock *sk;
 159        struct tipc_port *tp_ptr;
 160
 161        /* Validate arguments */
 162        if (unlikely(protocol != 0))
 163                return -EPROTONOSUPPORT;
 164
 165        switch (sock->type) {
 166        case SOCK_STREAM:
 167                ops = &stream_ops;
 168                state = SS_UNCONNECTED;
 169                break;
 170        case SOCK_SEQPACKET:
 171                ops = &packet_ops;
 172                state = SS_UNCONNECTED;
 173                break;
 174        case SOCK_DGRAM:
 175        case SOCK_RDM:
 176                ops = &msg_ops;
 177                state = SS_READY;
 178                break;
 179        default:
 180                return -EPROTOTYPE;
 181        }
 182
 183        /* Allocate socket's protocol area */
 184        if (!kern)
 185                sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
 186        else
 187                sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern);
 188
 189        if (sk == NULL)
 190                return -ENOMEM;
 191
 192        /* Allocate TIPC port for socket to use */
 193        tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch,
 194                                 TIPC_LOW_IMPORTANCE);
 195        if (unlikely(!tp_ptr)) {
 196                sk_free(sk);
 197                return -ENOMEM;
 198        }
 199
 200        /* Finish initializing socket data structures */
 201        sock->ops = ops;
 202        sock->state = state;
 203
 204        sock_init_data(sock, sk);
 205        sk->sk_backlog_rcv = backlog_rcv;
 206        sk->sk_rcvbuf = sysctl_tipc_rmem[1];
 207        sk->sk_data_ready = tipc_data_ready;
 208        sk->sk_write_space = tipc_write_space;
 209        tipc_sk(sk)->p = tp_ptr;
 210        tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
 211
 212        spin_unlock_bh(tp_ptr->lock);
 213
 214        if (sock->state == SS_READY) {
 215                tipc_set_portunreturnable(tp_ptr->ref, 1);
 216                if (sock->type == SOCK_DGRAM)
 217                        tipc_set_portunreliable(tp_ptr->ref, 1);
 218        }
 219
 220        return 0;
 221}
 222
 223/**
 224 * tipc_sock_create_local - create TIPC socket from inside TIPC module
 225 * @type: socket type - SOCK_RDM or SOCK_SEQPACKET
 226 *
 227 * We cannot use sock_creat_kern here because it bumps module user count.
 228 * Since socket owner and creator is the same module we must make sure
 229 * that module count remains zero for module local sockets, otherwise
 230 * we cannot do rmmod.
 231 *
 232 * Returns 0 on success, errno otherwise
 233 */
 234int tipc_sock_create_local(int type, struct socket **res)
 235{
 236        int rc;
 237
 238        rc = sock_create_lite(AF_TIPC, type, 0, res);
 239        if (rc < 0) {
 240                pr_err("Failed to create kernel socket\n");
 241                return rc;
 242        }
 243        tipc_sk_create(&init_net, *res, 0, 1);
 244
 245        return 0;
 246}
 247
 248/**
 249 * tipc_sock_release_local - release socket created by tipc_sock_create_local
 250 * @sock: the socket to be released.
 251 *
 252 * Module reference count is not incremented when such sockets are created,
 253 * so we must keep it from being decremented when they are released.
 254 */
 255void tipc_sock_release_local(struct socket *sock)
 256{
 257        release(sock);
 258        sock->ops = NULL;
 259        sock_release(sock);
 260}
 261
 262/**
 263 * tipc_sock_accept_local - accept a connection on a socket created
 264 * with tipc_sock_create_local. Use this function to avoid that
 265 * module reference count is inadvertently incremented.
 266 *
 267 * @sock:    the accepting socket
 268 * @newsock: reference to the new socket to be created
 269 * @flags:   socket flags
 270 */
 271
 272int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
 273                           int flags)
 274{
 275        struct sock *sk = sock->sk;
 276        int ret;
 277
 278        ret = sock_create_lite(sk->sk_family, sk->sk_type,
 279                               sk->sk_protocol, newsock);
 280        if (ret < 0)
 281                return ret;
 282
 283        ret = accept(sock, *newsock, flags);
 284        if (ret < 0) {
 285                sock_release(*newsock);
 286                return ret;
 287        }
 288        (*newsock)->ops = sock->ops;
 289        return ret;
 290}
 291
 292/**
 293 * release - destroy a TIPC socket
 294 * @sock: socket to destroy
 295 *
 296 * This routine cleans up any messages that are still queued on the socket.
 297 * For DGRAM and RDM socket types, all queued messages are rejected.
 298 * For SEQPACKET and STREAM socket types, the first message is rejected
 299 * and any others are discarded.  (If the first message on a STREAM socket
 300 * is partially-read, it is discarded and the next one is rejected instead.)
 301 *
 302 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 303 * are returned or discarded according to the "destination droppable" setting
 304 * specified for the message by the sender.
 305 *
 306 * Returns 0 on success, errno otherwise
 307 */
 308static int release(struct socket *sock)
 309{
 310        struct sock *sk = sock->sk;
 311        struct tipc_port *tport;
 312        struct sk_buff *buf;
 313        int res;
 314
 315        /*
 316         * Exit if socket isn't fully initialized (occurs when a failed accept()
 317         * releases a pre-allocated child socket that was never used)
 318         */
 319        if (sk == NULL)
 320                return 0;
 321
 322        tport = tipc_sk_port(sk);
 323        lock_sock(sk);
 324
 325        /*
 326         * Reject all unreceived messages, except on an active connection
 327         * (which disconnects locally & sends a 'FIN+' to peer)
 328         */
 329        while (sock->state != SS_DISCONNECTING) {
 330                buf = __skb_dequeue(&sk->sk_receive_queue);
 331                if (buf == NULL)
 332                        break;
 333                if (TIPC_SKB_CB(buf)->handle != NULL)
 334                        kfree_skb(buf);
 335                else {
 336                        if ((sock->state == SS_CONNECTING) ||
 337                            (sock->state == SS_CONNECTED)) {
 338                                sock->state = SS_DISCONNECTING;
 339                                tipc_disconnect(tport->ref);
 340                        }
 341                        tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
 342                }
 343        }
 344
 345        /*
 346         * Delete TIPC port; this ensures no more messages are queued
 347         * (also disconnects an active connection & sends a 'FIN-' to peer)
 348         */
 349        res = tipc_deleteport(tport);
 350
 351        /* Discard any remaining (connection-based) messages in receive queue */
 352        __skb_queue_purge(&sk->sk_receive_queue);
 353
 354        /* Reject any messages that accumulated in backlog queue */
 355        sock->state = SS_DISCONNECTING;
 356        release_sock(sk);
 357
 358        sock_put(sk);
 359        sock->sk = NULL;
 360
 361        return res;
 362}
 363
 364/**
 365 * bind - associate or disassocate TIPC name(s) with a socket
 366 * @sock: socket structure
 367 * @uaddr: socket address describing name(s) and desired operation
 368 * @uaddr_len: size of socket address data structure
 369 *
 370 * Name and name sequence binding is indicated using a positive scope value;
 371 * a negative scope value unbinds the specified name.  Specifying no name
 372 * (i.e. a socket address length of 0) unbinds all names from the socket.
 373 *
 374 * Returns 0 on success, errno otherwise
 375 *
 376 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 377 *       access any non-constant socket information.
 378 */
 379static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
 380{
 381        struct sock *sk = sock->sk;
 382        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 383        struct tipc_port *tport = tipc_sk_port(sock->sk);
 384        int res = -EINVAL;
 385
 386        lock_sock(sk);
 387        if (unlikely(!uaddr_len)) {
 388                res = tipc_withdraw(tport, 0, NULL);
 389                goto exit;
 390        }
 391
 392        if (uaddr_len < sizeof(struct sockaddr_tipc)) {
 393                res = -EINVAL;
 394                goto exit;
 395        }
 396        if (addr->family != AF_TIPC) {
 397                res = -EAFNOSUPPORT;
 398                goto exit;
 399        }
 400
 401        if (addr->addrtype == TIPC_ADDR_NAME)
 402                addr->addr.nameseq.upper = addr->addr.nameseq.lower;
 403        else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
 404                res = -EAFNOSUPPORT;
 405                goto exit;
 406        }
 407
 408        if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
 409            (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
 410            (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
 411                res = -EACCES;
 412                goto exit;
 413        }
 414
 415        res = (addr->scope > 0) ?
 416                tipc_publish(tport, addr->scope, &addr->addr.nameseq) :
 417                tipc_withdraw(tport, -addr->scope, &addr->addr.nameseq);
 418exit:
 419        release_sock(sk);
 420        return res;
 421}
 422
 423/**
 424 * get_name - get port ID of socket or peer socket
 425 * @sock: socket structure
 426 * @uaddr: area for returned socket address
 427 * @uaddr_len: area for returned length of socket address
 428 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
 429 *
 430 * Returns 0 on success, errno otherwise
 431 *
 432 * NOTE: This routine doesn't need to take the socket lock since it only
 433 *       accesses socket information that is unchanging (or which changes in
 434 *       a completely predictable manner).
 435 */
 436static int get_name(struct socket *sock, struct sockaddr *uaddr,
 437                    int *uaddr_len, int peer)
 438{
 439        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 440        struct tipc_sock *tsock = tipc_sk(sock->sk);
 441
 442        memset(addr, 0, sizeof(*addr));
 443        if (peer) {
 444                if ((sock->state != SS_CONNECTED) &&
 445                        ((peer != 2) || (sock->state != SS_DISCONNECTING)))
 446                        return -ENOTCONN;
 447                addr->addr.id.ref = tsock->peer_name.ref;
 448                addr->addr.id.node = tsock->peer_name.node;
 449        } else {
 450                addr->addr.id.ref = tsock->p->ref;
 451                addr->addr.id.node = tipc_own_addr;
 452        }
 453
 454        *uaddr_len = sizeof(*addr);
 455        addr->addrtype = TIPC_ADDR_ID;
 456        addr->family = AF_TIPC;
 457        addr->scope = 0;
 458        addr->addr.name.domain = 0;
 459
 460        return 0;
 461}
 462
 463/**
 464 * poll - read and possibly block on pollmask
 465 * @file: file structure associated with the socket
 466 * @sock: socket for which to calculate the poll bits
 467 * @wait: ???
 468 *
 469 * Returns pollmask value
 470 *
 471 * COMMENTARY:
 472 * It appears that the usual socket locking mechanisms are not useful here
 473 * since the pollmask info is potentially out-of-date the moment this routine
 474 * exits.  TCP and other protocols seem to rely on higher level poll routines
 475 * to handle any preventable race conditions, so TIPC will do the same ...
 476 *
 477 * TIPC sets the returned events as follows:
 478 *
 479 * socket state         flags set
 480 * ------------         ---------
 481 * unconnected          no read flags
 482 *                      POLLOUT if port is not congested
 483 *
 484 * connecting           POLLIN/POLLRDNORM if ACK/NACK in rx queue
 485 *                      no write flags
 486 *
 487 * connected            POLLIN/POLLRDNORM if data in rx queue
 488 *                      POLLOUT if port is not congested
 489 *
 490 * disconnecting        POLLIN/POLLRDNORM/POLLHUP
 491 *                      no write flags
 492 *
 493 * listening            POLLIN if SYN in rx queue
 494 *                      no write flags
 495 *
 496 * ready                POLLIN/POLLRDNORM if data in rx queue
 497 * [connectionless]     POLLOUT (since port cannot be congested)
 498 *
 499 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 500 * imply that the operation will succeed, merely that it should be performed
 501 * and will not block.
 502 */
 503static unsigned int poll(struct file *file, struct socket *sock,
 504                         poll_table *wait)
 505{
 506        struct sock *sk = sock->sk;
 507        u32 mask = 0;
 508
 509        sock_poll_wait(file, sk_sleep(sk), wait);
 510
 511        switch ((int)sock->state) {
 512        case SS_UNCONNECTED:
 513                if (!tipc_sk_port(sk)->congested)
 514                        mask |= POLLOUT;
 515                break;
 516        case SS_READY:
 517        case SS_CONNECTED:
 518                if (!tipc_sk_port(sk)->congested)
 519                        mask |= POLLOUT;
 520                /* fall thru' */
 521        case SS_CONNECTING:
 522        case SS_LISTENING:
 523                if (!skb_queue_empty(&sk->sk_receive_queue))
 524                        mask |= (POLLIN | POLLRDNORM);
 525                break;
 526        case SS_DISCONNECTING:
 527                mask = (POLLIN | POLLRDNORM | POLLHUP);
 528                break;
 529        }
 530
 531        return mask;
 532}
 533
 534/**
 535 * dest_name_check - verify user is permitted to send to specified port name
 536 * @dest: destination address
 537 * @m: descriptor for message to be sent
 538 *
 539 * Prevents restricted configuration commands from being issued by
 540 * unauthorized users.
 541 *
 542 * Returns 0 if permission is granted, otherwise errno
 543 */
 544static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
 545{
 546        struct tipc_cfg_msg_hdr hdr;
 547
 548        if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
 549                return 0;
 550        if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
 551                return 0;
 552        if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
 553                return -EACCES;
 554
 555        if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
 556                return -EMSGSIZE;
 557        if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
 558                return -EFAULT;
 559        if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
 560                return -EACCES;
 561
 562        return 0;
 563}
 564
 565static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
 566{
 567        struct sock *sk = sock->sk;
 568        struct tipc_port *tport = tipc_sk_port(sk);
 569        DEFINE_WAIT(wait);
 570        int done;
 571
 572        do {
 573                int err = sock_error(sk);
 574                if (err)
 575                        return err;
 576                if (sock->state == SS_DISCONNECTING)
 577                        return -EPIPE;
 578                if (!*timeo_p)
 579                        return -EAGAIN;
 580                if (signal_pending(current))
 581                        return sock_intr_errno(*timeo_p);
 582
 583                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 584                done = sk_wait_event(sk, timeo_p, !tport->congested);
 585                finish_wait(sk_sleep(sk), &wait);
 586        } while (!done);
 587        return 0;
 588}
 589
 590/**
 591 * send_msg - send message in connectionless manner
 592 * @iocb: if NULL, indicates that socket lock is already held
 593 * @sock: socket structure
 594 * @m: message to send
 595 * @total_len: length of message
 596 *
 597 * Message must have an destination specified explicitly.
 598 * Used for SOCK_RDM and SOCK_DGRAM messages,
 599 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 600 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 601 *
 602 * Returns the number of bytes sent on success, or errno otherwise
 603 */
 604static int send_msg(struct kiocb *iocb, struct socket *sock,
 605                    struct msghdr *m, size_t total_len)
 606{
 607        struct sock *sk = sock->sk;
 608        struct tipc_port *tport = tipc_sk_port(sk);
 609        DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 610        int needs_conn;
 611        long timeo;
 612        int res = -EINVAL;
 613
 614        if (unlikely(!dest))
 615                return -EDESTADDRREQ;
 616        if (unlikely((m->msg_namelen < sizeof(*dest)) ||
 617                     (dest->family != AF_TIPC)))
 618                return -EINVAL;
 619        if (total_len > TIPC_MAX_USER_MSG_SIZE)
 620                return -EMSGSIZE;
 621
 622        if (iocb)
 623                lock_sock(sk);
 624
 625        needs_conn = (sock->state != SS_READY);
 626        if (unlikely(needs_conn)) {
 627                if (sock->state == SS_LISTENING) {
 628                        res = -EPIPE;
 629                        goto exit;
 630                }
 631                if (sock->state != SS_UNCONNECTED) {
 632                        res = -EISCONN;
 633                        goto exit;
 634                }
 635                if (tport->published) {
 636                        res = -EOPNOTSUPP;
 637                        goto exit;
 638                }
 639                if (dest->addrtype == TIPC_ADDR_NAME) {
 640                        tport->conn_type = dest->addr.name.name.type;
 641                        tport->conn_instance = dest->addr.name.name.instance;
 642                }
 643
 644                /* Abort any pending connection attempts (very unlikely) */
 645                reject_rx_queue(sk);
 646        }
 647
 648        timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 649        do {
 650                if (dest->addrtype == TIPC_ADDR_NAME) {
 651                        res = dest_name_check(dest, m);
 652                        if (res)
 653                                break;
 654                        res = tipc_send2name(tport->ref,
 655                                             &dest->addr.name.name,
 656                                             dest->addr.name.domain,
 657                                             m->msg_iov,
 658                                             total_len);
 659                } else if (dest->addrtype == TIPC_ADDR_ID) {
 660                        res = tipc_send2port(tport->ref,
 661                                             &dest->addr.id,
 662                                             m->msg_iov,
 663                                             total_len);
 664                } else if (dest->addrtype == TIPC_ADDR_MCAST) {
 665                        if (needs_conn) {
 666                                res = -EOPNOTSUPP;
 667                                break;
 668                        }
 669                        res = dest_name_check(dest, m);
 670                        if (res)
 671                                break;
 672                        res = tipc_multicast(tport->ref,
 673                                             &dest->addr.nameseq,
 674                                             m->msg_iov,
 675                                             total_len);
 676                }
 677                if (likely(res != -ELINKCONG)) {
 678                        if (needs_conn && (res >= 0))
 679                                sock->state = SS_CONNECTING;
 680                        break;
 681                }
 682                res = tipc_wait_for_sndmsg(sock, &timeo);
 683                if (res)
 684                        break;
 685        } while (1);
 686
 687exit:
 688        if (iocb)
 689                release_sock(sk);
 690        return res;
 691}
 692
 693static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
 694{
 695        struct sock *sk = sock->sk;
 696        struct tipc_port *tport = tipc_sk_port(sk);
 697        DEFINE_WAIT(wait);
 698        int done;
 699
 700        do {
 701                int err = sock_error(sk);
 702                if (err)
 703                        return err;
 704                if (sock->state == SS_DISCONNECTING)
 705                        return -EPIPE;
 706                else if (sock->state != SS_CONNECTED)
 707                        return -ENOTCONN;
 708                if (!*timeo_p)
 709                        return -EAGAIN;
 710                if (signal_pending(current))
 711                        return sock_intr_errno(*timeo_p);
 712
 713                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 714                done = sk_wait_event(sk, timeo_p,
 715                                     (!tport->congested || !tport->connected));
 716                finish_wait(sk_sleep(sk), &wait);
 717        } while (!done);
 718        return 0;
 719}
 720
 721/**
 722 * send_packet - send a connection-oriented message
 723 * @iocb: if NULL, indicates that socket lock is already held
 724 * @sock: socket structure
 725 * @m: message to send
 726 * @total_len: length of message
 727 *
 728 * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
 729 *
 730 * Returns the number of bytes sent on success, or errno otherwise
 731 */
 732static int send_packet(struct kiocb *iocb, struct socket *sock,
 733                       struct msghdr *m, size_t total_len)
 734{
 735        struct sock *sk = sock->sk;
 736        struct tipc_port *tport = tipc_sk_port(sk);
 737        DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 738        int res = -EINVAL;
 739        long timeo;
 740
 741        /* Handle implied connection establishment */
 742        if (unlikely(dest))
 743                return send_msg(iocb, sock, m, total_len);
 744
 745        if (total_len > TIPC_MAX_USER_MSG_SIZE)
 746                return -EMSGSIZE;
 747
 748        if (iocb)
 749                lock_sock(sk);
 750
 751        if (unlikely(sock->state != SS_CONNECTED)) {
 752                if (sock->state == SS_DISCONNECTING)
 753                        res = -EPIPE;
 754                else
 755                        res = -ENOTCONN;
 756                goto exit;
 757        }
 758
 759        timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 760        do {
 761                res = tipc_send(tport->ref, m->msg_iov, total_len);
 762                if (likely(res != -ELINKCONG))
 763                        break;
 764                res = tipc_wait_for_sndpkt(sock, &timeo);
 765                if (res)
 766                        break;
 767        } while (1);
 768exit:
 769        if (iocb)
 770                release_sock(sk);
 771        return res;
 772}
 773
 774/**
 775 * send_stream - send stream-oriented data
 776 * @iocb: (unused)
 777 * @sock: socket structure
 778 * @m: data to send
 779 * @total_len: total length of data to be sent
 780 *
 781 * Used for SOCK_STREAM data.
 782 *
 783 * Returns the number of bytes sent on success (or partial success),
 784 * or errno if no data sent
 785 */
 786static int send_stream(struct kiocb *iocb, struct socket *sock,
 787                       struct msghdr *m, size_t total_len)
 788{
 789        struct sock *sk = sock->sk;
 790        struct tipc_port *tport = tipc_sk_port(sk);
 791        struct msghdr my_msg;
 792        struct iovec my_iov;
 793        struct iovec *curr_iov;
 794        int curr_iovlen;
 795        char __user *curr_start;
 796        u32 hdr_size;
 797        int curr_left;
 798        int bytes_to_send;
 799        int bytes_sent;
 800        int res;
 801
 802        lock_sock(sk);
 803
 804        /* Handle special cases where there is no connection */
 805        if (unlikely(sock->state != SS_CONNECTED)) {
 806                if (sock->state == SS_UNCONNECTED)
 807                        res = send_packet(NULL, sock, m, total_len);
 808                else
 809                        res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN;
 810                goto exit;
 811        }
 812
 813        if (unlikely(m->msg_name)) {
 814                res = -EISCONN;
 815                goto exit;
 816        }
 817
 818        if (total_len > (unsigned int)INT_MAX) {
 819                res = -EMSGSIZE;
 820                goto exit;
 821        }
 822
 823        /*
 824         * Send each iovec entry using one or more messages
 825         *
 826         * Note: This algorithm is good for the most likely case
 827         * (i.e. one large iovec entry), but could be improved to pass sets
 828         * of small iovec entries into send_packet().
 829         */
 830        curr_iov = m->msg_iov;
 831        curr_iovlen = m->msg_iovlen;
 832        my_msg.msg_iov = &my_iov;
 833        my_msg.msg_iovlen = 1;
 834        my_msg.msg_flags = m->msg_flags;
 835        my_msg.msg_name = NULL;
 836        bytes_sent = 0;
 837
 838        hdr_size = msg_hdr_sz(&tport->phdr);
 839
 840        while (curr_iovlen--) {
 841                curr_start = curr_iov->iov_base;
 842                curr_left = curr_iov->iov_len;
 843
 844                while (curr_left) {
 845                        bytes_to_send = tport->max_pkt - hdr_size;
 846                        if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
 847                                bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
 848                        if (curr_left < bytes_to_send)
 849                                bytes_to_send = curr_left;
 850                        my_iov.iov_base = curr_start;
 851                        my_iov.iov_len = bytes_to_send;
 852                        res = send_packet(NULL, sock, &my_msg, bytes_to_send);
 853                        if (res < 0) {
 854                                if (bytes_sent)
 855                                        res = bytes_sent;
 856                                goto exit;
 857                        }
 858                        curr_left -= bytes_to_send;
 859                        curr_start += bytes_to_send;
 860                        bytes_sent += bytes_to_send;
 861                }
 862
 863                curr_iov++;
 864        }
 865        res = bytes_sent;
 866exit:
 867        release_sock(sk);
 868        return res;
 869}
 870
 871/**
 872 * auto_connect - complete connection setup to a remote port
 873 * @sock: socket structure
 874 * @msg: peer's response message
 875 *
 876 * Returns 0 on success, errno otherwise
 877 */
 878static int auto_connect(struct socket *sock, struct tipc_msg *msg)
 879{
 880        struct tipc_sock *tsock = tipc_sk(sock->sk);
 881        struct tipc_port *p_ptr;
 882
 883        tsock->peer_name.ref = msg_origport(msg);
 884        tsock->peer_name.node = msg_orignode(msg);
 885        p_ptr = tipc_port_deref(tsock->p->ref);
 886        if (!p_ptr)
 887                return -EINVAL;
 888
 889        __tipc_connect(tsock->p->ref, p_ptr, &tsock->peer_name);
 890
 891        if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)
 892                return -EINVAL;
 893        msg_set_importance(&p_ptr->phdr, (u32)msg_importance(msg));
 894        sock->state = SS_CONNECTED;
 895        return 0;
 896}
 897
 898/**
 899 * set_orig_addr - capture sender's address for received message
 900 * @m: descriptor for message info
 901 * @msg: received message header
 902 *
 903 * Note: Address is not captured if not requested by receiver.
 904 */
 905static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
 906{
 907        DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
 908
 909        if (addr) {
 910                addr->family = AF_TIPC;
 911                addr->addrtype = TIPC_ADDR_ID;
 912                memset(&addr->addr, 0, sizeof(addr->addr));
 913                addr->addr.id.ref = msg_origport(msg);
 914                addr->addr.id.node = msg_orignode(msg);
 915                addr->addr.name.domain = 0;     /* could leave uninitialized */
 916                addr->scope = 0;                /* could leave uninitialized */
 917                m->msg_namelen = sizeof(struct sockaddr_tipc);
 918        }
 919}
 920
 921/**
 922 * anc_data_recv - optionally capture ancillary data for received message
 923 * @m: descriptor for message info
 924 * @msg: received message header
 925 * @tport: TIPC port associated with message
 926 *
 927 * Note: Ancillary data is not captured if not requested by receiver.
 928 *
 929 * Returns 0 if successful, otherwise errno
 930 */
 931static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
 932                         struct tipc_port *tport)
 933{
 934        u32 anc_data[3];
 935        u32 err;
 936        u32 dest_type;
 937        int has_name;
 938        int res;
 939
 940        if (likely(m->msg_controllen == 0))
 941                return 0;
 942
 943        /* Optionally capture errored message object(s) */
 944        err = msg ? msg_errcode(msg) : 0;
 945        if (unlikely(err)) {
 946                anc_data[0] = err;
 947                anc_data[1] = msg_data_sz(msg);
 948                res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
 949                if (res)
 950                        return res;
 951                if (anc_data[1]) {
 952                        res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
 953                                       msg_data(msg));
 954                        if (res)
 955                                return res;
 956                }
 957        }
 958
 959        /* Optionally capture message destination object */
 960        dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
 961        switch (dest_type) {
 962        case TIPC_NAMED_MSG:
 963                has_name = 1;
 964                anc_data[0] = msg_nametype(msg);
 965                anc_data[1] = msg_namelower(msg);
 966                anc_data[2] = msg_namelower(msg);
 967                break;
 968        case TIPC_MCAST_MSG:
 969                has_name = 1;
 970                anc_data[0] = msg_nametype(msg);
 971                anc_data[1] = msg_namelower(msg);
 972                anc_data[2] = msg_nameupper(msg);
 973                break;
 974        case TIPC_CONN_MSG:
 975                has_name = (tport->conn_type != 0);
 976                anc_data[0] = tport->conn_type;
 977                anc_data[1] = tport->conn_instance;
 978                anc_data[2] = tport->conn_instance;
 979                break;
 980        default:
 981                has_name = 0;
 982        }
 983        if (has_name) {
 984                res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
 985                if (res)
 986                        return res;
 987        }
 988
 989        return 0;
 990}
 991
 992static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
 993{
 994        struct sock *sk = sock->sk;
 995        DEFINE_WAIT(wait);
 996        int err;
 997
 998        for (;;) {
 999                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1000                if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
1001                        if (sock->state == SS_DISCONNECTING) {
1002                                err = -ENOTCONN;
1003                                break;
1004                        }
1005                        release_sock(sk);
1006                        timeo = schedule_timeout(timeo);
1007                        lock_sock(sk);
1008                }
1009                err = 0;
1010                if (!skb_queue_empty(&sk->sk_receive_queue))
1011                        break;
1012                err = sock_intr_errno(timeo);
1013                if (signal_pending(current))
1014                        break;
1015                err = -EAGAIN;
1016                if (!timeo)
1017                        break;
1018        }
1019        finish_wait(sk_sleep(sk), &wait);
1020        return err;
1021}
1022
1023/**
1024 * recv_msg - receive packet-oriented message
1025 * @iocb: (unused)
1026 * @m: descriptor for message info
1027 * @buf_len: total size of user buffer area
1028 * @flags: receive flags
1029 *
1030 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
1031 * If the complete message doesn't fit in user area, truncate it.
1032 *
1033 * Returns size of returned message data, errno otherwise
1034 */
1035static int recv_msg(struct kiocb *iocb, struct socket *sock,
1036                    struct msghdr *m, size_t buf_len, int flags)
1037{
1038        struct sock *sk = sock->sk;
1039        struct tipc_port *tport = tipc_sk_port(sk);
1040        struct sk_buff *buf;
1041        struct tipc_msg *msg;
1042        long timeo;
1043        unsigned int sz;
1044        u32 err;
1045        int res;
1046
1047        /* Catch invalid receive requests */
1048        if (unlikely(!buf_len))
1049                return -EINVAL;
1050
1051        lock_sock(sk);
1052
1053        if (unlikely(sock->state == SS_UNCONNECTED)) {
1054                res = -ENOTCONN;
1055                goto exit;
1056        }
1057
1058        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1059restart:
1060
1061        /* Look for a message in receive queue; wait if necessary */
1062        res = tipc_wait_for_rcvmsg(sock, timeo);
1063        if (res)
1064                goto exit;
1065
1066        /* Look at first message in receive queue */
1067        buf = skb_peek(&sk->sk_receive_queue);
1068        msg = buf_msg(buf);
1069        sz = msg_data_sz(msg);
1070        err = msg_errcode(msg);
1071
1072        /* Discard an empty non-errored message & try again */
1073        if ((!sz) && (!err)) {
1074                advance_rx_queue(sk);
1075                goto restart;
1076        }
1077
1078        /* Capture sender's address (optional) */
1079        set_orig_addr(m, msg);
1080
1081        /* Capture ancillary data (optional) */
1082        res = anc_data_recv(m, msg, tport);
1083        if (res)
1084                goto exit;
1085
1086        /* Capture message data (if valid) & compute return value (always) */
1087        if (!err) {
1088                if (unlikely(buf_len < sz)) {
1089                        sz = buf_len;
1090                        m->msg_flags |= MSG_TRUNC;
1091                }
1092                res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg),
1093                                              m->msg_iov, sz);
1094                if (res)
1095                        goto exit;
1096                res = sz;
1097        } else {
1098                if ((sock->state == SS_READY) ||
1099                    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
1100                        res = 0;
1101                else
1102                        res = -ECONNRESET;
1103        }
1104
1105        /* Consume received message (optional) */
1106        if (likely(!(flags & MSG_PEEK))) {
1107                if ((sock->state != SS_READY) &&
1108                    (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1109                        tipc_acknowledge(tport->ref, tport->conn_unacked);
1110                advance_rx_queue(sk);
1111        }
1112exit:
1113        release_sock(sk);
1114        return res;
1115}
1116
1117/**
1118 * recv_stream - receive stream-oriented data
1119 * @iocb: (unused)
1120 * @m: descriptor for message info
1121 * @buf_len: total size of user buffer area
1122 * @flags: receive flags
1123 *
1124 * Used for SOCK_STREAM messages only.  If not enough data is available
1125 * will optionally wait for more; never truncates data.
1126 *
1127 * Returns size of returned message data, errno otherwise
1128 */
1129static int recv_stream(struct kiocb *iocb, struct socket *sock,
1130                       struct msghdr *m, size_t buf_len, int flags)
1131{
1132        struct sock *sk = sock->sk;
1133        struct tipc_port *tport = tipc_sk_port(sk);
1134        struct sk_buff *buf;
1135        struct tipc_msg *msg;
1136        long timeo;
1137        unsigned int sz;
1138        int sz_to_copy, target, needed;
1139        int sz_copied = 0;
1140        u32 err;
1141        int res = 0;
1142
1143        /* Catch invalid receive attempts */
1144        if (unlikely(!buf_len))
1145                return -EINVAL;
1146
1147        lock_sock(sk);
1148
1149        if (unlikely(sock->state == SS_UNCONNECTED)) {
1150                res = -ENOTCONN;
1151                goto exit;
1152        }
1153
1154        target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1155        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1156
1157restart:
1158        /* Look for a message in receive queue; wait if necessary */
1159        res = tipc_wait_for_rcvmsg(sock, timeo);
1160        if (res)
1161                goto exit;
1162
1163        /* Look at first message in receive queue */
1164        buf = skb_peek(&sk->sk_receive_queue);
1165        msg = buf_msg(buf);
1166        sz = msg_data_sz(msg);
1167        err = msg_errcode(msg);
1168
1169        /* Discard an empty non-errored message & try again */
1170        if ((!sz) && (!err)) {
1171                advance_rx_queue(sk);
1172                goto restart;
1173        }
1174
1175        /* Optionally capture sender's address & ancillary data of first msg */
1176        if (sz_copied == 0) {
1177                set_orig_addr(m, msg);
1178                res = anc_data_recv(m, msg, tport);
1179                if (res)
1180                        goto exit;
1181        }
1182
1183        /* Capture message data (if valid) & compute return value (always) */
1184        if (!err) {
1185                u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
1186
1187                sz -= offset;
1188                needed = (buf_len - sz_copied);
1189                sz_to_copy = (sz <= needed) ? sz : needed;
1190
1191                res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset,
1192                                              m->msg_iov, sz_to_copy);
1193                if (res)
1194                        goto exit;
1195
1196                sz_copied += sz_to_copy;
1197
1198                if (sz_to_copy < sz) {
1199                        if (!(flags & MSG_PEEK))
1200                                TIPC_SKB_CB(buf)->handle =
1201                                (void *)(unsigned long)(offset + sz_to_copy);
1202                        goto exit;
1203                }
1204        } else {
1205                if (sz_copied != 0)
1206                        goto exit; /* can't add error msg to valid data */
1207
1208                if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
1209                        res = 0;
1210                else
1211                        res = -ECONNRESET;
1212        }
1213
1214        /* Consume received message (optional) */
1215        if (likely(!(flags & MSG_PEEK))) {
1216                if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1217                        tipc_acknowledge(tport->ref, tport->conn_unacked);
1218                advance_rx_queue(sk);
1219        }
1220
1221        /* Loop around if more data is required */
1222        if ((sz_copied < buf_len) &&    /* didn't get all requested data */
1223            (!skb_queue_empty(&sk->sk_receive_queue) ||
1224            (sz_copied < target)) &&    /* and more is ready or required */
1225            (!(flags & MSG_PEEK)) &&    /* and aren't just peeking at data */
1226            (!err))                     /* and haven't reached a FIN */
1227                goto restart;
1228
1229exit:
1230        release_sock(sk);
1231        return sz_copied ? sz_copied : res;
1232}
1233
1234/**
1235 * tipc_write_space - wake up thread if port congestion is released
1236 * @sk: socket
1237 */
1238static void tipc_write_space(struct sock *sk)
1239{
1240        struct socket_wq *wq;
1241
1242        rcu_read_lock();
1243        wq = rcu_dereference(sk->sk_wq);
1244        if (wq_has_sleeper(wq))
1245                wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
1246                                                POLLWRNORM | POLLWRBAND);
1247        rcu_read_unlock();
1248}
1249
1250/**
1251 * tipc_data_ready - wake up threads to indicate messages have been received
1252 * @sk: socket
1253 * @len: the length of messages
1254 */
1255static void tipc_data_ready(struct sock *sk, int len)
1256{
1257        struct socket_wq *wq;
1258
1259        rcu_read_lock();
1260        wq = rcu_dereference(sk->sk_wq);
1261        if (wq_has_sleeper(wq))
1262                wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
1263                                                POLLRDNORM | POLLRDBAND);
1264        rcu_read_unlock();
1265}
1266
/**
 * filter_connect - screen an incoming message on a connection-based socket
 * @tsock: TIPC socket
 * @buf: in/out pointer to the message buffer; set to NULL when the
 *       buffer is freed here (data-less 'ACK' while connecting)
 *
 * May change the socket state and sk_err as a side effect.
 *
 * Returns TIPC_OK when the message is acceptable (or was consumed here),
 * TIPC_ERR_NO_PORT otherwise
 */
static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
{
        struct sock *sk = &tsock->sk;
        struct socket *sock = tsock->sk.sk_socket;
        struct tipc_msg *msg = buf_msg(*buf);
        int err;

        /* Multicast messages are never accepted on these sockets */
        if (msg_mcast(msg))
                return TIPC_ERR_NO_PORT;

        switch ((int)sock->state) {
        case SS_CONNECTED:
                /* Accept only connection-based messages sent by peer */
                if (!msg_connected(msg) ||
                    !tipc_port_peer_msg(tsock->p, msg))
                        return TIPC_ERR_NO_PORT;

                /* An errored message from the peer ends the connection */
                if (unlikely(msg_errcode(msg))) {
                        sock->state = SS_DISCONNECTING;
                        __tipc_disconnect(tsock->p);
                }
                return TIPC_OK;

        case SS_CONNECTING:
                /* Accept only ACK or NACK message */
                if (unlikely(msg_errcode(msg))) {
                        sock->state = SS_DISCONNECTING;
                        sk->sk_err = ECONNREFUSED;
                        return TIPC_OK;
                }

                if (unlikely(!msg_connected(msg)))
                        return TIPC_ERR_NO_PORT;

                err = auto_connect(sock, msg);
                if (err) {
                        sock->state = SS_DISCONNECTING;
                        sk->sk_err = -err;
                        return TIPC_OK;
                }

                /* An 'ACK-' carries no useful data, so it is discarded
                 * here.  In addition, try to wake up the connect()
                 * routine if it is sleeping.
                 */
                if (msg_data_sz(msg) == 0) {
                        kfree_skb(*buf);
                        *buf = NULL;
                        if (waitqueue_active(sk_sleep(sk)))
                                wake_up_interruptible(sk_sleep(sk));
                }
                return TIPC_OK;

        case SS_LISTENING:
        case SS_UNCONNECTED:
                /* Accept only SYN message */
                if (!msg_connected(msg) && !(msg_errcode(msg)))
                        return TIPC_OK;
                return TIPC_ERR_NO_PORT;

        case SS_DISCONNECTING:
                return TIPC_ERR_NO_PORT;

        default:
                pr_err("Unknown socket state %u\n", sock->state);
                return TIPC_ERR_NO_PORT;
        }
}
1343
1344/**
1345 * rcvbuf_limit - get proper overload limit of socket receive queue
1346 * @sk: socket
1347 * @buf: message
1348 *
1349 * For all connection oriented messages, irrespective of importance,
1350 * the default overload value (i.e. 67MB) is set as limit.
1351 *
1352 * For all connectionless messages, by default new queue limits are
1353 * as belows:
1354 *
1355 * TIPC_LOW_IMPORTANCE       (4 MB)
1356 * TIPC_MEDIUM_IMPORTANCE    (8 MB)
1357 * TIPC_HIGH_IMPORTANCE      (16 MB)
1358 * TIPC_CRITICAL_IMPORTANCE  (32 MB)
1359 *
1360 * Returns overload limit according to corresponding message importance
1361 */
1362static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
1363{
1364        struct tipc_msg *msg = buf_msg(buf);
1365
1366        if (msg_connected(msg))
1367                return sysctl_tipc_rmem[2];
1368
1369        return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
1370                msg_importance(msg);
1371}
1372
1373/**
1374 * filter_rcv - validate incoming message
1375 * @sk: socket
1376 * @buf: message
1377 *
1378 * Enqueues message on receive queue if acceptable; optionally handles
1379 * disconnect indication for a connected socket.
1380 *
1381 * Called with socket lock already taken; port lock may also be taken.
1382 *
1383 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1384 */
1385static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
1386{
1387        struct socket *sock = sk->sk_socket;
1388        struct tipc_msg *msg = buf_msg(buf);
1389        unsigned int limit = rcvbuf_limit(sk, buf);
1390        u32 res = TIPC_OK;
1391
1392        /* Reject message if it is wrong sort of message for socket */
1393        if (msg_type(msg) > TIPC_DIRECT_MSG)
1394                return TIPC_ERR_NO_PORT;
1395
1396        if (sock->state == SS_READY) {
1397                if (msg_connected(msg))
1398                        return TIPC_ERR_NO_PORT;
1399        } else {
1400                res = filter_connect(tipc_sk(sk), &buf);
1401                if (res != TIPC_OK || buf == NULL)
1402                        return res;
1403        }
1404
1405        /* Reject message if there isn't room to queue it */
1406        if (sk_rmem_alloc_get(sk) + buf->truesize >= limit)
1407                return TIPC_ERR_OVERLOAD;
1408
1409        /* Enqueue message */
1410        TIPC_SKB_CB(buf)->handle = NULL;
1411        __skb_queue_tail(&sk->sk_receive_queue, buf);
1412        skb_set_owner_r(buf, sk);
1413
1414        sk->sk_data_ready(sk, 0);
1415        return TIPC_OK;
1416}
1417
1418/**
1419 * backlog_rcv - handle incoming message from backlog queue
1420 * @sk: socket
1421 * @buf: message
1422 *
1423 * Caller must hold socket lock, but not port lock.
1424 *
1425 * Returns 0
1426 */
1427static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
1428{
1429        u32 res;
1430
1431        res = filter_rcv(sk, buf);
1432        if (res)
1433                tipc_reject_msg(buf, res);
1434        return 0;
1435}
1436
1437/**
1438 * dispatch - handle incoming message
1439 * @tport: TIPC port that received message
1440 * @buf: message
1441 *
1442 * Called with port lock already taken.
1443 *
1444 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1445 */
1446static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1447{
1448        struct sock *sk = tport->sk;
1449        u32 res;
1450
1451        /*
1452         * Process message if socket is unlocked; otherwise add to backlog queue
1453         *
1454         * This code is based on sk_receive_skb(), but must be distinct from it
1455         * since a TIPC-specific filter/reject mechanism is utilized
1456         */
1457        bh_lock_sock(sk);
1458        if (!sock_owned_by_user(sk)) {
1459                res = filter_rcv(sk, buf);
1460        } else {
1461                if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf)))
1462                        res = TIPC_ERR_OVERLOAD;
1463                else
1464                        res = TIPC_OK;
1465        }
1466        bh_unlock_sock(sk);
1467
1468        return res;
1469}
1470
1471/**
1472 * wakeupdispatch - wake up port after congestion
1473 * @tport: port to wakeup
1474 *
1475 * Called with port lock already taken.
1476 */
1477static void wakeupdispatch(struct tipc_port *tport)
1478{
1479        struct sock *sk = tport->sk;
1480
1481        sk->sk_write_space(sk);
1482}
1483
1484static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
1485{
1486        struct sock *sk = sock->sk;
1487        DEFINE_WAIT(wait);
1488        int done;
1489
1490        do {
1491                int err = sock_error(sk);
1492                if (err)
1493                        return err;
1494                if (!*timeo_p)
1495                        return -ETIMEDOUT;
1496                if (signal_pending(current))
1497                        return sock_intr_errno(*timeo_p);
1498
1499                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1500                done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING);
1501                finish_wait(sk_sleep(sk), &wait);
1502        } while (!done);
1503        return 0;
1504}
1505
1506/**
1507 * connect - establish a connection to another TIPC port
1508 * @sock: socket structure
1509 * @dest: socket address for destination port
1510 * @destlen: size of socket address data structure
1511 * @flags: file-related flags associated with socket
1512 *
1513 * Returns 0 on success, errno otherwise
1514 */
1515static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1516                   int flags)
1517{
1518        struct sock *sk = sock->sk;
1519        struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
1520        struct msghdr m = {NULL,};
1521        long timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout;
1522        socket_state previous;
1523        int res;
1524
1525        lock_sock(sk);
1526
1527        /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
1528        if (sock->state == SS_READY) {
1529                res = -EOPNOTSUPP;
1530                goto exit;
1531        }
1532
1533        /*
1534         * Reject connection attempt using multicast address
1535         *
1536         * Note: send_msg() validates the rest of the address fields,
1537         *       so there's no need to do it here
1538         */
1539        if (dst->addrtype == TIPC_ADDR_MCAST) {
1540                res = -EINVAL;
1541                goto exit;
1542        }
1543
1544        previous = sock->state;
1545        switch (sock->state) {
1546        case SS_UNCONNECTED:
1547                /* Send a 'SYN-' to destination */
1548                m.msg_name = dest;
1549                m.msg_namelen = destlen;
1550
1551                /* If connect is in non-blocking case, set MSG_DONTWAIT to
1552                 * indicate send_msg() is never blocked.
1553                 */
1554                if (!timeout)
1555                        m.msg_flags = MSG_DONTWAIT;
1556
1557                res = send_msg(NULL, sock, &m, 0);
1558                if ((res < 0) && (res != -EWOULDBLOCK))
1559                        goto exit;
1560
1561                /* Just entered SS_CONNECTING state; the only
1562                 * difference is that return value in non-blocking
1563                 * case is EINPROGRESS, rather than EALREADY.
1564                 */
1565                res = -EINPROGRESS;
1566        case SS_CONNECTING:
1567                if (previous == SS_CONNECTING)
1568                        res = -EALREADY;
1569                if (!timeout)
1570                        goto exit;
1571                timeout = msecs_to_jiffies(timeout);
1572                /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1573                res = tipc_wait_for_connect(sock, &timeout);
1574                break;
1575        case SS_CONNECTED:
1576                res = -EISCONN;
1577                break;
1578        default:
1579                res = -EINVAL;
1580                break;
1581        }
1582exit:
1583        release_sock(sk);
1584        return res;
1585}
1586
1587/**
1588 * listen - allow socket to listen for incoming connections
1589 * @sock: socket structure
1590 * @len: (unused)
1591 *
1592 * Returns 0 on success, errno otherwise
1593 */
1594static int listen(struct socket *sock, int len)
1595{
1596        struct sock *sk = sock->sk;
1597        int res;
1598
1599        lock_sock(sk);
1600
1601        if (sock->state != SS_UNCONNECTED)
1602                res = -EINVAL;
1603        else {
1604                sock->state = SS_LISTENING;
1605                res = 0;
1606        }
1607
1608        release_sock(sk);
1609        return res;
1610}
1611
1612static int tipc_wait_for_accept(struct socket *sock, long timeo)
1613{
1614        struct sock *sk = sock->sk;
1615        DEFINE_WAIT(wait);
1616        int err;
1617
1618        /* True wake-one mechanism for incoming connections: only
1619         * one process gets woken up, not the 'whole herd'.
1620         * Since we do not 'race & poll' for established sockets
1621         * anymore, the common case will execute the loop only once.
1622        */
1623        for (;;) {
1624                prepare_to_wait_exclusive(sk_sleep(sk), &wait,
1625                                          TASK_INTERRUPTIBLE);
1626                if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
1627                        release_sock(sk);
1628                        timeo = schedule_timeout(timeo);
1629                        lock_sock(sk);
1630                }
1631                err = 0;
1632                if (!skb_queue_empty(&sk->sk_receive_queue))
1633                        break;
1634                err = -EINVAL;
1635                if (sock->state != SS_LISTENING)
1636                        break;
1637                err = sock_intr_errno(timeo);
1638                if (signal_pending(current))
1639                        break;
1640                err = -EAGAIN;
1641                if (!timeo)
1642                        break;
1643        }
1644        finish_wait(sk_sleep(sk), &wait);
1645        return err;
1646}
1647
1648/**
1649 * accept - wait for connection request
1650 * @sock: listening socket
1651 * @newsock: new socket that is to be connected
1652 * @flags: file-related flags associated with socket
1653 *
1654 * Returns 0 on success, errno otherwise
1655 */
1656static int accept(struct socket *sock, struct socket *new_sock, int flags)
1657{
1658        struct sock *new_sk, *sk = sock->sk;
1659        struct sk_buff *buf;
1660        struct tipc_sock *new_tsock;
1661        struct tipc_port *new_tport;
1662        struct tipc_msg *msg;
1663        u32 new_ref;
1664        long timeo;
1665        int res;
1666
1667        lock_sock(sk);
1668
1669        if (sock->state != SS_LISTENING) {
1670                res = -EINVAL;
1671                goto exit;
1672        }
1673
1674        timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1675        res = tipc_wait_for_accept(sock, timeo);
1676        if (res)
1677                goto exit;
1678
1679        buf = skb_peek(&sk->sk_receive_queue);
1680
1681        res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
1682        if (res)
1683                goto exit;
1684
1685        new_sk = new_sock->sk;
1686        new_tsock = tipc_sk(new_sk);
1687        new_tport = new_tsock->p;
1688        new_ref = new_tport->ref;
1689        msg = buf_msg(buf);
1690
1691        /* we lock on new_sk; but lockdep sees the lock on sk */
1692        lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);
1693
1694        /*
1695         * Reject any stray messages received by new socket
1696         * before the socket lock was taken (very, very unlikely)
1697         */
1698        reject_rx_queue(new_sk);
1699
1700        /* Connect new socket to it's peer */
1701        new_tsock->peer_name.ref = msg_origport(msg);
1702        new_tsock->peer_name.node = msg_orignode(msg);
1703        tipc_connect(new_ref, &new_tsock->peer_name);
1704        new_sock->state = SS_CONNECTED;
1705
1706        tipc_set_portimportance(new_ref, msg_importance(msg));
1707        if (msg_named(msg)) {
1708                new_tport->conn_type = msg_nametype(msg);
1709                new_tport->conn_instance = msg_nameinst(msg);
1710        }
1711
1712        /*
1713         * Respond to 'SYN-' by discarding it & returning 'ACK'-.
1714         * Respond to 'SYN+' by queuing it on new socket.
1715         */
1716        if (!msg_data_sz(msg)) {
1717                struct msghdr m = {NULL,};
1718
1719                advance_rx_queue(sk);
1720                send_packet(NULL, new_sock, &m, 0);
1721        } else {
1722                __skb_dequeue(&sk->sk_receive_queue);
1723                __skb_queue_head(&new_sk->sk_receive_queue, buf);
1724                skb_set_owner_r(buf, new_sk);
1725        }
1726        release_sock(new_sk);
1727
1728exit:
1729        release_sock(sk);
1730        return res;
1731}
1732
1733/**
1734 * shutdown - shutdown socket connection
1735 * @sock: socket structure
1736 * @how: direction to close (must be SHUT_RDWR)
1737 *
1738 * Terminates connection (if necessary), then purges socket's receive queue.
1739 *
1740 * Returns 0 on success, errno otherwise
1741 */
1742static int shutdown(struct socket *sock, int how)
1743{
1744        struct sock *sk = sock->sk;
1745        struct tipc_port *tport = tipc_sk_port(sk);
1746        struct sk_buff *buf;
1747        int res;
1748
1749        if (how != SHUT_RDWR)
1750                return -EINVAL;
1751
1752        lock_sock(sk);
1753
1754        switch (sock->state) {
1755        case SS_CONNECTING:
1756        case SS_CONNECTED:
1757
1758restart:
1759                /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
1760                buf = __skb_dequeue(&sk->sk_receive_queue);
1761                if (buf) {
1762                        if (TIPC_SKB_CB(buf)->handle != NULL) {
1763                                kfree_skb(buf);
1764                                goto restart;
1765                        }
1766                        tipc_disconnect(tport->ref);
1767                        tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
1768                } else {
1769                        tipc_shutdown(tport->ref);
1770                }
1771
1772                sock->state = SS_DISCONNECTING;
1773
1774                /* fall through */
1775
1776        case SS_DISCONNECTING:
1777
1778                /* Discard any unreceived messages */
1779                __skb_queue_purge(&sk->sk_receive_queue);
1780
1781                /* Wake up anyone sleeping in poll */
1782                sk->sk_state_change(sk);
1783                res = 0;
1784                break;
1785
1786        default:
1787                res = -ENOTCONN;
1788        }
1789
1790        release_sock(sk);
1791        return res;
1792}
1793
1794/**
1795 * setsockopt - set socket option
1796 * @sock: socket structure
1797 * @lvl: option level
1798 * @opt: option identifier
1799 * @ov: pointer to new option value
1800 * @ol: length of option value
1801 *
1802 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
1803 * (to ease compatibility).
1804 *
1805 * Returns 0 on success, errno otherwise
1806 */
1807static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
1808                      unsigned int ol)
1809{
1810        struct sock *sk = sock->sk;
1811        struct tipc_port *tport = tipc_sk_port(sk);
1812        u32 value;
1813        int res;
1814
1815        if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1816                return 0;
1817        if (lvl != SOL_TIPC)
1818                return -ENOPROTOOPT;
1819        if (ol < sizeof(value))
1820                return -EINVAL;
1821        res = get_user(value, (u32 __user *)ov);
1822        if (res)
1823                return res;
1824
1825        lock_sock(sk);
1826
1827        switch (opt) {
1828        case TIPC_IMPORTANCE:
1829                res = tipc_set_portimportance(tport->ref, value);
1830                break;
1831        case TIPC_SRC_DROPPABLE:
1832                if (sock->type != SOCK_STREAM)
1833                        res = tipc_set_portunreliable(tport->ref, value);
1834                else
1835                        res = -ENOPROTOOPT;
1836                break;
1837        case TIPC_DEST_DROPPABLE:
1838                res = tipc_set_portunreturnable(tport->ref, value);
1839                break;
1840        case TIPC_CONN_TIMEOUT:
1841                tipc_sk(sk)->conn_timeout = value;
1842                /* no need to set "res", since already 0 at this point */
1843                break;
1844        default:
1845                res = -EINVAL;
1846        }
1847
1848        release_sock(sk);
1849
1850        return res;
1851}
1852
1853/**
1854 * getsockopt - get socket option
1855 * @sock: socket structure
1856 * @lvl: option level
1857 * @opt: option identifier
1858 * @ov: receptacle for option value
1859 * @ol: receptacle for length of option value
1860 *
1861 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
1862 * (to ease compatibility).
1863 *
1864 * Returns 0 on success, errno otherwise
1865 */
1866static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
1867                      int __user *ol)
1868{
1869        struct sock *sk = sock->sk;
1870        struct tipc_port *tport = tipc_sk_port(sk);
1871        int len;
1872        u32 value;
1873        int res;
1874
1875        if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1876                return put_user(0, ol);
1877        if (lvl != SOL_TIPC)
1878                return -ENOPROTOOPT;
1879        res = get_user(len, ol);
1880        if (res)
1881                return res;
1882
1883        lock_sock(sk);
1884
1885        switch (opt) {
1886        case TIPC_IMPORTANCE:
1887                res = tipc_portimportance(tport->ref, &value);
1888                break;
1889        case TIPC_SRC_DROPPABLE:
1890                res = tipc_portunreliable(tport->ref, &value);
1891                break;
1892        case TIPC_DEST_DROPPABLE:
1893                res = tipc_portunreturnable(tport->ref, &value);
1894                break;
1895        case TIPC_CONN_TIMEOUT:
1896                value = tipc_sk(sk)->conn_timeout;
1897                /* no need to set "res", since already 0 at this point */
1898                break;
1899        case TIPC_NODE_RECVQ_DEPTH:
1900                value = 0; /* was tipc_queue_size, now obsolete */
1901                break;
1902        case TIPC_SOCK_RECVQ_DEPTH:
1903                value = skb_queue_len(&sk->sk_receive_queue);
1904                break;
1905        default:
1906                res = -EINVAL;
1907        }
1908
1909        release_sock(sk);
1910
1911        if (res)
1912                return res;     /* "get" failed */
1913
1914        if (len < sizeof(value))
1915                return -EINVAL;
1916
1917        if (copy_to_user(ov, &value, sizeof(value)))
1918                return -EFAULT;
1919
1920        return put_user(sizeof(value), ol);
1921}
1922
1923/* Protocol switches for the various types of TIPC sockets */
1924
1925static const struct proto_ops msg_ops = {
1926        .owner          = THIS_MODULE,
1927        .family         = AF_TIPC,
1928        .release        = release,
1929        .bind           = bind,
1930        .connect        = connect,
1931        .socketpair     = sock_no_socketpair,
1932        .accept         = sock_no_accept,
1933        .getname        = get_name,
1934        .poll           = poll,
1935        .ioctl          = sock_no_ioctl,
1936        .listen         = sock_no_listen,
1937        .shutdown       = shutdown,
1938        .setsockopt     = setsockopt,
1939        .getsockopt     = getsockopt,
1940        .sendmsg        = send_msg,
1941        .recvmsg        = recv_msg,
1942        .mmap           = sock_no_mmap,
1943        .sendpage       = sock_no_sendpage
1944};
1945
1946static const struct proto_ops packet_ops = {
1947        .owner          = THIS_MODULE,
1948        .family         = AF_TIPC,
1949        .release        = release,
1950        .bind           = bind,
1951        .connect        = connect,
1952        .socketpair     = sock_no_socketpair,
1953        .accept         = accept,
1954        .getname        = get_name,
1955        .poll           = poll,
1956        .ioctl          = sock_no_ioctl,
1957        .listen         = listen,
1958        .shutdown       = shutdown,
1959        .setsockopt     = setsockopt,
1960        .getsockopt     = getsockopt,
1961        .sendmsg        = send_packet,
1962        .recvmsg        = recv_msg,
1963        .mmap           = sock_no_mmap,
1964        .sendpage       = sock_no_sendpage
1965};
1966
1967static const struct proto_ops stream_ops = {
1968        .owner          = THIS_MODULE,
1969        .family         = AF_TIPC,
1970        .release        = release,
1971        .bind           = bind,
1972        .connect        = connect,
1973        .socketpair     = sock_no_socketpair,
1974        .accept         = accept,
1975        .getname        = get_name,
1976        .poll           = poll,
1977        .ioctl          = sock_no_ioctl,
1978        .listen         = listen,
1979        .shutdown       = shutdown,
1980        .setsockopt     = setsockopt,
1981        .getsockopt     = getsockopt,
1982        .sendmsg        = send_stream,
1983        .recvmsg        = recv_stream,
1984        .mmap           = sock_no_mmap,
1985        .sendpage       = sock_no_sendpage
1986};
1987
1988static const struct net_proto_family tipc_family_ops = {
1989        .owner          = THIS_MODULE,
1990        .family         = AF_TIPC,
1991        .create         = tipc_sk_create
1992};
1993
1994static struct proto tipc_proto = {
1995        .name           = "TIPC",
1996        .owner          = THIS_MODULE,
1997        .obj_size       = sizeof(struct tipc_sock),
1998        .sysctl_rmem    = sysctl_tipc_rmem
1999};
2000
2001static struct proto tipc_proto_kern = {
2002        .name           = "TIPC",
2003        .obj_size       = sizeof(struct tipc_sock),
2004        .sysctl_rmem    = sysctl_tipc_rmem
2005};
2006
2007/**
2008 * tipc_socket_init - initialize TIPC socket interface
2009 *
2010 * Returns 0 on success, errno otherwise
2011 */
2012int tipc_socket_init(void)
2013{
2014        int res;
2015
2016        res = proto_register(&tipc_proto, 1);
2017        if (res) {
2018                pr_err("Failed to register TIPC protocol type\n");
2019                goto out;
2020        }
2021
2022        res = sock_register(&tipc_family_ops);
2023        if (res) {
2024                pr_err("Failed to register TIPC socket type\n");
2025                proto_unregister(&tipc_proto);
2026                goto out;
2027        }
2028 out:
2029        return res;
2030}
2031
2032/**
2033 * tipc_socket_stop - stop TIPC socket interface
2034 */
2035void tipc_socket_stop(void)
2036{
2037        sock_unregister(tipc_family_ops.family);
2038        proto_unregister(&tipc_proto);
2039}
2040