linux/net/unix/af_unix.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * NET4:        Implementation of BSD Unix domain sockets.
   4 *
   5 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6 *
   7 * Fixes:
   8 *              Linus Torvalds  :       Assorted bug cures.
   9 *              Niibe Yutaka    :       async I/O support.
  10 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11 *              Alan Cox        :       Limit size of allocated blocks.
  12 *              Alan Cox        :       Fixed the stupid socketpair bug.
  13 *              Alan Cox        :       BSD compatibility fine tuning.
  14 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15 *              Alan Cox        :       Sorted out a proper draft version of
  16 *                                      file descriptor passing hacked up from
  17 *                                      Mike Shaver's work.
  18 *              Marty Leisner   :       Fixes to fd passing
  19 *              Nick Nevin      :       recvmsg bugfix.
  20 *              Alan Cox        :       Started proper garbage collector
   21 *              Heiko Eißfeldt  :       Missing verify_area check
  22 *              Alan Cox        :       Started POSIXisms
  23 *              Andreas Schwab  :       Replace inode by dentry for proper
  24 *                                      reference counting
  25 *              Kirk Petersen   :       Made this a module
  26 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27 *                                      Lots of bug fixes.
   28 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  29 *                                      by above two patches.
  30 *           Andrea Arcangeli   :       If possible we block in connect(2)
  31 *                                      if the max backlog of the listen socket
   32 *                                      has been reached. This won't break
  33 *                                      old apps and it will avoid huge amount
  34 *                                      of socks hashed (this for unix_gc()
  35 *                                      performances reasons).
  36 *                                      Security fix that limits the max
  37 *                                      number of socks to 2*max_files and
  38 *                                      the number of skb queueable in the
  39 *                                      dgram receiver.
  40 *              Artur Skawina   :       Hash function optimizations
  41 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42 *            Malcolm Beattie   :       Set peercred for socketpair
  43 *           Michal Ostrowski   :       Module initialization cleanup.
  44 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45 *                                      the core infrastructure is doing that
  46 *                                      for all net proto families now (2.5.69+)
  47 *
  48 * Known differences from reference BSD that was tested:
  49 *
  50 *      [TO FIX]
  51 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52 *              other the moment one end closes.
  53 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55 *      [NOT TO FIX]
  56 *      accept() returns a path name even if the connecting socket has closed
  57 *              in the meantime (BSD loses the path and gives up).
  58 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61 *      BSD af_unix apparently has connect forgetting to block properly.
  62 *              (need to check this with the POSIX spec in detail)
  63 *
  64 * Differences from 2.0.0-11-... (ANK)
  65 *      Bug fixes and improvements.
  66 *              - client shutdown killed server socket.
  67 *              - removed all useless cli/sti pairs.
  68 *
  69 *      Semantic changes/extensions.
  70 *              - generic control message passing.
  71 *              - SCM_CREDENTIALS control message.
  72 *              - "Abstract" (not FS based) socket bindings.
  73 *                Abstract names are sequences of bytes (not zero terminated)
  74 *                started by 0, so that this name space does not intersect
  75 *                with BSD names.
  76 */
  77
  78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80#include <linux/module.h>
  81#include <linux/kernel.h>
  82#include <linux/signal.h>
  83#include <linux/sched/signal.h>
  84#include <linux/errno.h>
  85#include <linux/string.h>
  86#include <linux/stat.h>
  87#include <linux/dcache.h>
  88#include <linux/namei.h>
  89#include <linux/socket.h>
  90#include <linux/un.h>
  91#include <linux/fcntl.h>
  92#include <linux/termios.h>
  93#include <linux/sockios.h>
  94#include <linux/net.h>
  95#include <linux/in.h>
  96#include <linux/fs.h>
  97#include <linux/slab.h>
  98#include <linux/uaccess.h>
  99#include <linux/skbuff.h>
 100#include <linux/netdevice.h>
 101#include <net/net_namespace.h>
 102#include <net/sock.h>
 103#include <net/tcp_states.h>
 104#include <net/af_unix.h>
 105#include <linux/proc_fs.h>
 106#include <linux/seq_file.h>
 107#include <net/scm.h>
 108#include <linux/init.h>
 109#include <linux/poll.h>
 110#include <linux/rtnetlink.h>
 111#include <linux/mount.h>
 112#include <net/checksum.h>
 113#include <linux/security.h>
 114#include <linux/freezer.h>
 115#include <linux/file.h>
 116
 117#include "scm.h"
 118
/* Global hash table of all AF_UNIX sockets.  The first UNIX_HASH_SIZE
 * buckets hold bound sockets (bucket chosen from the name hash XOR'd
 * with the socket type); the second UNIX_HASH_SIZE buckets hold
 * unbound sockets (see unix_sockets_unbound()).
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Protects unix_socket_table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/* Count of live AF_UNIX socks; limited in unix_create1(). */
static atomic_long_t unix_nr_socks;
 124
 125
 126static struct hlist_head *unix_sockets_unbound(void *addr)
 127{
 128        unsigned long hash = (unsigned long)addr;
 129
 130        hash ^= hash >> 16;
 131        hash ^= hash >> 8;
 132        hash %= UNIX_HASH_SIZE;
 133        return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134}
 135
/* True when the socket's bound name hashed into the first half of the
 * table, i.e. it is an abstract (non-filesystem) address.  Only valid
 * when unix_sk(sk)->addr is non-NULL.
 */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security ID from the scm cookie into the
 * skb control block so it travels with the message.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

/* Copy the security ID carried by the skb back into the receiver's
 * scm cookie.
 */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

/* True when the skb carries the same security ID as the scm cookie. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
/* !CONFIG_SECURITY_NETWORK: security IDs are not tracked; the
 * comparison degenerates to "always equal".
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
 165
 166/*
 167 *  SMP locking strategy:
 168 *    hash table is protected with spinlock unix_table_lock
 169 *    each socket state is protected by separate spin lock.
 170 */
 171
 172static inline unsigned int unix_hash_fold(__wsum n)
 173{
 174        unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176        hash ^= hash>>8;
 177        return hash&(UNIX_HASH_SIZE-1);
 178}
 179
/* The connected peer (if any) of an AF_UNIX socket. */
#define unix_peer(sk) (unix_sk(sk)->peer)
 181
 182static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 183{
 184        return unix_peer(osk) == sk;
 185}
 186
 187static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188{
 189        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190}
 191
/* Dgram flow-control test: true when sk's receive queue holds
 * strictly more skbs than its configured backlog allows.
 */
static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
 196
 197struct sock *unix_peer_get(struct sock *s)
 198{
 199        struct sock *peer;
 200
 201        unix_state_lock(s);
 202        peer = unix_peer(s);
 203        if (peer)
 204                sock_hold(peer);
 205        unix_state_unlock(s);
 206        return peer;
 207}
 208EXPORT_SYMBOL_GPL(unix_peer_get);
 209
 210static inline void unix_release_addr(struct unix_address *addr)
 211{
 212        if (refcount_dec_and_test(&addr->refcnt))
 213                kfree(addr);
 214}
 215
 216/*
 217 *      Check unix socket name:
 218 *              - should be not zero length.
 219 *              - if started by not zero, should be NULL terminated (FS object)
 220 *              - if started by zero, it is abstract name.
 221 */
 222
/* Validate a sockaddr_un of @len bytes and normalize its length.
 * Returns the effective address length or -EINVAL on a malformed
 * address.  For filesystem names (sun_path[0] != 0) the path is
 * forcibly NUL-terminated and *hashp is left 0; for abstract names
 * *hashp receives the folded checksum of the whole address.
 */
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		/* length now covers family + trimmed path + its NUL */
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
 247
/* Unlink sk from its hash bucket; caller must hold unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

/* Add sk to the given bucket; caller must hold unix_table_lock. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
 272
/* Look up a bound socket by address bytes in the bucket selected by
 * hash ^ type, restricted to @net.  Caller must hold unix_table_lock;
 * no reference is taken on the returned socket.  Returns NULL when no
 * match exists.
 */
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		/* exact match on length and raw address bytes */
		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}
 291
/* As __unix_find_socket_byname(), but takes unix_table_lock itself
 * and returns the socket with a reference held (or NULL).
 */
static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}
 306
/* Find the bound filesystem socket whose dentry backs inode @i,
 * taking a reference on it.  FS sockets are bucketed by inode number,
 * so only one bucket needs scanning.  Returns NULL when not found.
 */
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
 326
 327/* Support code for asymmetrically connected dgram sockets
 328 *
 329 * If a datagram socket is connected to a socket not itself connected
 330 * to the first socket (eg, /dev/log), clients may only enqueue more
 331 * messages if the present receive queue of the server socket is not
 332 * "too large". This means there's a second writeability condition
 333 * poll and sendmsg need to test. The dgram recv code will do a wake
 334 * up on the peer_wait wait queue of a socket upon reception of a
 335 * datagram which needs to be propagated to sleeping would-be writers
 336 * since these might not have sent anything so far. This can't be
 337 * accomplished via poll_wait because the lifetime of the server
 338 * socket might be less than that of its clients if these break their
 339 * association with it or if the server socket is closed while clients
 340 * are still connected to it and there's no way to inform "a polling
 341 * implementation" that it should let go of a certain wait queue
 342 *
 343 * In order to propagate a wake up, a wait_queue_entry_t of the client
 344 * socket is enqueued on the peer_wait queue of the server socket
 345 * whose wake function does a wake_up on the ordinary client socket
 346 * wait queue. This connection is established whenever a write (or
 347 * poll for write) hit the flow control condition and broken when the
 348 * association to the server socket is dissolved or after a wake up
 349 * was relayed.
 350 */
 351
/* Wake function for the peer_wake entry (see the big comment above):
 * detach from the server's peer_wait queue and relay the wake-up to
 * the client socket's own wait queue so blocked writers re-test the
 * flow-control condition.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	/* wake callbacks run under the waitqueue lock, so this detach
	 * is serialized against connect/disconnect below
	 */
	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
 371
/* Hook sk's peer_wake entry onto other's peer_wait queue so that a
 * future dgram reception on other relays a wake-up to sk.  Returns 1
 * if the entry was newly enqueued, 0 if sk was already hooked up
 * (peer_wake.private non-NULL).
 */
static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		/* record who we are hooked on so disconnect can check */
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}
 392
/* Undo unix_dgram_peer_wake_connect(): detach sk's peer_wake entry
 * from other's peer_wait queue, but only if sk is still hooked on
 * this particular peer (the relay callback may already have detached
 * it, or it may be hooked elsewhere).
 */
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}
 409
/* Disconnect from other's peer_wait queue and wake any writers
 * blocked on sk itself, since the peer association just changed and
 * their wait condition must be re-evaluated.
 */
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}
 419
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 (leaving sk hooked on other's peer_wait queue) when the
 * caller should sleep: the peer's queue is full and the peer is
 * alive, so unix_dgram_peer_wake_relay() will deliver the wake-up.
 * Returns 0 when a write can proceed (or fail) immediately, undoing
 * any hook-up made here.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
 443
 444static int unix_writable(const struct sock *sk)
 445{
 446        return sk->sk_state != TCP_LISTEN &&
 447               (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 448}
 449
/* sk_write_space callback (installed in unix_create1()): if the
 * socket has become writable, wake poll/select sleepers and notify
 * async (SIGIO) subscribers.
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();	/* sk_wq is RCU-protected */
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
 464
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* writers blocked on our old queue must re-test */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
 485
/* sk->sk_destruct callback: final teardown when the last reference to
 * an AF_UNIX sock is dropped.  Purges any remaining receive queue,
 * releases the bound address and updates global accounting.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	/* by now nothing may hold write memory or reference the sock */
	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
 512
/* Core of close()/release for an AF_UNIX sock: unhash it, mark it
 * dead, shut down and notify the peer, flush pending skbs
 * (recursively releasing embryo connections when closing a listener),
 * drop the filesystem path reference, and finally kick the fd garbage
 * collector.  @embrion is nonzero for a never-accepted embryo socket.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* detach the fs path under the lock; put it after unlock */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* a listener's queue holds embryo sockets, not data */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
 586
/* Record the current task's tgid and credentials on sk (used by
 * listen/connect/socketpair so SO_PEERCRED works), dropping any
 * previously recorded ones.  put_pid() tolerates NULL so it needs no
 * guard; put_cred() does not, hence the check.
 */
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}
 595
/* Copy peersk's recorded pid/credentials onto sk (new references
 * taken), dropping whatever sk held before.  Same NULL-handling
 * asymmetry as init_peercred().
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
 604
 605static int unix_listen(struct socket *sock, int backlog)
 606{
 607        int err;
 608        struct sock *sk = sock->sk;
 609        struct unix_sock *u = unix_sk(sk);
 610        struct pid *old_pid = NULL;
 611
 612        err = -EOPNOTSUPP;
 613        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 614                goto out;       /* Only stream/seqpacket sockets accept */
 615        err = -EINVAL;
 616        if (!u->addr)
 617                goto out;       /* No listens on an unbound socket */
 618        unix_state_lock(sk);
 619        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 620                goto out_unlock;
 621        if (backlog > sk->sk_max_ack_backlog)
 622                wake_up_interruptible_all(&u->peer_wait);
 623        sk->sk_max_ack_backlog  = backlog;
 624        sk->sk_state            = TCP_LISTEN;
 625        /* set credentials so connect can copy them */
 626        init_peercred(sk);
 627        err = 0;
 628
 629out_unlock:
 630        unix_state_unlock(sk);
 631        put_pid(old_pid);
 632out:
 633        return err;
 634}
 635
 636static int unix_release(struct socket *);
 637static int unix_bind(struct socket *, struct sockaddr *, int);
 638static int unix_stream_connect(struct socket *, struct sockaddr *,
 639                               int addr_len, int flags);
 640static int unix_socketpair(struct socket *, struct socket *);
 641static int unix_accept(struct socket *, struct socket *, int, bool);
 642static int unix_getname(struct socket *, struct sockaddr *, int);
 643static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 644static __poll_t unix_dgram_poll(struct file *, struct socket *,
 645                                    poll_table *);
 646static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 647#ifdef CONFIG_COMPAT
 648static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 649#endif
 650static int unix_shutdown(struct socket *, int);
 651static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 652static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 653static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 654                                    size_t size, int flags);
 655static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 656                                       struct pipe_inode_info *, size_t size,
 657                                       unsigned int flags);
 658static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 659static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 660static int unix_dgram_connect(struct socket *, struct sockaddr *,
 661                              int, int);
 662static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 663static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 664                                  int);
 665
 666static int unix_set_peek_off(struct sock *sk, int val)
 667{
 668        struct unix_sock *u = unix_sk(sk);
 669
 670        if (mutex_lock_interruptible(&u->iolock))
 671                return -EINTR;
 672
 673        sk->sk_peek_off = val;
 674        mutex_unlock(&u->iolock);
 675
 676        return 0;
 677}
 678
 679
/* proto_ops for SOCK_STREAM AF_UNIX sockets: connection-oriented byte
 * stream with listen/accept, sendpage and splice support.
 */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off = unix_set_peek_off,
};
 705
/* proto_ops for SOCK_DGRAM AF_UNIX sockets: connectionless datagrams,
 * so accept/listen are rejected and the dgram poll variant is used.
 */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
};
 730
/* proto_ops for SOCK_SEQPACKET AF_UNIX sockets: connection-oriented
 * like stream (shares unix_stream_connect/unix_listen) but preserves
 * message boundaries and polls like dgram.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
};
 755
/* The single struct proto shared by all AF_UNIX socket types. */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
 761
/* Allocate and initialise one AF_UNIX struct sock, enforcing the
 * global 2 * get_max_files() socket limit, and insert it into the
 * unbound hash bucket.  Returns the new sock, or NULL on failure
 * (limit hit or allocation failure).
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	/* optimistic increment; undone at "out" if we fail */
	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	/* the dgram qlen sysctl doubles as the default backlog */
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
 802
/* PF_UNIX socket(2) backend: select the proto_ops matching sock->type
 * and allocate the underlying sock via unix_create1().  Only protocol
 * 0 (or PF_UNIX itself) is accepted.
 */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
 834
 835static int unix_release(struct socket *sock)
 836{
 837        struct sock *sk = sock->sk;
 838
 839        if (!sk)
 840                return 0;
 841
 842        unix_release_sock(sk, 0);
 843        sock->sk = NULL;
 844
 845        return 0;
 846}
 847
/*
 * unix_autobind - bind a socket to an autogenerated abstract address.
 *
 * Picks a name of the form "\0XXXXX" (five hex digits in the abstract
 * namespace) and inserts the socket into the global hash under it.
 * Serialized against concurrent bind() by u->bindlock; the global name
 * table is protected by unix_table_lock.
 *
 * Returns 0 on success (or if the socket is already bound), -EINTR if
 * interrupted while taking bindlock, -ENOMEM on allocation failure,
 * -ENOSPC if all 2^20 candidate names are taken.
 *
 * NOTE(review): ordernum is read by the sprintf below before
 * unix_table_lock is taken (only the increment is under the lock), so
 * two sockets autobinding concurrently may format the same candidate
 * name and rely on the retry loop to resolve it — confirm this is the
 * intended serialization.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	/* Already bound (explicitly or by a previous autobind): done. */
	err = 0;
	if (u->addr)
		goto out;

	/* Room for sun_family plus "\0" prefix and up to 15 name bytes. */
	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	/* Abstract name: leading NUL, then five hex digits. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seems to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	/* Fold the socket type into the hash, as unix_bind() does. */
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	/* Release-publish so readers of u->addr see a fully built address. */
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
 908
/*
 * unix_find_other - look up the peer socket for a sockaddr_un.
 *
 * Filesystem names (sun_path[0] != 0) are resolved through the VFS:
 * the path must be writable by the caller and refer to a socket inode
 * that has a unix socket bound to it.  Abstract names are looked up
 * directly in the global name hash.
 *
 * On success returns the peer with a reference held (taken by
 * unix_find_socket_byinode()/byname()); the caller must sock_put() it.
 * On failure returns NULL and stores the error (-ECONNREFUSED if no
 * socket is found, -EPROTOTYPE on a type mismatch, or the VFS error)
 * in *error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		/* connect()/sendto() require write access to the node. */
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		/* Only update atime when we will actually use the socket. */
		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
 963
 964static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 965{
 966        struct dentry *dentry;
 967        struct path path;
 968        int err = 0;
 969        /*
 970         * Get the parent directory, calculate the hash for last
 971         * component.
 972         */
 973        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 974        err = PTR_ERR(dentry);
 975        if (IS_ERR(dentry))
 976                return err;
 977
 978        /*
 979         * All right, let's create it.
 980         */
 981        err = security_path_mknod(&path, dentry, mode, 0);
 982        if (!err) {
 983                err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 984                if (!err) {
 985                        res->mnt = mntget(path.mnt);
 986                        res->dentry = dget(dentry);
 987                }
 988        }
 989        done_path_create(&path, dentry);
 990        return err;
 991}
 992
/*
 * unix_bind - bind an AF_UNIX socket to a filesystem or abstract name.
 *
 * An address of bare sun_family length triggers autobind.  Filesystem
 * names first create the socket node via unix_mknod() (outside any of
 * our locks, since it can block in the VFS), then the socket is hashed:
 * by inode number for filesystem names, by name hash for abstract ones.
 * u->bindlock serializes against concurrent bind/autobind,
 * unix_table_lock protects the global hash.
 *
 * Returns 0 on success; -EINVAL for a bad address or already-bound
 * socket, -EADDRINUSE if the name (or node) exists, -EINTR, -ENOMEM,
 * or a VFS error.  On failure the created node's path reference is
 * dropped (the node itself is not unlinked here).
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Family-only address: let the kernel pick an abstract name. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		/* Filesystem name: create the node before taking bindlock,
		 * honouring the caller's umask.
		 */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	/* Lost the race against another bind/autobind. */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* Filesystem sockets are hashed by inode, not by name;
		 * UNIX_HASH_SIZE in addr->hash marks "not name-hashed".
		 */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	/* Release-publish the fully initialized address. */
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	/* On error drop the node's path reference taken in unix_mknod(). */
	if (err)
		path_put(&path);
out:
	return err;
}
1083
/*
 * Lock the state of two sockets, tolerating sk2 == NULL or sk1 == sk2
 * (then only sk1 is locked).  Distinct sockets are always locked in
 * ascending address order so concurrent double-lockers cannot deadlock.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first = sk1;
	struct sock *second = sk2;

	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	if (second < first)
		swap(first, second);

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1098
/*
 * Undo unix_state_double_lock(): sk1 is always unlocked; sk2 only when
 * it is a distinct, non-NULL socket.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);
	if (sk2 && sk2 != sk1)
		unix_state_unlock(sk2);
}
1108
/*
 * unix_dgram_connect - set (or clear) the default peer of a datagram socket.
 *
 * AF_UNSPEC disconnects (1003.1g semantics).  Otherwise the target is
 * looked up, both sockets are locked together, permission is checked,
 * and the peer pointer is swapped.  The old peer, if any, gets a
 * disconnect wakeup and is notified via unix_dgram_disconnected().
 *
 * Returns 0 on success; -EINVAL for a short address, lookup errors from
 * unix_find_other(), -EPERM if sending is not allowed, or an LSM error.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SO_PASSCRED requires an address to pass; autobind if
		 * this socket has none yet.
		 */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		/* Reference on other is taken by unix_find_other(). */
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		/* Notify the old peer unless we reconnected to it. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1188
/*
 * unix_wait_for_peer - sleep until the peer's receive queue may drain.
 *
 * Called with other's state lock held; this function DROPS that lock
 * before (possibly) sleeping and does not retake it.  We only actually
 * sleep if other is still alive, not shut down for receive, and its
 * queue is still full when we check — prepare_to_wait_exclusive() is
 * done first so a concurrent wakeup between the check and the sleep is
 * not lost.  Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1209
/*
 * unix_stream_connect - connect a stream/seqpacket socket to a listener.
 *
 * Allocates an embryo sock (newsk) and a one-byte skb up front, finds
 * the listening socket, and — with both state locks held, listener
 * first — wires newsk to sk and queues the skb (which carries newsk in
 * skb->sk) on the listener's receive queue for unix_accept() to pick
 * up.  Blocks in unix_wait_for_peer() when the listener's backlog is
 * full, unless non-blocking.
 *
 * Returns 0 on success; otherwise -ECONNREFUSED, -EAGAIN, -EISCONN,
 * -EINVAL, -ENOMEM, a signal errno, or an LSM error, with all partly
 * acquired resources released.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SO_PASSCRED needs an address to pass; autobind if unbound. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops other's lock; we still hold our reference. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* State changed while we were unlocked: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* Queue the embryo (carried in skb->sk) for unix_accept(). */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1398
1399static int unix_socketpair(struct socket *socka, struct socket *sockb)
1400{
1401        struct sock *ska = socka->sk, *skb = sockb->sk;
1402
1403        /* Join our sockets back to back */
1404        sock_hold(ska);
1405        sock_hold(skb);
1406        unix_peer(ska) = skb;
1407        unix_peer(skb) = ska;
1408        init_peercred(ska);
1409        init_peercred(skb);
1410
1411        if (ska->sk_type != SOCK_DGRAM) {
1412                ska->sk_state = TCP_ESTABLISHED;
1413                skb->sk_state = TCP_ESTABLISHED;
1414                socka->state  = SS_CONNECTED;
1415                sockb->state  = SS_CONNECTED;
1416        }
1417        return 0;
1418}
1419
1420static void unix_sock_inherit_flags(const struct socket *old,
1421                                    struct socket *new)
1422{
1423        if (test_bit(SOCK_PASSCRED, &old->flags))
1424                set_bit(SOCK_PASSCRED, &new->flags);
1425        if (test_bit(SOCK_PASSSEC, &old->flags))
1426                set_bit(SOCK_PASSSEC, &new->flags);
1427}
1428
/*
 * unix_accept - accept a pending connection on a listening socket.
 *
 * Pending connections arrive as skbs on the listener's receive queue
 * (queued by unix_stream_connect()), each carrying the embryo sock in
 * skb->sk.  We dequeue one, free the carrier skb, wake anyone waiting
 * in unix_wait_for_peer() for backlog space, and graft the new sock
 * onto @newsock.
 *
 * Returns 0 on success; -EOPNOTSUPP for non-stream types, -EINVAL if
 * not listening (or on receive shutdown), or the skb_recv_datagram()
 * error (e.g. -EAGAIN for a non-blocking socket with no pending
 * connection).
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	/* The embryo sock travels in skb->sk; the skb itself is done. */
	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* A backlog slot freed up: wake blocked connectors. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1472
1473
/*
 * unix_getname - report the local (or peer's) bound address.
 *
 * With @peer set, reports the peer's address (-ENOTCONN if there is
 * none).  Note that err doubles as the return value: on success it
 * holds the address length copied into @uaddr (sizeof(short), i.e.
 * just the family, for an unbound socket).  The address is read with
 * smp_load_acquire() to pair with the release-publish in
 * unix_bind()/unix_autobind().
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		/* Balance the sock_put() below. */
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		/* Unbound: report just the family. */
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
1505
/*
 * unix_scm_to_skb - move scm control data (creds, security, fds) into
 * the skb's control block before queueing it to the receiver.
 *
 * UNIXCB(skb).fp must be NULLed before unix_attach_fds() so a failed
 * or skipped attach leaves a clean fp.  The destructor is set
 * unconditionally so unix_destruct_scm runs on free either way.
 * Returns 0, or the unix_attach_fds() error.
 */
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	/* Duplicate any passed file descriptors into the skb. */
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}
1521
1522static bool unix_passcred_enabled(const struct socket *sock,
1523                                  const struct sock *other)
1524{
1525        return test_bit(SOCK_PASSCRED, &sock->flags) ||
1526               !other->sk_socket ||
1527               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1528}
1529
1530/*
1531 * Some apps rely on write() giving SCM_CREDENTIALS
1532 * We include credentials if source or destination socket
1533 * asserted SOCK_PASSCRED.
1534 */
1535static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1536                            const struct sock *other)
1537{
1538        if (UNIXCB(skb).pid)
1539                return;
1540        if (unix_passcred_enabled(sock, other)) {
1541                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1542                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1543        }
1544}
1545
1546static int maybe_init_creds(struct scm_cookie *scm,
1547                            struct socket *socket,
1548                            const struct sock *other)
1549{
1550        int err;
1551        struct msghdr msg = { .msg_controllen = 0 };
1552
1553        err = scm_send(socket, &msg, scm, false);
1554        if (err)
1555                return err;
1556
1557        if (unix_passcred_enabled(socket, other)) {
1558                scm->pid = get_pid(task_tgid(current));
1559                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1560        }
1561        return err;
1562}
1563
1564static bool unix_skb_scm_eq(struct sk_buff *skb,
1565                            struct scm_cookie *scm)
1566{
1567        const struct unix_skb_parms *u = &UNIXCB(skb);
1568
1569        return u->pid == scm->pid &&
1570               uid_eq(u->uid, scm->creds.uid) &&
1571               gid_eq(u->gid, scm->creds.gid) &&
1572               unix_secdata_eq(scm, skb);
1573}
1574
1575/*
1576 *      Send AF_UNIX data.
1577 */
1578
/*
 * unix_dgram_sendmsg - send one datagram, to msg_name or the connected
 * peer.
 *
 * Builds the skb (paged above SKB_MAX_ALLOC) and attaches scm data
 * before any socket locks are taken, then finds/validates the target
 * and queues the skb on its receive queue.  When the receiver's queue
 * is full, a blocking sender sleeps in unix_wait_for_peer(); a
 * non-blocking one registers on the peer's wake list (sk_locked path)
 * and returns -EAGAIN.  A dead connected peer is detected and the
 * stale peer pointer cleared (-ECONNREFUSED).
 *
 * Returns len on success (also when sk_filter() drops the packet —
 * the sender is deliberately not told), else a negative error.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No explicit destination: use the connected peer. */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* SO_PASSCRED needs an address to pass; autobind if unbound. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		/* Put the overflow into page frags, page-aligned. */
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		/* Takes a reference on the found socket. */
		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		/* If other was our connected peer, drop the stale link
		 * and fail; otherwise retry the name lookup.
		 */
		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			/* Blocking sender: wait for queue space
			 * (drops other's lock), then retry.
			 */
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		/* Non-blocking: take both locks (in address order) so we
		 * can safely register on the peer's wake list.
		 */
		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
1779
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 *
 * UNIX_SKB_FRAGS_SZ is the size of the paged (frag) portion of one
 * stream skb: 32768 bytes rounded up to a power-of-two page count by
 * get_order(), so it is never smaller than one full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1784
/* Send data on a connected SOCK_STREAM AF_UNIX socket.
 *
 * The payload is split into skbs of at most half the send buffer (so
 * two messages fit in flight) and queued directly on the peer's
 * receive queue under the peer's state lock.  SCM data (fds, creds)
 * rides only on the first skb.  Returns bytes sent, or a negative
 * error if nothing was sent.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	/* Let the fd-passing garbage collector finish before we queue
	 * skbs that may carry more fd references.
	 */
	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	/* Stream sockets take no destination address: reject it with
	 * -EISCONN when connected, -EOPNOTSUPP otherwise.
	 */
	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		/* Bytes that don't fit in the linear head go into
		 * page frags, rounded up to whole pages.
		 */
		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		/* May sleep copying from userspace; no locks held here. */
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Recheck the peer under its lock: it may have died or
		 * shut down reading while we were copying.
		 */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* Raise SIGPIPE only if nothing was sent yet (a partial send
	 * is reported as a short count instead).
	 */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
1883
/* sendpage() for stream sockets: append a page frag either to the last
 * skb already on the peer's receive queue (fast path, when the SCM
 * credentials match) or to a freshly allocated skb which is then
 * queued.  Returns the number of bytes appended or a negative error.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Never entered directly: 'goto alloc_skb' below jumps in here
	 * after we already hold both locks, so they are dropped first
	 * and the (possibly sleeping) allocation runs lock-free before
	 * falling through to re-acquire them.
	 */
	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	/* Recheck the peer under its state lock. */
	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	/* Initialize scm only once, even if we loop via alloc_skb. */
	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/* Choose the skb to append to: reuse the queue tail when it
	 * carries matching credentials, otherwise use (or allocate) a
	 * fresh one.  'tail' remembers the tail we saw before dropping
	 * the locks, so we can detect whether it is still current.
	 */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		/* No frag slot left: retry with a freshly allocated skb. */
		tail = skb;
		goto alloc_skb;
	}

	/* Account the appended bytes on the skb and the sender. */
	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	/* A newly allocated skb still needs its scm data and a place
	 * on the receive queue.
	 */
	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
1999
2000static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2001                                  size_t len)
2002{
2003        int err;
2004        struct sock *sk = sock->sk;
2005
2006        err = sock_error(sk);
2007        if (err)
2008                return err;
2009
2010        if (sk->sk_state != TCP_ESTABLISHED)
2011                return -ENOTCONN;
2012
2013        if (msg->msg_namelen)
2014                msg->msg_namelen = 0;
2015
2016        return unix_dgram_sendmsg(sock, msg, len);
2017}
2018
2019static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2020                                  size_t size, int flags)
2021{
2022        struct sock *sk = sock->sk;
2023
2024        if (sk->sk_state != TCP_ESTABLISHED)
2025                return -ENOTCONN;
2026
2027        return unix_dgram_recvmsg(sock, msg, size, flags);
2028}
2029
2030static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2031{
2032        struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2033
2034        if (addr) {
2035                msg->msg_namelen = addr->len;
2036                memcpy(msg->msg_name, addr->name, addr->len);
2037        }
2038}
2039
/* Receive one datagram (also used for SEQPACKET via the wrappers
 * above).  Dequeues (or peeks) a single skb under u->iolock, copies
 * it to userspace, and transfers any attached SCM data (fds, creds).
 * Returns bytes received (the full skb length when MSG_TRUNC is set)
 * or a negative error.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Try to dequeue under iolock; if the queue is empty, drop the
	 * lock and wait for more packets (or timeout/signal), then retry.
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
					      &last);
		if (skb)
			break;

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* We freed queue space: wake senders blocked on a full peer queue. */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	/* Report the sender's address, if requested. */
	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Clamp to the available payload; flag truncation otherwise. */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Consuming read: move fds out of the skb and rewind
		 * the peek offset past the whole datagram.
		 */
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2142
2143/*
2144 *      Sleep until more data has arrived. But check for races..
2145 */
2146static long unix_stream_data_wait(struct sock *sk, long timeo,
2147                                  struct sk_buff *last, unsigned int last_len,
2148                                  bool freezable)
2149{
2150        struct sk_buff *tail;
2151        DEFINE_WAIT(wait);
2152
2153        unix_state_lock(sk);
2154
2155        for (;;) {
2156                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2157
2158                tail = skb_peek_tail(&sk->sk_receive_queue);
2159                if (tail != last ||
2160                    (tail && tail->len != last_len) ||
2161                    sk->sk_err ||
2162                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2163                    signal_pending(current) ||
2164                    !timeo)
2165                        break;
2166
2167                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2168                unix_state_unlock(sk);
2169                if (freezable)
2170                        timeo = freezable_schedule_timeout(timeo);
2171                else
2172                        timeo = schedule_timeout(timeo);
2173                unix_state_lock(sk);
2174
2175                if (sock_flag(sk, SOCK_DEAD))
2176                        break;
2177
2178                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2179        }
2180
2181        finish_wait(sk_sleep(sk), &wait);
2182        unix_state_unlock(sk);
2183        return timeo;
2184}
2185
/* Bytes of this skb's payload not yet consumed by the stream reader. */
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}
2190
/* State shared between unix_stream_read_generic() and its per-backend
 * consumers (recvmsg copies into a msghdr, splice feeds a pipe).
 */
struct unix_stream_read_state {
	/* Consume up to 'chunk' bytes of the skb starting 'skip' bytes
	 * past what is already consumed; returns bytes taken or a
	 * negative error.
	 */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;		/* receiving socket */
	struct msghdr *msg;		/* recvmsg destination, else NULL */
	struct pipe_inode_info *pipe;	/* splice destination, else NULL */
	size_t size;			/* total bytes requested */
	int flags;			/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags for splice */
};
2201
/* Generic SOCK_STREAM receive path, shared by recvmsg and splice via
 * state->recv_actor.  Walks the receive queue under u->iolock, gluing
 * consecutive skbs only when their credentials match, until
 * state->size bytes are delivered or a stop condition hits.  Returns
 * bytes copied, or a negative error if nothing was copied.
 *
 * @freezable: when true, sleep freezer-friendly so a blocked reader
 *	does not hold up system suspend.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	/* Minimum byte count before returning (SO_RCVLOWAT/MSG_WAITALL). */
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			/* Queue empty: stop if we already have enough,
			 * otherwise check error/shutdown and maybe wait.
			 */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* Drop iolock while sleeping so writers can queue. */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Honour the peek offset: step over fully skipped skbs. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold a reference: the actor may sleep and a concurrent
		 * reader could consume/unlink the skb meanwhile.
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			/* Partially consumed skb stays on the queue. */
			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop once fds were collected so they are
			 * delivered with this read.
			 */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2398
2399static int unix_stream_read_actor(struct sk_buff *skb,
2400                                  int skip, int chunk,
2401                                  struct unix_stream_read_state *state)
2402{
2403        int ret;
2404
2405        ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2406                                    state->msg, chunk);
2407        return ret ?: chunk;
2408}
2409
2410static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2411                               size_t size, int flags)
2412{
2413        struct unix_stream_read_state state = {
2414                .recv_actor = unix_stream_read_actor,
2415                .socket = sock,
2416                .msg = msg,
2417                .size = size,
2418                .flags = flags
2419        };
2420
2421        return unix_stream_read_generic(&state, true);
2422}
2423
2424static int unix_stream_splice_actor(struct sk_buff *skb,
2425                                    int skip, int chunk,
2426                                    struct unix_stream_read_state *state)
2427{
2428        return skb_splice_bits(skb, state->socket->sk,
2429                               UNIXCB(skb).consumed + skip,
2430                               state->pipe, chunk, state->splice_flags);
2431}
2432
2433static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2434                                       struct pipe_inode_info *pipe,
2435                                       size_t size, unsigned int flags)
2436{
2437        struct unix_stream_read_state state = {
2438                .recv_actor = unix_stream_splice_actor,
2439                .socket = sock,
2440                .pipe = pipe,
2441                .size = size,
2442                .splice_flags = flags,
2443        };
2444
2445        if (unlikely(*ppos))
2446                return -ESPIPE;
2447
2448        if (sock->file->f_flags & O_NONBLOCK ||
2449            flags & SPLICE_F_NONBLOCK)
2450                state.flags = MSG_DONTWAIT;
2451
2452        return unix_stream_read_generic(&state, false);
2453}
2454
/* shutdown(2) handler.  Marks the requested directions shut on this
 * socket and, for connection-oriented types, mirrors the shutdown onto
 * the peer (our RCV becomes its SEND and vice versa), waking both
 * sides.  Returns 0 or -EINVAL for a bad mode.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	/* Take a reference under the lock so 'other' stays valid after
	 * we drop it.
	 */
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* Mirror directions: our read side closing shuts the
		 * peer's write side, and vice versa.
		 */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2500
2501long unix_inq_len(struct sock *sk)
2502{
2503        struct sk_buff *skb;
2504        long amount = 0;
2505
2506        if (sk->sk_state == TCP_LISTEN)
2507                return -EINVAL;
2508
2509        spin_lock(&sk->sk_receive_queue.lock);
2510        if (sk->sk_type == SOCK_STREAM ||
2511            sk->sk_type == SOCK_SEQPACKET) {
2512                skb_queue_walk(&sk->sk_receive_queue, skb)
2513                        amount += unix_skb_len(skb);
2514        } else {
2515                skb = skb_peek(&sk->sk_receive_queue);
2516                if (skb)
2517                        amount = skb->len;
2518        }
2519        spin_unlock(&sk->sk_receive_queue.lock);
2520
2521        return amount;
2522}
2523EXPORT_SYMBOL_GPL(unix_inq_len);
2524
/* Number of bytes queued for send but not yet consumed (SIOCOUTQ). */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2530
/* SIOCUNIXFILE: open an O_PATH file descriptor on the filesystem inode
 * this socket is bound to.  Requires CAP_NET_ADMIN in the socket's
 * network namespace.  Returns the new fd or a negative error.
 */
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* Acquire pairs with the release store publishing the address;
	 * unbound sockets have no path to open.
	 */
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	/* Hold our own reference for the duration of the open. */
	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		/* Return the error from dentry_open via 'fd'. */
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	/* Drop our reference; on success the installed file holds its own. */
	path_put(&path);

	return fd;
}
2566
2567static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2568{
2569        struct sock *sk = sock->sk;
2570        long amount = 0;
2571        int err;
2572
2573        switch (cmd) {
2574        case SIOCOUTQ:
2575                amount = unix_outq_len(sk);
2576                err = put_user(amount, (int __user *)arg);
2577                break;
2578        case SIOCINQ:
2579                amount = unix_inq_len(sk);
2580                if (amount < 0)
2581                        err = amount;
2582                else
2583                        err = put_user(amount, (int __user *)arg);
2584                break;
2585        case SIOCUNIXFILE:
2586                err = unix_open_file(sk);
2587                break;
2588        default:
2589                err = -ENOIOCTLCMD;
2590                break;
2591        }
2592        return err;
2593}
2594
#ifdef CONFIG_COMPAT
/* 32-bit compat ioctl entry: translate the pointer argument with
 * compat_ptr() and reuse the native handler.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
2601
2602static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2603{
2604        struct sock *sk = sock->sk;
2605        __poll_t mask;
2606
2607        sock_poll_wait(file, sock, wait);
2608        mask = 0;
2609
2610        /* exceptional events? */
2611        if (sk->sk_err)
2612                mask |= EPOLLERR;
2613        if (sk->sk_shutdown == SHUTDOWN_MASK)
2614                mask |= EPOLLHUP;
2615        if (sk->sk_shutdown & RCV_SHUTDOWN)
2616                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2617
2618        /* readable? */
2619        if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2620                mask |= EPOLLIN | EPOLLRDNORM;
2621
2622        /* Connection-based need to check for termination and startup */
2623        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2624            sk->sk_state == TCP_CLOSE)
2625                mask |= EPOLLHUP;
2626
2627        /*
2628         * we set writable also when the other side has shut down the
2629         * connection. This prevents stuck sockets.
2630         */
2631        if (unix_writable(sk))
2632                mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2633
2634        return mask;
2635}
2636
/* poll(2) handler for datagram (and connecting seqpacket) sockets.
 * Like unix_poll() but additionally treats the socket as unwritable
 * when the peer's receive queue is full, registering on the peer's
 * wake queue via unix_dgram_peer_wake_me() in that case.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/* Not writable if a connected peer (that isn't connected
		 * back to us) has a full receive queue; in that case
		 * register for a wakeup when it drains.
		 */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
2694
#ifdef CONFIG_PROC_FS

/*
 * The /proc/net/unix iterator encodes its position in the seq_file
 * *pos as (bucket << BUCKET_SPACE) | offset: the high bits select a
 * hash bucket of unix_socket_table, the low BUCKET_SPACE bits hold a
 * 1-based count of matching sockets within that bucket (offset 0 is
 * reserved for the header line).
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2702
2703static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2704{
2705        unsigned long offset = get_offset(*pos);
2706        unsigned long bucket = get_bucket(*pos);
2707        struct sock *sk;
2708        unsigned long count = 0;
2709
2710        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2711                if (sock_net(sk) != seq_file_net(seq))
2712                        continue;
2713                if (++count == offset)
2714                        break;
2715        }
2716
2717        return sk;
2718}
2719
/*
 * Advance the /proc/net/unix iterator to the next socket belonging to
 * this seq_file's network namespace: first walk the remainder of the
 * current hash chain, then scan subsequent buckets.  @sk is the
 * current socket (or SEQ_START_TOKEN/NULL to start scanning from the
 * bucket encoded in *pos); *pos is updated as buckets are crossed.
 * Returns the next matching socket, or NULL when the table is done.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	/* Try the rest of the chain the current socket lives on. */
	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;	/* chain exhausted */
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	/* Scan whole buckets starting at the one encoded in *pos.  The
	 * goto above deliberately jumps into this loop so the
	 * bucket-advance code below is shared with the scan path.
	 */
	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		/* offset 1 == first matching socket of the new bucket */
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2746
2747static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2748        __acquires(unix_table_lock)
2749{
2750        spin_lock(&unix_table_lock);
2751
2752        if (!*pos)
2753                return SEQ_START_TOKEN;
2754
2755        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2756                return NULL;
2757
2758        return unix_next_socket(seq, NULL, pos);
2759}
2760
2761static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2762{
2763        ++*pos;
2764        return unix_next_socket(seq, v, pos);
2765}
2766
/* seq_file ->stop: drop the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2772
/*
 * seq_file ->show: emit one line of /proc/net/unix, or the column
 * header when @v is SEQ_START_TOKEN.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		/* Columns: hashed kernel address, refcount, protocol
		 * (printed as literal 0 here), flags, type,
		 * connection state, inode.
		 */
		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* name length minus the sun_family field */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				/* drop one byte - presumably the path's
				 * trailing NUL; confirm against bind()
				 */
				len--;
			else {
				/* abstract-namespace names are printed
				 * with a leading '@' in place of byte 0
				 */
				seq_putc(seq, '@');
				i++;
			}
			/* NUL bytes inside the name are shown as '@' */
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2817
/* Iterator callbacks backing the /proc/net/unix seq file. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2824#endif
2825
/* socket(PF_UNIX, ...) creation hook, passed to sock_register(). */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner  = THIS_MODULE,
};
2831
2832
2833static int __net_init unix_net_init(struct net *net)
2834{
2835        int error = -ENOMEM;
2836
2837        net->unx.sysctl_max_dgram_qlen = 10;
2838        if (unix_sysctl_register(net))
2839                goto out;
2840
2841#ifdef CONFIG_PROC_FS
2842        if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2843                        sizeof(struct seq_net_private))) {
2844                unix_sysctl_unregister(net);
2845                goto out;
2846        }
2847#endif
2848        error = 0;
2849out:
2850        return error;
2851}
2852
/* Per-namespace teardown: undo what unix_net_init() set up. */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2858
/* Namespace lifetime hooks, registered via register_pernet_subsys(). */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2863
2864static int __init af_unix_init(void)
2865{
2866        int rc = -1;
2867
2868        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2869
2870        rc = proto_register(&unix_proto, 1);
2871        if (rc != 0) {
2872                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2873                goto out;
2874        }
2875
2876        sock_register(&unix_family_ops);
2877        register_pernet_subsys(&unix_net_ops);
2878out:
2879        return rc;
2880}
2881
/* Module exit: unregister the family, the proto and the pernet ops. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2888
/*
 * Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2898