/* linux/net/unix/af_unix.c */
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * NET4:        Implementation of BSD Unix domain sockets.
   4 *
   5 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6 *
   7 * Fixes:
   8 *              Linus Torvalds  :       Assorted bug cures.
   9 *              Niibe Yutaka    :       async I/O support.
  10 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11 *              Alan Cox        :       Limit size of allocated blocks.
  12 *              Alan Cox        :       Fixed the stupid socketpair bug.
  13 *              Alan Cox        :       BSD compatibility fine tuning.
  14 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15 *              Alan Cox        :       Sorted out a proper draft version of
  16 *                                      file descriptor passing hacked up from
  17 *                                      Mike Shaver's work.
  18 *              Marty Leisner   :       Fixes to fd passing
  19 *              Nick Nevin      :       recvmsg bugfix.
  20 *              Alan Cox        :       Started proper garbage collector
  21 *              Heiko EiBfeldt  :       Missing verify_area check
  22 *              Alan Cox        :       Started POSIXisms
  23 *              Andreas Schwab  :       Replace inode by dentry for proper
  24 *                                      reference counting
  25 *              Kirk Petersen   :       Made this a module
  26 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27 *                                      Lots of bug fixes.
  28 *           Alexey Kuznetosv   :       Repaired (I hope) bugs introduces
  29 *                                      by above two patches.
  30 *           Andrea Arcangeli   :       If possible we block in connect(2)
  31 *                                      if the max backlog of the listen socket
  32 *                                      is been reached. This won't break
  33 *                                      old apps and it will avoid huge amount
  34 *                                      of socks hashed (this for unix_gc()
  35 *                                      performances reasons).
  36 *                                      Security fix that limits the max
  37 *                                      number of socks to 2*max_files and
  38 *                                      the number of skb queueable in the
  39 *                                      dgram receiver.
  40 *              Artur Skawina   :       Hash function optimizations
  41 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42 *            Malcolm Beattie   :       Set peercred for socketpair
  43 *           Michal Ostrowski   :       Module initialization cleanup.
  44 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45 *                                      the core infrastructure is doing that
  46 *                                      for all net proto families now (2.5.69+)
  47 *
  48 * Known differences from reference BSD that was tested:
  49 *
  50 *      [TO FIX]
  51 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52 *              other the moment one end closes.
  53 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55 *      [NOT TO FIX]
  56 *      accept() returns a path name even if the connecting socket has closed
  57 *              in the meantime (BSD loses the path and gives up).
  58 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61 *      BSD af_unix apparently has connect forgetting to block properly.
  62 *              (need to check this with the POSIX spec in detail)
  63 *
  64 * Differences from 2.0.0-11-... (ANK)
  65 *      Bug fixes and improvements.
  66 *              - client shutdown killed server socket.
  67 *              - removed all useless cli/sti pairs.
  68 *
  69 *      Semantic changes/extensions.
  70 *              - generic control message passing.
  71 *              - SCM_CREDENTIALS control message.
  72 *              - "Abstract" (not FS based) socket bindings.
  73 *                Abstract names are sequences of bytes (not zero terminated)
  74 *                started by 0, so that this name space does not intersect
  75 *                with BSD names.
  76 */
  77
  78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80#include <linux/module.h>
  81#include <linux/kernel.h>
  82#include <linux/signal.h>
  83#include <linux/sched/signal.h>
  84#include <linux/errno.h>
  85#include <linux/string.h>
  86#include <linux/stat.h>
  87#include <linux/dcache.h>
  88#include <linux/namei.h>
  89#include <linux/socket.h>
  90#include <linux/un.h>
  91#include <linux/fcntl.h>
  92#include <linux/termios.h>
  93#include <linux/sockios.h>
  94#include <linux/net.h>
  95#include <linux/in.h>
  96#include <linux/fs.h>
  97#include <linux/slab.h>
  98#include <linux/uaccess.h>
  99#include <linux/skbuff.h>
 100#include <linux/netdevice.h>
 101#include <net/net_namespace.h>
 102#include <net/sock.h>
 103#include <net/tcp_states.h>
 104#include <net/af_unix.h>
 105#include <linux/proc_fs.h>
 106#include <linux/seq_file.h>
 107#include <net/scm.h>
 108#include <linux/init.h>
 109#include <linux/poll.h>
 110#include <linux/rtnetlink.h>
 111#include <linux/mount.h>
 112#include <net/checksum.h>
 113#include <linux/security.h>
 114#include <linux/freezer.h>
 115#include <linux/file.h>
 116
 117#include "scm.h"
 118
/* Global hash table of AF_UNIX sockets.  The lower UNIX_HASH_SIZE slots
 * hold bound sockets (see UNIX_ABSTRACT below); the upper UNIX_HASH_SIZE
 * slots hold unbound sockets (see unix_sockets_unbound()).
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Protects all additions/removals/lookups in unix_socket_table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/* Number of live AF_UNIX sockets; bounded in unix_create1(). */
static atomic_long_t unix_nr_socks;
 124
 125
 126static struct hlist_head *unix_sockets_unbound(void *addr)
 127{
 128        unsigned long hash = (unsigned long)addr;
 129
 130        hash ^= hash >> 16;
 131        hash ^= hash >> 8;
 132        hash %= UNIX_HASH_SIZE;
 133        return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134}
 135
 136#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security id in the skb control block. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

/* Copy the security id carried by the skb into the receiver's scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

/* Compare the scm's security id with the one carried by the skb. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
/* No-op stubs when LSM networking support is compiled out. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

/* Without LSM support all skbs are considered to match. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
 165
 166/*
 167 *  SMP locking strategy:
 168 *    hash table is protected with spinlock unix_table_lock
 169 *    each socket state is protected by separate spin lock.
 170 */
 171
 172static inline unsigned int unix_hash_fold(__wsum n)
 173{
 174        unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176        hash ^= hash>>8;
 177        return hash&(UNIX_HASH_SIZE-1);
 178}
 179
 180#define unix_peer(sk) (unix_sk(sk)->peer)
 181
 182static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 183{
 184        return unix_peer(osk) == sk;
 185}
 186
 187static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188{
 189        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190}
 191
/* Receive queue longer than the configured backlog?  Caller is expected
 * to hold the relevant state lock; see unix_recvq_full_lockless() for the
 * unlocked variant.
 */
static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
 196
/* Lockless variant of unix_recvq_full(): uses annotated accessors so it
 * may be called without holding the queue lock (result is best-effort).
 */
static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}
 202
/* Return s's peer with an extra reference held (caller must sock_put()),
 * or NULL if s has no peer.  The state lock makes peer read + refcount
 * bump atomic with respect to disconnect.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
 215
 216static inline void unix_release_addr(struct unix_address *addr)
 217{
 218        if (refcount_dec_and_test(&addr->refcnt))
 219                kfree(addr);
 220}
 221
/*
 *	Check unix socket name:
 *		- should be not zero length.
 *		- if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 *
 *	Returns the effective address length on success, -EINVAL on a
 *	malformed address.  For filesystem names *hashp is left 0 and the
 *	path is forcibly NUL-terminated; for abstract names *hashp gets a
 *	hash over the whole (not NUL-terminated) name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	/* Must contain at least sun_family plus one path byte, and fit. */
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		/* Recompute len from the (now terminated) path string. */
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* Abstract name: hash the full byte sequence including NULs. */
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
 253
/* Unhash sk from its bucket; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
 258
/* Hash sk into the given bucket; caller holds unix_table_lock and sk must
 * not already be hashed.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
 264
/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}
 271
/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
 278
/* Look up a bound socket by name in the bucket selected by hash ^ type,
 * restricted to the given network namespace.  Returns the socket without
 * taking a reference, or NULL.  Caller holds unix_table_lock.
 */
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		/* Exact byte-wise match on the full stored address. */
		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}
 297
/* Locked lookup by name; on success the returned socket carries an extra
 * reference (caller must sock_put()).
 */
static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}
 312
/* Find the bound socket backed by inode i (filesystem bindings hash by
 * inode number).  On success the returned socket carries an extra
 * reference; returns NULL if no socket is bound to this inode.
 */
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		/* Compare the dentry's backing inode, not the dentry. */
		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
 332
 333/* Support code for asymmetrically connected dgram sockets
 334 *
 335 * If a datagram socket is connected to a socket not itself connected
 336 * to the first socket (eg, /dev/log), clients may only enqueue more
 337 * messages if the present receive queue of the server socket is not
 338 * "too large". This means there's a second writeability condition
 339 * poll and sendmsg need to test. The dgram recv code will do a wake
 340 * up on the peer_wait wait queue of a socket upon reception of a
 341 * datagram which needs to be propagated to sleeping would-be writers
 342 * since these might not have sent anything so far. This can't be
 343 * accomplished via poll_wait because the lifetime of the server
 344 * socket might be less than that of its clients if these break their
 345 * association with it or if the server socket is closed while clients
 346 * are still connected to it and there's no way to inform "a polling
 347 * implementation" that it should let go of a certain wait queue
 348 *
 349 * In order to propagate a wake up, a wait_queue_entry_t of the client
 350 * socket is enqueued on the peer_wait queue of the server socket
 351 * whose wake function does a wake_up on the ordinary client socket
 352 * wait queue. This connection is established whenever a write (or
 353 * poll for write) hit the flow control condition and broken when the
 354 * association to the server socket is dissolved or after a wake up
 355 * was relayed.
 356 */
 357
/* Wait-queue callback installed on a server socket's peer_wait queue:
 * relay the wakeup to the client socket's own wait queue, tearing down
 * the relay entry first (each relay is one-shot; see the block comment
 * above).  Runs under the peer_wait queue lock.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	/* Detach from the server's peer_wait queue before relaying. */
	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
 377
/* Enqueue sk's relay entry on other's peer_wait queue so sk gets woken
 * when other's receive queue drains.  Returns 1 if a new connection was
 * established, 0 if sk was already enqueued (peer_wake.private non-NULL).
 */
static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		/* Record which peer we are parked on for later teardown. */
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}
 398
/* Remove sk's relay entry from other's peer_wait queue, if it is parked
 * there.  Safe to call when no connection exists (the private-pointer
 * check makes it idempotent).
 */
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}
 415
/* Disconnect the relay and wake any writers sleeping on sk, since the
 * peer association they were flow-controlled against is going away.
 */
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}
 425
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 if the caller should sleep (other's queue is full and the
 * relay is armed so we will be woken), 0 if it may proceed immediately
 * (any relay armed above is torn down again).
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
 449
/* Writable when not listening and write allocations use at most a
 * quarter of the send buffer.
 */
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
 455
/* sk_write_space callback: wake poll()ers and async waiters when the
 * socket becomes writable again.  RCU protects the wq dereference.
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
 470
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* Writers blocked on our queue can now make progress. */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
 491
/* sk_destruct callback: final teardown once the last reference is gone.
 * Purges any remaining queued skbs, releases the bound address, and
 * updates the global socket accounting.  The WARN_ONs assert the socket
 * was properly orphaned/unhashed before destruction.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
 518
/* Tear down a unix socket: unhash it, orphan it, notify/detach the peer,
 * flush the receive queue and drop the caller's reference.  @embrion is
 * nonzero when releasing a not-yet-accepted embryo socket found on a
 * listener's queue, in which case the peer always gets ECONNRESET.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Detach the filesystem binding; path_put() happens after unlock. */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's queue holds embryo sockets: release those too. */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
 593
/* Record the current task's pid and credentials as sk's peer identity
 * (reported via SO_PEERCRED), dropping any previously stored ones.
 */
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}
 602
/* Copy peersk's stored peer pid/credentials into sk (taking fresh
 * references), dropping sk's previous ones.  Used when an accepted
 * socket inherits the listener's credentials.
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
 611
/* listen(2) for AF_UNIX: only stream/seqpacket sockets that are already
 * bound may listen.  Returns 0 on success, -EOPNOTSUPP for wrong socket
 * type, -EINVAL if unbound or in a state other than CLOSE/LISTEN.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* A raised backlog may unblock connectors waiting for a slot. */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}
 640
 641static int unix_release(struct socket *);
 642static int unix_bind(struct socket *, struct sockaddr *, int);
 643static int unix_stream_connect(struct socket *, struct sockaddr *,
 644                               int addr_len, int flags);
 645static int unix_socketpair(struct socket *, struct socket *);
 646static int unix_accept(struct socket *, struct socket *, int, bool);
 647static int unix_getname(struct socket *, struct sockaddr *, int);
 648static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 649static __poll_t unix_dgram_poll(struct file *, struct socket *,
 650                                    poll_table *);
 651static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 652#ifdef CONFIG_COMPAT
 653static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 654#endif
 655static int unix_shutdown(struct socket *, int);
 656static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 657static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 658static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 659                                    size_t size, int flags);
 660static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 661                                       struct pipe_inode_info *, size_t size,
 662                                       unsigned int flags);
 663static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 664static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 665static int unix_dgram_connect(struct socket *, struct sockaddr *,
 666                              int, int);
 667static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 668static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 669                                  int);
 670
/* SO_PEEK_OFF setter: update sk_peek_off under the per-socket I/O mutex
 * so it cannot change mid-recvmsg.  Returns -EINTR if interrupted while
 * waiting for the lock.
 */
static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}
 683
#ifdef CONFIG_PROC_FS
/* /proc/<pid>/fdinfo hook: report the number of fds currently in flight
 * (queued SCM_RIGHTS) on this socket.
 */
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;

	if (sk) {
		u = unix_sk(sock->sk);
		seq_printf(m, "scm_fds: %u\n",
			   atomic_read(&u->scm_stat.nr_fds));
	}
}
#else
#define unix_show_fdinfo NULL
#endif
 699
/* proto_ops for SOCK_STREAM AF_UNIX sockets. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
 724
/* proto_ops for SOCK_DGRAM AF_UNIX sockets (no listen/accept). */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
 748
/* proto_ops for SOCK_SEQPACKET AF_UNIX sockets: connection-oriented like
 * stream, but with dgram-style poll and message boundaries.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
 772
/* Protocol descriptor shared by all three AF_UNIX socket types. */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
 778
/* Allocate and initialise a new AF_UNIX sock, enforce the global socket
 * limit (2 * max files), and insert it into the unbound hash bucket.
 * Returns the new sock or NULL on failure (limit hit or allocation
 * failure); the speculative counter increment is undone on failure.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	/* Dgram backlog default; listen() overrides it for stream. */
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
 820
/* socket(2) entry point for PF_UNIX: select the proto_ops table for the
 * requested type and allocate the sock.  SOCK_RAW is silently treated as
 * SOCK_DGRAM for BSD compatibility.  Returns 0, -EPROTONOSUPPORT,
 * -ESOCKTNOSUPPORT, or -ENOMEM.
 */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
 852
 853static int unix_release(struct socket *sock)
 854{
 855        struct sock *sk = sock->sk;
 856
 857        if (!sk)
 858                return 0;
 859
 860        unix_release_sock(sk, 0);
 861        sock->sk = NULL;
 862
 863        return 0;
 864}
 865
/* Bind the socket to an autogenerated abstract name of the form
 * "\0XXXXX" (five hex digits).  Used when bind() supplies only the
 * address family, and implicitly before send/connect on SOCK_PASSCRED
 * sockets.  Returns 0 on success, a negative errno on failure
 * (-ENOSPC once all 2^20 candidate names are in use).
 */
static int unix_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        static u32 ordernum = 1;        /* next candidate name, shared by all sockets */
        struct unix_address *addr;
        int err;
        unsigned int retries = 0;

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                return err;

        /* Already bound (perhaps raced with an explicit bind). */
        if (u->addr)
                goto out;

        err = -ENOMEM;
        /* sizeof(short) covers sun_family; 16 bytes leave room for the
         * leading NUL plus "%05x" name.
         */
        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
        if (!addr)
                goto out;

        addr->name->sun_family = AF_UNIX;
        refcount_set(&addr->refcnt, 1);

retry:
        /* NOTE(review): ordernum is read here without unix_table_lock
         * but incremented below under it, so concurrent autobinds can
         * race on the candidate name; the byname lookup below still
         * prevents duplicate insertion.  TODO: confirm against later
         * upstream fixes.
         */
        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

        spin_lock(&unix_table_lock);
        ordernum = (ordernum+1)&0xFFFFF;

        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
                                      addr->hash)) {
                spin_unlock(&unix_table_lock);
                /*
                 * __unix_find_socket_byname() may take long time if many names
                 * are already in use.
                 */
                cond_resched();
                /* Give up if all names seems to be in use. */
                if (retries++ == 0xFFFFF) {
                        err = -ENOSPC;
                        kfree(addr);
                        goto out;
                }
                goto retry;
        }
        addr->hash ^= sk->sk_type;

        /* Move the socket from the unbound chain to its hash bucket.
         * smp_store_release() pairs with smp_load_acquire() readers of
         * u->addr.
         */
        __unix_remove_socket(sk);
        smp_store_release(&u->addr, addr);
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        spin_unlock(&unix_table_lock);
        err = 0;

out:    mutex_unlock(&u->bindlock);
        return err;
}
 925
/* Look up the peer socket for a sockaddr_un.
 *
 * Filesystem names (sun_path[0] != 0) are resolved through the VFS and
 * matched by inode; abstract names are looked up in the name hash.  On
 * success the peer is returned with a reference held (and its atime
 * touched); on failure NULL is returned and a negative errno is stored
 * in *error.
 */
static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct sock *u;
        struct path path;
        int err = 0;

        if (sunname->sun_path[0]) {
                struct inode *inode;
                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
                if (err)
                        goto fail;
                inode = d_backing_inode(path.dentry);
                /* Connecting requires write permission on the node. */
                err = path_permission(&path, MAY_WRITE);
                if (err)
                        goto put_fail;

                err = -ECONNREFUSED;
                if (!S_ISSOCK(inode->i_mode))
                        goto put_fail;
                u = unix_find_socket_byinode(inode);
                if (!u)
                        goto put_fail;

                if (u->sk_type == type)
                        touch_atime(&path);

                path_put(&path);

                /* A socket of a different type is bound at this path. */
                err = -EPROTOTYPE;
                if (u->sk_type != type) {
                        sock_put(u);
                        goto fail;
                }
        } else {
                err = -ECONNREFUSED;
                u = unix_find_socket_byname(net, sunname, len, type, hash);
                if (u) {
                        struct dentry *dentry;
                        dentry = unix_sk(u)->path.dentry;
                        if (dentry)
                                touch_atime(&unix_sk(u)->path);
                } else
                        goto fail;
        }
        return u;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}
 980
/* Create the filesystem node for a path-based bind.
 *
 * On success *res holds references to the new dentry and mount (the
 * caller owns them); on failure a negative errno is returned - the
 * caller maps -EEXIST to -EADDRINUSE.
 */
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
        struct dentry *dentry;
        struct path path;
        int err = 0;
        /*
         * Get the parent directory, calculate the hash for last
         * component.
         */
        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
                return err;

        /*
         * All right, let's create it.
         */
        err = security_path_mknod(&path, dentry, mode, 0);
        if (!err) {
                err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
                                dentry, mode, 0);
                if (!err) {
                        res->mnt = mntget(path.mnt);
                        res->dentry = dget(dentry);
                }
        }
        done_path_create(&path, dentry);
        return err;
}
1010
/* Bind a socket to an address.
 *
 * An addr_len of sizeof(short) (family only) requests autobind; an
 * abstract name (sun_path[0] == 0) goes straight into the name hash;
 * a filesystem name first creates the socket inode via unix_mknod()
 * and then hashes by inode number.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
        int err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;
        struct path path = { };

        err = -EINVAL;
        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
            sunaddr->sun_family != AF_UNIX)
                goto out;

        /* Family-only address: pick an abstract name automatically. */
        if (addr_len == sizeof(short)) {
                err = unix_autobind(sock);
                goto out;
        }

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (sun_path[0]) {
                /* Create the on-disk node before taking the bind lock. */
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
                err = unix_mknod(sun_path, mode, &path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
                        goto out;
                }
        }

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                goto out_put;

        /* Only one bind per socket lifetime. */
        err = -EINVAL;
        if (u->addr)
                goto out_up;

        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
                goto out_up;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash ^ sk->sk_type;
        refcount_set(&addr->refcnt, 1);

        if (sun_path[0]) {
                /* Filesystem sockets hash by inode number; addr->hash of
                 * UNIX_HASH_SIZE marks "not an abstract name".  No
                 * duplicate check needed: mknod above already failed if
                 * the path existed.
                 */
                addr->hash = UNIX_HASH_SIZE;
                hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
                u->path = path;
                list = &unix_socket_table[hash];
        } else {
                spin_lock(&unix_table_lock);
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                              sk->sk_type, hash)) {
                        unix_release_addr(addr);
                        goto out_unlock;
                }

                list = &unix_socket_table[addr->hash];
        }

        err = 0;
        /* Publish the address: smp_store_release() pairs with
         * smp_load_acquire() readers of u->addr.
         */
        __unix_remove_socket(sk);
        smp_store_release(&u->addr, addr);
        __unix_insert_socket(list, sk);

out_unlock:
        spin_unlock(&unix_table_lock);
out_up:
        mutex_unlock(&u->bindlock);
out_put:
        if (err)
                path_put(&path);
out:
        return err;
}
1101
/* Take the state locks of two socks in a globally consistent (address)
 * order so that concurrent double-lockers cannot deadlock.  With a NULL
 * or identical second sock only sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (!sk2 || sk1 == sk2) {
                unix_state_lock(sk1);
                return;
        }

        first  = sk1 < sk2 ? sk1 : sk2;
        second = sk1 < sk2 ? sk2 : sk1;
        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1116
/* Release the locks taken by unix_state_double_lock(). */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);
        if (sk2 && sk2 != sk1)
                unix_state_unlock(sk2);
}
1126
/* connect() for SOCK_DGRAM: record 'other' as the socket's default
 * peer.  AF_UNSPEC disconnects (1003.1g).  Unlike stream connect no
 * handshake is performed - only the peer pointer is updated, under
 * both state locks.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        err = -EINVAL;
        if (alen < offsetofend(struct sockaddr, sa_family))
                goto out;

        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                /* SOCK_PASSCRED requires an address to report; autobind
                 * if we have none yet.
                 */
                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

        } else {
                /*
                 *      1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
                unix_peer(sk) = other;
                /* Detach from the old peer's wake queue and wake any
                 * senders that were throttled on it.
                 */
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }
        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}
1206
/* Sleep until the peer's receive queue may have room again.
 *
 * Called with other's state lock held; the lock is dropped before
 * sleeping (hence the __releases annotation).  The wait is exclusive,
 * so one queue-space wakeup wakes only one waiter.  Returns the
 * remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
        __releases(&unix_sk(other)->lock)
{
        struct unix_sock *u = unix_sk(other);
        int sched;
        DEFINE_WAIT(wait);

        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

        /* Only sleep if the queue is still full and the peer can still
         * deliver a wakeup (alive, not shut down for receive).
         */
        sched = !sock_flag(other, SOCK_DEAD) &&
                !(other->sk_shutdown & RCV_SHUTDOWN) &&
                unix_recvq_full(other);

        unix_state_unlock(other);

        if (sched)
                timeo = schedule_timeout(timeo);

        finish_wait(&u->peer_wait, &wait);
        return timeo;
}
1228
/* connect() for SOCK_STREAM/SOCK_SEQPACKET.
 *
 * Finds the listening peer, waits for backlog space if needed, creates
 * the server-side ("embryo") sock, splices the pair together, and
 * queues a handshake skb on the listener for unix_accept() to pick up.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* SOCK_PASSCRED needs an address to report; autobind if unbound. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we will make it after state is locked,
           we will have to recheck all again in any case.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock.  Its payload is
         * irrelevant; unix_accept() retrieves the embryo via skb->sk.
         */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* Drops other's state lock while sleeping. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           It is tricky place. We need to grab our state lock and cannot
           drop lock on peer. It is dangerous because deadlock is
           possible. Connect to self case and simultaneous
           attempt to connect are eliminated by checking socket
           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
           check this before attempt to grab lock.

           Well, and we have to recheck the state after socket locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* State changed while we were unlocked; start over. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Fastly set all the necessary fields... */

        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock
         *
         * The contents of *(otheru->addr) and otheru->path
         * are seen fully set up here, since we have found
         * otheru in hash under unix_table_lock.  Insertion
         * into the hash chain we'd found it in had been done
         * in an earlier critical area protected by unix_table_lock,
         * the same one where we'd set *(otheru->addr) contents,
         * as well as otheru->path and otheru->addr itself.
         *
         * Using smp_store_release() here to set newu->addr
         * is enough to make those stores, as well as stores
         * to newu->path visible to anyone who gets newu->addr
         * by smp_load_acquire().  IOW, the same warranties
         * as for unix_sock instances bound in unix_bind() or
         * in unix_autobind().
         */
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }
        refcount_inc(&otheru->addr->refcnt);
        smp_store_release(&newu->addr, otheru->addr);

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* Queue the handshake skb and notify the listening sock. */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1417
1418static int unix_socketpair(struct socket *socka, struct socket *sockb)
1419{
1420        struct sock *ska = socka->sk, *skb = sockb->sk;
1421
1422        /* Join our sockets back to back */
1423        sock_hold(ska);
1424        sock_hold(skb);
1425        unix_peer(ska) = skb;
1426        unix_peer(skb) = ska;
1427        init_peercred(ska);
1428        init_peercred(skb);
1429
1430        if (ska->sk_type != SOCK_DGRAM) {
1431                ska->sk_state = TCP_ESTABLISHED;
1432                skb->sk_state = TCP_ESTABLISHED;
1433                socka->state  = SS_CONNECTED;
1434                sockb->state  = SS_CONNECTED;
1435        }
1436        return 0;
1437}
1438
1439static void unix_sock_inherit_flags(const struct socket *old,
1440                                    struct socket *new)
1441{
1442        if (test_bit(SOCK_PASSCRED, &old->flags))
1443                set_bit(SOCK_PASSCRED, &new->flags);
1444        if (test_bit(SOCK_PASSSEC, &old->flags))
1445                set_bit(SOCK_PASSSEC, &new->flags);
1446}
1447
/* Accept a pending connection on a listening stream/seqpacket socket.
 *
 * Each pending connection is represented by a handshake skb queued on
 * the listener by unix_stream_connect(); skb->sk is the embryo sock
 * that becomes the accepted socket.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
                       bool kern)
{
        struct sock *sk = sock->sk;
        struct sock *tsk;
        struct sk_buff *skb;
        int err;

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;

        err = -EINVAL;
        if (sk->sk_state != TCP_LISTEN)
                goto out;

        /* If socket state is TCP_LISTEN it cannot change (for now...),
         * so that no locks are necessary.
         */

        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
        if (!skb) {
                /* This means receive shutdown. */
                if (err == 0)
                        err = -EINVAL;
                goto out;
        }

        tsk = skb->sk;
        skb_free_datagram(sk, skb);
        /* A backlog slot just freed up - wake a blocked connector. */
        wake_up_interruptible(&unix_sk(sk)->peer_wait);

        /* attach accepted sock to socket */
        unix_state_lock(tsk);
        newsock->state = SS_CONNECTED;
        unix_sock_inherit_flags(sock, newsock);
        sock_graft(tsk, newsock);
        unix_state_unlock(tsk);
        return 0;

out:
        return err;
}
1491
1492
/* getsockname()/getpeername() backend.
 *
 * Copies the bound address of sk (or of its peer when 'peer' is set)
 * into *uaddr and returns its length (quirk: the length travels in
 * 'err').  Returns -ENOTCONN when the peer is requested and none
 * exists.  An unbound socket reports an empty address of length
 * sizeof(short) (family only).
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
        struct sock *sk = sock->sk;
        struct unix_address *addr;
        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
        int err = 0;

        if (peer) {
                sk = unix_peer_get(sk);

                err = -ENOTCONN;
                if (!sk)
                        goto out;
                err = 0;
        } else {
                sock_hold(sk);
        }

        /* Pairs with smp_store_release() in unix_bind()/unix_autobind(). */
        addr = smp_load_acquire(&unix_sk(sk)->addr);
        if (!addr) {
                sunaddr->sun_family = AF_UNIX;
                sunaddr->sun_path[0] = 0;
                err = sizeof(short);
        } else {
                err = addr->len;
                memcpy(sunaddr, addr->name, addr->len);
        }
        sock_put(sk);
out:
        return err;
}
1524
1525static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1526{
1527        int err = 0;
1528
1529        UNIXCB(skb).pid  = get_pid(scm->pid);
1530        UNIXCB(skb).uid = scm->creds.uid;
1531        UNIXCB(skb).gid = scm->creds.gid;
1532        UNIXCB(skb).fp = NULL;
1533        unix_get_secdata(scm, skb);
1534        if (scm->fp && send_fds)
1535                err = unix_attach_fds(scm, skb);
1536
1537        skb->destructor = unix_destruct_scm;
1538        return err;
1539}
1540
1541static bool unix_passcred_enabled(const struct socket *sock,
1542                                  const struct sock *other)
1543{
1544        return test_bit(SOCK_PASSCRED, &sock->flags) ||
1545               !other->sk_socket ||
1546               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1547}
1548
1549/*
1550 * Some apps rely on write() giving SCM_CREDENTIALS
1551 * We include credentials if source or destination socket
1552 * asserted SOCK_PASSCRED.
1553 */
1554static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1555                            const struct sock *other)
1556{
1557        if (UNIXCB(skb).pid)
1558                return;
1559        if (unix_passcred_enabled(sock, other)) {
1560                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1561                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1562        }
1563}
1564
1565static int maybe_init_creds(struct scm_cookie *scm,
1566                            struct socket *socket,
1567                            const struct sock *other)
1568{
1569        int err;
1570        struct msghdr msg = { .msg_controllen = 0 };
1571
1572        err = scm_send(socket, &msg, scm, false);
1573        if (err)
1574                return err;
1575
1576        if (unix_passcred_enabled(socket, other)) {
1577                scm->pid = get_pid(task_tgid(current));
1578                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1579        }
1580        return err;
1581}
1582
1583static bool unix_skb_scm_eq(struct sk_buff *skb,
1584                            struct scm_cookie *scm)
1585{
1586        const struct unix_skb_parms *u = &UNIXCB(skb);
1587
1588        return u->pid == scm->pid &&
1589               uid_eq(u->uid, scm->creds.uid) &&
1590               gid_eq(u->gid, scm->creds.gid) &&
1591               unix_secdata_eq(scm, skb);
1592}
1593
1594static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1595{
1596        struct scm_fp_list *fp = UNIXCB(skb).fp;
1597        struct unix_sock *u = unix_sk(sk);
1598
1599        if (unlikely(fp && fp->count))
1600                atomic_add(fp->count, &u->scm_stat.nr_fds);
1601}
1602
1603static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1604{
1605        struct scm_fp_list *fp = UNIXCB(skb).fp;
1606        struct unix_sock *u = unix_sk(sk);
1607
1608        if (unlikely(fp && fp->count))
1609                atomic_sub(fp->count, &u->scm_stat.nr_fds);
1610}
1611
1612/*
1613 *      Send AF_UNIX data.
1614 */
1615
/* Send one datagram.
 *
 * The destination is either msg->msg_name or the connected peer.  Also
 * handles SOCK_SEQPACKET sends (see the sk_type check before the LSM
 * hook below - NOTE(review): presumably reached via a seqpacket
 * wrapper; confirm against unix_seqpacket_sendmsg).  Returns the
 * number of bytes sent or a negative errno.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
                              size_t len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
        struct sock *other = NULL;
        int namelen = 0; /* fake GCC */
        int err;
        unsigned int hash;
        struct sk_buff *skb;
        long timeo;
        struct scm_cookie scm;
        int data_len = 0;
        int sk_locked;

        wait_for_unix_gc();
        err = scm_send(sock, msg, &scm, false);
        if (err < 0)
                return err;

        /* No out-of-band data on datagram sockets. */
        err = -EOPNOTSUPP;
        if (msg->msg_flags&MSG_OOB)
                goto out;

        if (msg->msg_namelen) {
                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
                if (err < 0)
                        goto out;
                namelen = err;
        } else {
                sunaddr = NULL;
                err = -ENOTCONN;
                other = unix_peer_get(sk);
                if (!other)
                        goto out;
        }

        /* SOCK_PASSCRED needs a source address; autobind if unbound. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
            && (err = unix_autobind(sock)) != 0)
                goto out;

        /* The whole datagram must fit in the send buffer (minus slack). */
        err = -EMSGSIZE;
        if (len > sk->sk_sndbuf - 32)
                goto out;

        /* Large datagrams go partly into page fragments. */
        if (len > SKB_MAX_ALLOC) {
                data_len = min_t(size_t,
                                 len - SKB_MAX_ALLOC,
                                 MAX_SKB_FRAGS * PAGE_SIZE);
                data_len = PAGE_ALIGN(data_len);

                BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
        }

        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
                                   msg->msg_flags & MSG_DONTWAIT, &err,
                                   PAGE_ALLOC_COSTLY_ORDER);
        if (skb == NULL)
                goto out;

        err = unix_scm_to_skb(&scm, skb, true);
        if (err < 0)
                goto out_free;

        skb_put(skb, len - data_len);
        skb->data_len = data_len;
        skb->len = len;
        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
        if (err)
                goto out_free;

        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
        /* Resolve the destination if we do not hold one already:
         * either we arrived with an explicit address, or the connected
         * peer was dropped below and must be re-resolved.
         */
        if (!other) {
                err = -ECONNRESET;
                if (sunaddr == NULL)
                        goto out_free;

                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
                                        hash, &err);
                if (other == NULL)
                        goto out_free;
        }

        if (sk_filter(other, skb) < 0) {
                /* Toss the packet but do not return any error to the sender */
                err = len;
                goto out_free;
        }

        sk_locked = 0;
        unix_state_lock(other);
restart_locked:
        err = -EPERM;
        if (!unix_may_send(sk, other))
                goto out_unlock;

        if (unlikely(sock_flag(other, SOCK_DEAD))) {
                /*
                 *      Check with 1003.1g - what should
                 *      datagram error
                 *
                 *      If the dead socket was our connected peer,
                 *      disconnect and report -ECONNREFUSED; otherwise
                 *      retry the lookup from scratch.
                 */
                unix_state_unlock(other);
                sock_put(other);

                if (!sk_locked)
                        unix_state_lock(sk);

                err = 0;
                if (unix_peer(sk) == other) {
                        unix_peer(sk) = NULL;
                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);

                        unix_state_unlock(sk);

                        unix_dgram_disconnected(sk, other);
                        sock_put(other);
                        err = -ECONNREFUSED;
                } else {
                        unix_state_unlock(sk);
                }

                other = NULL;
                if (err)
                        goto out_free;
                goto restart;
        }

        err = -EPIPE;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (sk->sk_type != SOCK_SEQPACKET) {
                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;
        }

        /* other == sk && unix_peer(other) != sk if
         * - unix_peer(sk) == NULL, destination address bound to sk
         * - unix_peer(sk) == sk by time of get but disconnected before lock
         */
        if (other != sk &&
            unlikely(unix_peer(other) != sk &&
            unix_recvq_full_lockless(other))) {
                /* Receiver queue full.  Blocking senders sleep on the
                 * peer; non-blocking senders register on the peer's
                 * wake queue (needs both state locks, taken in order
                 * via unix_state_double_lock()) and return -EAGAIN.
                 */
                if (timeo) {
                        timeo = unix_wait_for_peer(other, timeo);

                        err = sock_intr_errno(timeo);
                        if (signal_pending(current))
                                goto out_free;

                        goto restart;
                }

                if (!sk_locked) {
                        unix_state_unlock(other);
                        unix_state_double_lock(sk, other);
                }

                if (unix_peer(sk) != other ||
                    unix_dgram_peer_wake_me(sk, other)) {
                        err = -EAGAIN;
                        sk_locked = 1;
                        goto out_unlock;
                }

                if (!sk_locked) {
                        sk_locked = 1;
                        goto restart_locked;
                }
        }

        if (unlikely(sk_locked))
                unix_state_unlock(sk);

        if (sock_flag(other, SOCK_RCVTSTAMP))
                __net_timestamp(skb);
        maybe_add_creds(skb, sock, other);
        scm_stat_add(other, skb);
        skb_queue_tail(&other->sk_receive_queue, skb);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        scm_destroy(&scm);
        return len;

out_unlock:
        if (sk_locked)
                unix_state_unlock(sk);
        unix_state_unlock(other);
out_free:
        kfree_skb(skb);
out:
        if (other)
                sock_put(other);
        scm_destroy(&scm);
        return err;
}
1818
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 * (PAGE_SIZE << get_order(32768)) rounds 32768 up to a whole number of
 * pages, so on architectures with pages larger than 32K the limit is
 * still at least one full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1823
/* Send data on a connected SOCK_STREAM AF_UNIX socket.
 *
 * The payload is chopped into skbs sized by the send buffer (half the
 * sndbuf, so two messages can sit in the pipe and scheduling overlaps),
 * with head room limited to SKB_MAX_HEAD(0) and the remainder carried in
 * page fragments.  Passed fds/credentials (scm) are attached only to the
 * first skb.  Returns the number of bytes sent, or a negative errno if
 * nothing was queued at all; a partial send returns the partial count.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	/* Stream sockets take no destination address: sending to a name is
	 * an error, and we must already have a connected peer.
	 */
	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		/* Bytes beyond what fits in the linear head go to frags. */
		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		/* Reserve the linear part, then account the frag bytes. */
		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		/* Queue under the peer's state lock; recheck liveness since
		 * the peer may have died or shut down while we copied.
		 */
		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		scm_stat_add(other, skb);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* SIGPIPE only if nothing was sent and the caller didn't opt out. */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
1923
/* sendpage() for SOCK_STREAM AF_UNIX sockets.
 *
 * Tries to append the page as a fragment to the last skb already sitting
 * in the peer's receive queue (fast path, no new skb), falling back to
 * allocating a fresh zero-length skb to hang the page off.  The peer's
 * iolock serialises us against readers that mutate queued skbs' lengths.
 * Returns @size on success or a negative errno.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Never entered on the first pass; the alloc_skb label is jumped to
	 * from below with both locks held, drops them, allocates (possibly
	 * sleeping), then falls through to retake the locks.
	 */
	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	/* Set up credentials once; init_scm gates scm_destroy on error. */
	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/* Pick the skb to append to: reuse the queue tail only if it still
	 * is the same skb we saw before (tail) or carries matching scm
	 * state; otherwise use (or allocate) a fresh skb.
	 */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	/* Account the appended bytes against the sender's write memory. */
	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
2039
2040static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2041                                  size_t len)
2042{
2043        int err;
2044        struct sock *sk = sock->sk;
2045
2046        err = sock_error(sk);
2047        if (err)
2048                return err;
2049
2050        if (sk->sk_state != TCP_ESTABLISHED)
2051                return -ENOTCONN;
2052
2053        if (msg->msg_namelen)
2054                msg->msg_namelen = 0;
2055
2056        return unix_dgram_sendmsg(sock, msg, len);
2057}
2058
2059static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2060                                  size_t size, int flags)
2061{
2062        struct sock *sk = sock->sk;
2063
2064        if (sk->sk_state != TCP_ESTABLISHED)
2065                return -ENOTCONN;
2066
2067        return unix_dgram_recvmsg(sock, msg, size, flags);
2068}
2069
2070static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2071{
2072        struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2073
2074        if (addr) {
2075                msg->msg_namelen = addr->len;
2076                memcpy(msg->msg_name, addr->name, addr->len);
2077        }
2078}
2079
/* Receive one datagram (SOCK_DGRAM or SOCK_SEQPACKET).
 *
 * Dequeues (or peeks) a single skb under u->iolock, copies up to @size
 * bytes to the caller, and hands over any attached credentials and file
 * descriptors via scm.  Returns the number of bytes delivered (or the
 * full skb length when MSG_TRUNC is set) or a negative errno.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Try to dequeue under the iolock; if the queue is empty, drop the
	 * lock and wait for data, then retry, until the timeout expires.
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
					      &skip, &err, &last);
		if (skb) {
			/* A real dequeue (not a peek) removes the skb's
			 * fd accounting from this socket.
			 */
			if (!(flags & MSG_PEEK))
				scm_stat_del(sk, skb);
			break;
		}

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
					      &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* Taking a packet frees queue space; wake writers blocked on us. */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Datagram semantics: a short read truncates and sets MSG_TRUNC. */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Consuming read: detach passed fds so they are installed
		 * into the receiver (or dropped), and rewind peek offset.
		 */
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2186
2187/*
2188 *      Sleep until more data has arrived. But check for races..
2189 */
2190static long unix_stream_data_wait(struct sock *sk, long timeo,
2191                                  struct sk_buff *last, unsigned int last_len,
2192                                  bool freezable)
2193{
2194        struct sk_buff *tail;
2195        DEFINE_WAIT(wait);
2196
2197        unix_state_lock(sk);
2198
2199        for (;;) {
2200                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2201
2202                tail = skb_peek_tail(&sk->sk_receive_queue);
2203                if (tail != last ||
2204                    (tail && tail->len != last_len) ||
2205                    sk->sk_err ||
2206                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2207                    signal_pending(current) ||
2208                    !timeo)
2209                        break;
2210
2211                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2212                unix_state_unlock(sk);
2213                if (freezable)
2214                        timeo = freezable_schedule_timeout(timeo);
2215                else
2216                        timeo = schedule_timeout(timeo);
2217                unix_state_lock(sk);
2218
2219                if (sock_flag(sk, SOCK_DEAD))
2220                        break;
2221
2222                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2223        }
2224
2225        finish_wait(sk_sleep(sk), &wait);
2226        unix_state_unlock(sk);
2227        return timeo;
2228}
2229
/* Bytes of this skb not yet consumed by the stream reader. */
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}
2234
/* Per-call state shared between unix_stream_read_generic() and its
 * per-destination "actor" callbacks (recvmsg copies to a msghdr, splice
 * feeds a pipe).
 */
struct unix_stream_read_state {
	/* Copies up to @chunk bytes starting @skip into the destination;
	 * returns bytes copied or a negative errno.
	 */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;		/* recvmsg destination (NULL for splice) */
	struct pipe_inode_info *pipe;	/* splice destination (NULL for recvmsg) */
	size_t size;			/* total bytes requested */
	int flags;			/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags */
};
2245
/* Core receive loop for SOCK_STREAM sockets, shared by recvmsg and
 * splice via state->recv_actor.
 *
 * Walks the receive queue under u->iolock, gluing consecutive skbs into
 * one read as long as they carry the same credentials/fds, honouring
 * MSG_PEEK with a peek offset and waiting (freezably, if requested) when
 * fewer than @target bytes are available.  Returns bytes delivered, or a
 * negative errno if nothing was delivered.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* Drop the iolock while sleeping so writers can
			 * queue data; re-take it and rescan afterwards.
			 */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Advance past fully-skipped skbs (peek offset). */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			/* NOTE(review): sunaddr is declared, then set to
			 * NULL and never read - looks like dead code kept
			 * for the DECLARE_SOCKADDR size check; confirm.
			 */
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		/* Hold an extra ref across the actor: a concurrent reader
		 * may consume the skb while we copy from it.
		 */
		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			/* Partially consumed skb stays queued; stop here. */
			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Passed fds terminate the read: they must be
			 * delivered with exactly this data boundary.
			 */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2444
2445static int unix_stream_read_actor(struct sk_buff *skb,
2446                                  int skip, int chunk,
2447                                  struct unix_stream_read_state *state)
2448{
2449        int ret;
2450
2451        ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2452                                    state->msg, chunk);
2453        return ret ?: chunk;
2454}
2455
2456static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2457                               size_t size, int flags)
2458{
2459        struct unix_stream_read_state state = {
2460                .recv_actor = unix_stream_read_actor,
2461                .socket = sock,
2462                .msg = msg,
2463                .size = size,
2464                .flags = flags
2465        };
2466
2467        return unix_stream_read_generic(&state, true);
2468}
2469
2470static int unix_stream_splice_actor(struct sk_buff *skb,
2471                                    int skip, int chunk,
2472                                    struct unix_stream_read_state *state)
2473{
2474        return skb_splice_bits(skb, state->socket->sk,
2475                               UNIXCB(skb).consumed + skip,
2476                               state->pipe, chunk, state->splice_flags);
2477}
2478
2479static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2480                                       struct pipe_inode_info *pipe,
2481                                       size_t size, unsigned int flags)
2482{
2483        struct unix_stream_read_state state = {
2484                .recv_actor = unix_stream_splice_actor,
2485                .socket = sock,
2486                .pipe = pipe,
2487                .size = size,
2488                .splice_flags = flags,
2489        };
2490
2491        if (unlikely(*ppos))
2492                return -ESPIPE;
2493
2494        if (sock->file->f_flags & O_NONBLOCK ||
2495            flags & SPLICE_F_NONBLOCK)
2496                state.flags = MSG_DONTWAIT;
2497
2498        return unix_stream_read_generic(&state, false);
2499}
2500
/* shutdown(2) handler.  Marks this socket's shutdown bits, and for
 * connection-oriented types mirrors the complementary bits onto the
 * peer (our send shutdown is the peer's receive shutdown) and wakes it.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	/* Grab a reference to the peer under our own lock so it cannot go
	 * away before we update and wake it below.
	 */
	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* Our RCV shutdown stops the peer sending, and vice versa. */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2546
2547long unix_inq_len(struct sock *sk)
2548{
2549        struct sk_buff *skb;
2550        long amount = 0;
2551
2552        if (sk->sk_state == TCP_LISTEN)
2553                return -EINVAL;
2554
2555        spin_lock(&sk->sk_receive_queue.lock);
2556        if (sk->sk_type == SOCK_STREAM ||
2557            sk->sk_type == SOCK_SEQPACKET) {
2558                skb_queue_walk(&sk->sk_receive_queue, skb)
2559                        amount += unix_skb_len(skb);
2560        } else {
2561                skb = skb_peek(&sk->sk_receive_queue);
2562                if (skb)
2563                        amount = skb->len;
2564        }
2565        spin_unlock(&sk->sk_receive_queue.lock);
2566
2567        return amount;
2568}
2569EXPORT_SYMBOL_GPL(unix_inq_len);
2570
/* Bytes still accounted to this sender's write memory (SIOCOUTQ). */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2576
/* SIOCUNIXFILE: open the filesystem object a bound socket sits on as an
 * O_PATH fd in the caller's table.  Requires CAP_NET_ADMIN in the
 * socket's namespace.  Returns the new fd or a negative errno.
 */
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* Acquire pairs with the release store publishing the address;
	 * an unbound socket has no path to open.
	 */
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	/* Hold our own reference on the path for the duration. */
	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
2612
/* ioctl(2) handler: SIOCOUTQ/SIOCINQ report queued byte counts to the
 * user-supplied int pointer; SIOCUNIXFILE opens the bound path.
 * Unknown commands fall through to -ENOIOCTLCMD.
 */
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		/* unix_inq_len() returns -EINVAL for listening sockets. */
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
2640
#ifdef CONFIG_COMPAT
/* 32-bit compat entry point: translate the pointer argument with
 * compat_ptr() and reuse the native ioctl handler.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
2647
/* poll() for stream/seqpacket sockets: assemble the event mask from
 * pending errors, shutdown state, queued data, connection state, and
 * writability.
 */
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
2682
/*
 * poll/select handler for SOCK_DGRAM (and SEQPACKET) unix sockets.
 *
 * Readability mirrors unix_poll(), but writability additionally depends
 * on the connected peer: a sender is throttled while the peer's receive
 * queue is full, and registers for a wakeup when space frees up.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/*
		 * Connected to a peer that is not connected back to us
		 * and whose receive queue is full: report not-writable
		 * and register on the peer's wake list via
		 * unix_dgram_peer_wake_me() so we are woken when the
		 * queue drains.
		 */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		/* Arm the async "no space" notification for SIGIO users. */
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
2740
#ifdef CONFIG_PROC_FS

/*
 * /proc/net/unix iterator position encoding: the upper bits of the
 * seq_file pos hold the hash-table bucket index, the lower BUCKET_SPACE
 * bits hold the 1-based offset of the socket within that bucket.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2748
2749static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2750{
2751        unsigned long offset = get_offset(*pos);
2752        unsigned long bucket = get_bucket(*pos);
2753        struct sock *sk;
2754        unsigned long count = 0;
2755
2756        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2757                if (sock_net(sk) != seq_file_net(seq))
2758                        continue;
2759                if (++count == offset)
2760                        break;
2761        }
2762
2763        return sk;
2764}
2765
/*
 * Return the next socket after @sk in iteration order, skipping sockets
 * from other network namespaces.  When the current bucket's chain is
 * exhausted (or @sk is NULL/SEQ_START_TOKEN), *pos is advanced to the
 * next bucket with the offset reset to 1 and the search continues
 * there.  Returns NULL once every bucket has been walked.  Caller
 * holds unix_table_lock.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	/* Continue within the current bucket's chain first. */
	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		/* Chain exhausted: move to offset 1 of the next bucket. */
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2792
2793static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2794        __acquires(unix_table_lock)
2795{
2796        spin_lock(&unix_table_lock);
2797
2798        if (!*pos)
2799                return SEQ_START_TOKEN;
2800
2801        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2802                return NULL;
2803
2804        return unix_next_socket(seq, NULL, pos);
2805}
2806
2807static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2808{
2809        ++*pos;
2810        return unix_next_socket(seq, v, pos);
2811}
2812
/* seq_file .stop: drop the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2818
/*
 * Emit one /proc/net/unix line (or the column header for the start
 * token): kernel address, refcount, protocol (always 0 here), flags,
 * type, socket state, inode, and - when the socket is bound - its
 * pathname.  Abstract addresses get a leading '@', and NUL bytes inside
 * an abstract name are rendered as '@' too.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* u->addr is stable under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* Address length minus the sun_family field. */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;	/* drop the trailing NUL of a pathname */
			else {
				/* Abstract names start with NUL; print '@'. */
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2863
/* seq_file iterator operations backing /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#endif
2871
/* Registered with sock_register() to handle socket(AF_UNIX, ...). */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner  = THIS_MODULE,
};
2877
2878
2879static int __net_init unix_net_init(struct net *net)
2880{
2881        int error = -ENOMEM;
2882
2883        net->unx.sysctl_max_dgram_qlen = 10;
2884        if (unix_sysctl_register(net))
2885                goto out;
2886
2887#ifdef CONFIG_PROC_FS
2888        if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2889                        sizeof(struct seq_net_private))) {
2890                unix_sysctl_unregister(net);
2891                goto out;
2892        }
2893#endif
2894        error = 0;
2895out:
2896        return error;
2897}
2898
/* Per-namespace teardown: undo what unix_net_init() set up. */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2904
/* Hooks run on creation/destruction of each network namespace. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2909
2910static int __init af_unix_init(void)
2911{
2912        int rc = -1;
2913
2914        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2915
2916        rc = proto_register(&unix_proto, 1);
2917        if (rc != 0) {
2918                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2919                goto out;
2920        }
2921
2922        sock_register(&unix_family_ops);
2923        register_pernet_subsys(&unix_net_ops);
2924out:
2925        return rc;
2926}
2927
/*
 * Module teardown: stop new AF_UNIX socket creation first, then drop
 * the proto (slab cache) and the per-namespace state.
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2934
/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket. But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2944