linux/net/unix/af_unix.c
<<
>>
Prefs
   1/*
   2 * NET4:        Implementation of BSD Unix domain sockets.
   3 *
   4 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Fixes:
  12 *              Linus Torvalds  :       Assorted bug cures.
  13 *              Niibe Yutaka    :       async I/O support.
  14 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15 *              Alan Cox        :       Limit size of allocated blocks.
  16 *              Alan Cox        :       Fixed the stupid socketpair bug.
  17 *              Alan Cox        :       BSD compatibility fine tuning.
  18 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19 *              Alan Cox        :       Sorted out a proper draft version of
  20 *                                      file descriptor passing hacked up from
  21 *                                      Mike Shaver's work.
  22 *              Marty Leisner   :       Fixes to fd passing
  23 *              Nick Nevin      :       recvmsg bugfix.
  24 *              Alan Cox        :       Started proper garbage collector
  25 *              Heiko EiBfeldt  :       Missing verify_area check
  26 *              Alan Cox        :       Started POSIXisms
  27 *              Andreas Schwab  :       Replace inode by dentry for proper
  28 *                                      reference counting
  29 *              Kirk Petersen   :       Made this a module
  30 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31 *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 *                                      by above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid a huge number
 *                                      of hashed socks (for unix_gc()
 *                                      performance reasons).
  40 *                                      Security fix that limits the max
  41 *                                      number of socks to 2*max_files and
  42 *                                      the number of skb queueable in the
  43 *                                      dgram receiver.
  44 *              Artur Skawina   :       Hash function optimizations
  45 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46 *            Malcolm Beattie   :       Set peercred for socketpair
  47 *           Michal Ostrowski   :       Module initialization cleanup.
  48 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49 *                                      the core infrastructure is doing that
  50 *                                      for all net proto families now (2.5.69+)
  51 *
  52 *
  53 * Known differences from reference BSD that was tested:
  54 *
  55 *      [TO FIX]
  56 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57 *              other the moment one end closes.
 *      fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60 *      [NOT TO FIX]
  61 *      accept() returns a path name even if the connecting socket has closed
  62 *              in the meantime (BSD loses the path and gives up).
  63 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66 *      BSD af_unix apparently has connect forgetting to block properly.
  67 *              (need to check this with the POSIX spec in detail)
  68 *
  69 * Differences from 2.0.0-11-... (ANK)
  70 *      Bug fixes and improvements.
  71 *              - client shutdown killed server socket.
  72 *              - removed all useless cli/sti pairs.
  73 *
  74 *      Semantic changes/extensions.
  75 *              - generic control message passing.
  76 *              - SCM_CREDENTIALS control message.
  77 *              - "Abstract" (not FS based) socket bindings.
  78 *                Abstract names are sequences of bytes (not zero terminated)
  79 *                started by 0, so that this name space does not intersect
  80 *                with BSD names.
  81 */
  82
  83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85#include <linux/module.h>
  86#include <linux/kernel.h>
  87#include <linux/signal.h>
  88#include <linux/sched/signal.h>
  89#include <linux/errno.h>
  90#include <linux/string.h>
  91#include <linux/stat.h>
  92#include <linux/dcache.h>
  93#include <linux/namei.h>
  94#include <linux/socket.h>
  95#include <linux/un.h>
  96#include <linux/fcntl.h>
  97#include <linux/termios.h>
  98#include <linux/sockios.h>
  99#include <linux/net.h>
 100#include <linux/in.h>
 101#include <linux/fs.h>
 102#include <linux/slab.h>
 103#include <linux/uaccess.h>
 104#include <linux/skbuff.h>
 105#include <linux/netdevice.h>
 106#include <net/net_namespace.h>
 107#include <net/sock.h>
 108#include <net/tcp_states.h>
 109#include <net/af_unix.h>
 110#include <linux/proc_fs.h>
 111#include <linux/seq_file.h>
 112#include <net/scm.h>
 113#include <linux/init.h>
 114#include <linux/poll.h>
 115#include <linux/rtnetlink.h>
 116#include <linux/mount.h>
 117#include <net/checksum.h>
 118#include <linux/security.h>
 119#include <linux/freezer.h>
 120#include <linux/file.h>
 121
/*
 * Global table of AF_UNIX sockets, 2 * UNIX_HASH_SIZE hash chains in total.
 * unix_sockets_unbound() hands out chains from the upper half (index
 * UNIX_HASH_SIZE and above).
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock held by the helpers in this file while they walk or modify the table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/*
 * Number of unix sockets currently accounted: incremented in unix_create1()
 * and decremented in unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
 127
 128
 129static struct hlist_head *unix_sockets_unbound(void *addr)
 130{
 131        unsigned long hash = (unsigned long)addr;
 132
 133        hash ^= hash >> 16;
 134        hash ^= hash >> 8;
 135        hash %= UNIX_HASH_SIZE;
 136        return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137}
 138
/*
 * True when the hash recorded in the socket's address is below
 * UNIX_HASH_SIZE. The macro dereferences unix_sk(sk)->addr without a NULL
 * check, so it must only be used on sockets that already have an address.
 * NOTE(review): presumably non-abstract (filesystem) binds store a hash of
 * UNIX_HASH_SIZE or more - confirm against unix_bind().
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141#ifdef CONFIG_SECURITY_NETWORK
 142static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143{
 144        UNIXCB(skb).secid = scm->secid;
 145}
 146
 147static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148{
 149        scm->secid = UNIXCB(skb).secid;
 150}
 151
 152static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153{
 154        return (scm->secid == UNIXCB(skb).secid);
 155}
 156#else
 157static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158{ }
 159
 160static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161{ }
 162
 163static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164{
 165        return true;
 166}
 167#endif /* CONFIG_SECURITY_NETWORK */
 168
 169/*
 170 *  SMP locking strategy:
 171 *    hash table is protected with spinlock unix_table_lock
 172 *    each socket state is protected by separate spin lock.
 173 */
 174
 175static inline unsigned int unix_hash_fold(__wsum n)
 176{
 177        unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179        hash ^= hash>>8;
 180        return hash&(UNIX_HASH_SIZE-1);
 181}
 182
/*
 * Shorthand for the peer pointer kept in the socket's unix_sock. Expands to
 * an lvalue, so it is used both for reading and for assignment.
 */
#define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186{
 187        return unix_peer(osk) == sk;
 188}
 189
 190static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191{
 192        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193}
 194
 195static inline int unix_recvq_full(struct sock const *sk)
 196{
 197        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198}
 199
 200struct sock *unix_peer_get(struct sock *s)
 201{
 202        struct sock *peer;
 203
 204        unix_state_lock(s);
 205        peer = unix_peer(s);
 206        if (peer)
 207                sock_hold(peer);
 208        unix_state_unlock(s);
 209        return peer;
 210}
 211EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213static inline void unix_release_addr(struct unix_address *addr)
 214{
 215        if (refcount_dec_and_test(&addr->refcnt))
 216                kfree(addr);
 217}
 218
 219/*
 220 *      Check unix socket name:
 221 *              - should be not zero length.
 222 *              - if started by not zero, should be NULL terminated (FS object)
 223 *              - if started by zero, it is abstract name.
 224 */
 225
 226static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227{
 228        if (len <= sizeof(short) || len > sizeof(*sunaddr))
 229                return -EINVAL;
 230        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 231                return -EINVAL;
 232        if (sunaddr->sun_path[0]) {
 233                /*
 234                 * This may look like an off by one error but it is a bit more
 235                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
 236                 * sun_path[108] doesn't as such exist.  However in kernel space
 237                 * we are guaranteed that it is a valid memory location in our
 238                 * kernel address buffer.
 239                 */
 240                ((char *)sunaddr)[len] = 0;
 241                len = strlen(sunaddr->sun_path)+1+sizeof(short);
 242                return len;
 243        }
 244
 245        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 246        return len;
 247}
 248
/*
 * Unlink @sk from whatever hash chain it is on. Takes no lock itself;
 * unix_remove_socket() is the variant that wraps this in unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
 253
/*
 * Add @sk to the hash chain @list. Warns if @sk is still hashed somewhere.
 * Takes no lock itself; unix_insert_socket() is the variant that wraps this
 * in unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}
 259
 260static inline void unix_remove_socket(struct sock *sk)
 261{
 262        spin_lock(&unix_table_lock);
 263        __unix_remove_socket(sk);
 264        spin_unlock(&unix_table_lock);
 265}
 266
 267static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 268{
 269        spin_lock(&unix_table_lock);
 270        __unix_insert_socket(list, sk);
 271        spin_unlock(&unix_table_lock);
 272}
 273
 274static struct sock *__unix_find_socket_byname(struct net *net,
 275                                              struct sockaddr_un *sunname,
 276                                              int len, int type, unsigned int hash)
 277{
 278        struct sock *s;
 279
 280        sk_for_each(s, &unix_socket_table[hash ^ type]) {
 281                struct unix_sock *u = unix_sk(s);
 282
 283                if (!net_eq(sock_net(s), net))
 284                        continue;
 285
 286                if (u->addr->len == len &&
 287                    !memcmp(u->addr->name, sunname, len))
 288                        goto found;
 289        }
 290        s = NULL;
 291found:
 292        return s;
 293}
 294
 295static inline struct sock *unix_find_socket_byname(struct net *net,
 296                                                   struct sockaddr_un *sunname,
 297                                                   int len, int type,
 298                                                   unsigned int hash)
 299{
 300        struct sock *s;
 301
 302        spin_lock(&unix_table_lock);
 303        s = __unix_find_socket_byname(net, sunname, len, type, hash);
 304        if (s)
 305                sock_hold(s);
 306        spin_unlock(&unix_table_lock);
 307        return s;
 308}
 309
 310static struct sock *unix_find_socket_byinode(struct inode *i)
 311{
 312        struct sock *s;
 313
 314        spin_lock(&unix_table_lock);
 315        sk_for_each(s,
 316                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 317                struct dentry *dentry = unix_sk(s)->path.dentry;
 318
 319                if (dentry && d_backing_inode(dentry) == i) {
 320                        sock_hold(s);
 321                        goto found;
 322                }
 323        }
 324        s = NULL;
 325found:
 326        spin_unlock(&unix_table_lock);
 327        return s;
 328}
 329
 330/* Support code for asymmetrically connected dgram sockets
 331 *
 332 * If a datagram socket is connected to a socket not itself connected
 333 * to the first socket (eg, /dev/log), clients may only enqueue more
 334 * messages if the present receive queue of the server socket is not
 335 * "too large". This means there's a second writeability condition
 336 * poll and sendmsg need to test. The dgram recv code will do a wake
 337 * up on the peer_wait wait queue of a socket upon reception of a
 338 * datagram which needs to be propagated to sleeping would-be writers
 339 * since these might not have sent anything so far. This can't be
 340 * accomplished via poll_wait because the lifetime of the server
 341 * socket might be less than that of its clients if these break their
 342 * association with it or if the server socket is closed while clients
 343 * are still connected to it and there's no way to inform "a polling
 344 * implementation" that it should let go of a certain wait queue
 345 *
 346 * In order to propagate a wake up, a wait_queue_entry_t of the client
 347 * socket is enqueued on the peer_wait queue of the server socket
 348 * whose wake function does a wake_up on the ordinary client socket
 349 * wait queue. This connection is established whenever a write (or
 350 * poll for write) hit the flow control condition and broken when the
 351 * association to the server socket is dissolved or after a wake up
 352 * was relayed.
 353 */
 354
 355static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 356                                      void *key)
 357{
 358        struct unix_sock *u;
 359        wait_queue_head_t *u_sleep;
 360
 361        u = container_of(q, struct unix_sock, peer_wake);
 362
 363        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 364                            q);
 365        u->peer_wake.private = NULL;
 366
 367        /* relaying can only happen while the wq still exists */
 368        u_sleep = sk_sleep(&u->sk);
 369        if (u_sleep)
 370                wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 371
 372        return 0;
 373}
 374
 375static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 376{
 377        struct unix_sock *u, *u_other;
 378        int rc;
 379
 380        u = unix_sk(sk);
 381        u_other = unix_sk(other);
 382        rc = 0;
 383        spin_lock(&u_other->peer_wait.lock);
 384
 385        if (!u->peer_wake.private) {
 386                u->peer_wake.private = other;
 387                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 388
 389                rc = 1;
 390        }
 391
 392        spin_unlock(&u_other->peer_wait.lock);
 393        return rc;
 394}
 395
 396static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 397                                            struct sock *other)
 398{
 399        struct unix_sock *u, *u_other;
 400
 401        u = unix_sk(sk);
 402        u_other = unix_sk(other);
 403        spin_lock(&u_other->peer_wait.lock);
 404
 405        if (u->peer_wake.private == other) {
 406                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 407                u->peer_wake.private = NULL;
 408        }
 409
 410        spin_unlock(&u_other->peer_wait.lock);
 411}
 412
 413static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 414                                                   struct sock *other)
 415{
 416        unix_dgram_peer_wake_disconnect(sk, other);
 417        wake_up_interruptible_poll(sk_sleep(sk),
 418                                   EPOLLOUT |
 419                                   EPOLLWRNORM |
 420                                   EPOLLWRBAND);
 421}
 422
 423/* preconditions:
 424 *      - unix_peer(sk) == other
 425 *      - association is stable
 426 */
 427static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 428{
 429        int connected;
 430
 431        connected = unix_dgram_peer_wake_connect(sk, other);
 432
 433        if (unix_recvq_full(other))
 434                return 1;
 435
 436        if (connected)
 437                unix_dgram_peer_wake_disconnect(sk, other);
 438
 439        return 0;
 440}
 441
 442static int unix_writable(const struct sock *sk)
 443{
 444        return sk->sk_state != TCP_LISTEN &&
 445               (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 446}
 447
 448static void unix_write_space(struct sock *sk)
 449{
 450        struct socket_wq *wq;
 451
 452        rcu_read_lock();
 453        if (unix_writable(sk)) {
 454                wq = rcu_dereference(sk->sk_wq);
 455                if (skwq_has_sleeper(wq))
 456                        wake_up_interruptible_sync_poll(&wq->wait,
 457                                EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 458                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 459        }
 460        rcu_read_unlock();
 461}
 462
 463/* When dgram socket disconnects (or changes its peer), we clear its receive
 464 * queue of packets arrived from previous peer. First, it allows to do
 465 * flow control based only on wmem_alloc; second, sk connected to peer
 466 * may receive messages only from that peer. */
 467static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 468{
 469        if (!skb_queue_empty(&sk->sk_receive_queue)) {
 470                skb_queue_purge(&sk->sk_receive_queue);
 471                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 472
 473                /* If one link of bidirectional dgram pipe is disconnected,
 474                 * we signal error. Messages are lost. Do not make this,
 475                 * when peer was not connected to us.
 476                 */
 477                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 478                        other->sk_err = ECONNRESET;
 479                        other->sk_error_report(other);
 480                }
 481        }
 482}
 483
 484static void unix_sock_destructor(struct sock *sk)
 485{
 486        struct unix_sock *u = unix_sk(sk);
 487
 488        skb_queue_purge(&sk->sk_receive_queue);
 489
 490        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 491        WARN_ON(!sk_unhashed(sk));
 492        WARN_ON(sk->sk_socket);
 493        if (!sock_flag(sk, SOCK_DEAD)) {
 494                pr_info("Attempt to release alive unix socket: %p\n", sk);
 495                return;
 496        }
 497
 498        if (u->addr)
 499                unix_release_addr(u->addr);
 500
 501        atomic_long_dec(&unix_nr_socks);
 502        local_bh_disable();
 503        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 504        local_bh_enable();
 505#ifdef UNIX_REFCNT_DEBUG
 506        pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 507                atomic_long_read(&unix_nr_socks));
 508#endif
 509}
 510
/*
 * Tear down unix socket @sk.
 *
 * @embrion is non-zero when the function calls itself for a socket that was
 * still sitting in the receive queue of a listening socket; in that case a
 * stream/seqpacket peer always gets ECONNRESET.
 *
 * Steps, in order: unhash the socket; under the state lock orphan it, mark
 * it fully shut down and closed, and detach its path; wake peer_wait
 * sleepers; notify and drop the peer; flush the receive queue; release the
 * path; drop the caller's reference; finally run the fd garbage collector
 * if any unix sockets are in flight.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        /* keep a private copy of the path; it is released further down */
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        /* remember the old state: a listener's queue needs special handling */
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /* unread data or an embryo socket: peer sees a reset */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /* for a former listener, release the socket behind each queued skb */
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}
 584
 585static void init_peercred(struct sock *sk)
 586{
 587        put_pid(sk->sk_peer_pid);
 588        if (sk->sk_peer_cred)
 589                put_cred(sk->sk_peer_cred);
 590        sk->sk_peer_pid  = get_pid(task_tgid(current));
 591        sk->sk_peer_cred = get_current_cred();
 592}
 593
 594static void copy_peercred(struct sock *sk, struct sock *peersk)
 595{
 596        put_pid(sk->sk_peer_pid);
 597        if (sk->sk_peer_cred)
 598                put_cred(sk->sk_peer_cred);
 599        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 600        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 601}
 602
 603static int unix_listen(struct socket *sock, int backlog)
 604{
 605        int err;
 606        struct sock *sk = sock->sk;
 607        struct unix_sock *u = unix_sk(sk);
 608        struct pid *old_pid = NULL;
 609
 610        err = -EOPNOTSUPP;
 611        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 612                goto out;       /* Only stream/seqpacket sockets accept */
 613        err = -EINVAL;
 614        if (!u->addr)
 615                goto out;       /* No listens on an unbound socket */
 616        unix_state_lock(sk);
 617        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 618                goto out_unlock;
 619        if (backlog > sk->sk_max_ack_backlog)
 620                wake_up_interruptible_all(&u->peer_wait);
 621        sk->sk_max_ack_backlog  = backlog;
 622        sk->sk_state            = TCP_LISTEN;
 623        /* set credentials so connect can copy them */
 624        init_peercred(sk);
 625        err = 0;
 626
 627out_unlock:
 628        unix_state_unlock(sk);
 629        put_pid(old_pid);
 630out:
 631        return err;
 632}
 633
/*
 * Forward declarations of the socket operations defined later in this file;
 * they are needed by the proto_ops tables below.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);
 660
 661static int unix_set_peek_off(struct sock *sk, int val)
 662{
 663        struct unix_sock *u = unix_sk(sk);
 664
 665        if (mutex_lock_interruptible(&u->iolock))
 666                return -EINTR;
 667
 668        sk->sk_peek_off = val;
 669        mutex_unlock(&u->iolock);
 670
 671        return 0;
 672}
 673
 674
/*
 * Operations for SOCK_STREAM unix sockets (selected in unix_create()).
 * This is the only table with dedicated sendpage and splice_read handlers;
 * socket options and mmap are routed to the generic sock_no_* stubs.
 */
static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
};
 697
/*
 * Operations for SOCK_DGRAM unix sockets (unix_create() also maps SOCK_RAW
 * onto this table). accept and listen are routed to the sock_no_* stubs,
 * as are socket options, mmap and sendpage.
 */
static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
 719
/*
 * Operations for SOCK_SEQPACKET unix sockets (selected in unix_create()).
 * Connection handling (connect, accept, listen) is shared with the stream
 * table, polling is shared with the dgram table, and sendmsg/recvmsg have
 * seqpacket-specific handlers.
 */
static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
 741
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(). obj_size is
 * the size of struct unix_sock, the per-socket structure this file reaches
 * through unix_sk().
 */
static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};
 747
 748static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 749{
 750        struct sock *sk = NULL;
 751        struct unix_sock *u;
 752
 753        atomic_long_inc(&unix_nr_socks);
 754        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 755                goto out;
 756
 757        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 758        if (!sk)
 759                goto out;
 760
 761        sock_init_data(sock, sk);
 762
 763        sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 764        sk->sk_write_space      = unix_write_space;
 765        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 766        sk->sk_destruct         = unix_sock_destructor;
 767        u         = unix_sk(sk);
 768        u->path.dentry = NULL;
 769        u->path.mnt = NULL;
 770        spin_lock_init(&u->lock);
 771        atomic_long_set(&u->inflight, 0);
 772        INIT_LIST_HEAD(&u->link);
 773        mutex_init(&u->iolock); /* single task reading lock */
 774        mutex_init(&u->bindlock); /* single task binding lock */
 775        init_waitqueue_head(&u->peer_wait);
 776        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 777        unix_insert_socket(unix_sockets_unbound(sk), sk);
 778out:
 779        if (sk == NULL)
 780                atomic_long_dec(&unix_nr_socks);
 781        else {
 782                local_bh_disable();
 783                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 784                local_bh_enable();
 785        }
 786        return sk;
 787}
 788
 789static int unix_create(struct net *net, struct socket *sock, int protocol,
 790                       int kern)
 791{
 792        if (protocol && protocol != PF_UNIX)
 793                return -EPROTONOSUPPORT;
 794
 795        sock->state = SS_UNCONNECTED;
 796
 797        switch (sock->type) {
 798        case SOCK_STREAM:
 799                sock->ops = &unix_stream_ops;
 800                break;
 801                /*
 802                 *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 803                 *      nothing uses it.
 804                 */
 805        case SOCK_RAW:
 806                sock->type = SOCK_DGRAM;
 807                /* fall through */
 808        case SOCK_DGRAM:
 809                sock->ops = &unix_dgram_ops;
 810                break;
 811        case SOCK_SEQPACKET:
 812                sock->ops = &unix_seqpacket_ops;
 813                break;
 814        default:
 815                return -ESOCKTNOSUPPORT;
 816        }
 817
 818        return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 819}
 820
 821static int unix_release(struct socket *sock)
 822{
 823        struct sock *sk = sock->sk;
 824
 825        if (!sk)
 826                return 0;
 827
 828        unix_release_sock(sk, 0);
 829        sock->sk = NULL;
 830
 831        return 0;
 832}
 833
 834static int unix_autobind(struct socket *sock)
 835{
 836        struct sock *sk = sock->sk;
 837        struct net *net = sock_net(sk);
 838        struct unix_sock *u = unix_sk(sk);
 839        static u32 ordernum = 1;
 840        struct unix_address *addr;
 841        int err;
 842        unsigned int retries = 0;
 843
 844        err = mutex_lock_interruptible(&u->bindlock);
 845        if (err)
 846                return err;
 847
 848        err = 0;
 849        if (u->addr)
 850                goto out;
 851
 852        err = -ENOMEM;
 853        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 854        if (!addr)
 855                goto out;
 856
 857        addr->name->sun_family = AF_UNIX;
 858        refcount_set(&addr->refcnt, 1);
 859
 860retry:
 861        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 862        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 863
 864        spin_lock(&unix_table_lock);
 865        ordernum = (ordernum+1)&0xFFFFF;
 866
 867        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 868                                      addr->hash)) {
 869                spin_unlock(&unix_table_lock);
 870                /*
 871                 * __unix_find_socket_byname() may take long time if many names
 872                 * are already in use.
 873                 */
 874                cond_resched();
 875                /* Give up if all names seems to be in use. */
 876                if (retries++ == 0xFFFFF) {
 877                        err = -ENOSPC;
 878                        kfree(addr);
 879                        goto out;
 880                }
 881                goto retry;
 882        }
 883        addr->hash ^= sk->sk_type;
 884
 885        __unix_remove_socket(sk);
 886        u->addr = addr;
 887        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 888        spin_unlock(&unix_table_lock);
 889        err = 0;
 890
 891out:    mutex_unlock(&u->bindlock);
 892        return err;
 893}
 894
 895static struct sock *unix_find_other(struct net *net,
 896                                    struct sockaddr_un *sunname, int len,
 897                                    int type, unsigned int hash, int *error)
 898{
 899        struct sock *u;
 900        struct path path;
 901        int err = 0;
 902
 903        if (sunname->sun_path[0]) {
 904                struct inode *inode;
 905                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 906                if (err)
 907                        goto fail;
 908                inode = d_backing_inode(path.dentry);
 909                err = inode_permission(inode, MAY_WRITE);
 910                if (err)
 911                        goto put_fail;
 912
 913                err = -ECONNREFUSED;
 914                if (!S_ISSOCK(inode->i_mode))
 915                        goto put_fail;
 916                u = unix_find_socket_byinode(inode);
 917                if (!u)
 918                        goto put_fail;
 919
 920                if (u->sk_type == type)
 921                        touch_atime(&path);
 922
 923                path_put(&path);
 924
 925                err = -EPROTOTYPE;
 926                if (u->sk_type != type) {
 927                        sock_put(u);
 928                        goto fail;
 929                }
 930        } else {
 931                err = -ECONNREFUSED;
 932                u = unix_find_socket_byname(net, sunname, len, type, hash);
 933                if (u) {
 934                        struct dentry *dentry;
 935                        dentry = unix_sk(u)->path.dentry;
 936                        if (dentry)
 937                                touch_atime(&unix_sk(u)->path);
 938                } else
 939                        goto fail;
 940        }
 941        return u;
 942
 943put_fail:
 944        path_put(&path);
 945fail:
 946        *error = err;
 947        return NULL;
 948}
 949
 950static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 951{
 952        struct dentry *dentry;
 953        struct path path;
 954        int err = 0;
 955        /*
 956         * Get the parent directory, calculate the hash for last
 957         * component.
 958         */
 959        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 960        err = PTR_ERR(dentry);
 961        if (IS_ERR(dentry))
 962                return err;
 963
 964        /*
 965         * All right, let's create it.
 966         */
 967        err = security_path_mknod(&path, dentry, mode, 0);
 968        if (!err) {
 969                err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 970                if (!err) {
 971                        res->mnt = mntget(path.mnt);
 972                        res->dentry = dget(dentry);
 973                }
 974        }
 975        done_path_create(&path, dentry);
 976        return err;
 977}
 978
    /*
     * Bind @sock to the address in @uaddr (@addr_len bytes).
     *
     * - An address consisting only of the family field (addr_len ==
     *   sizeof(short)) is handed to unix_autobind().
     * - If sun_path starts with a non-zero byte, a socket node is created
     *   with unix_mknod() and the socket is hashed by that inode's number.
     * - Otherwise the name is checked for uniqueness with
     *   __unix_find_socket_byname() and hashed by name.
     *
     * Returns 0 on success or a negative errno: -EINVAL for a short or
     * non-AF_UNIX address or a socket that already has an address,
     * -EADDRINUSE if the name or path already exists, -ENOMEM on allocation
     * failure, or an error from unix_mkname(), unix_mknod() or
     * mutex_lock_interruptible().
     */
 979static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 980{
 981        struct sock *sk = sock->sk;
 982        struct net *net = sock_net(sk);
 983        struct unix_sock *u = unix_sk(sk);
 984        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 985        char *sun_path = sunaddr->sun_path;
 986        int err;
 987        unsigned int hash;
 988        struct unix_address *addr;
 989        struct hlist_head *list;
 990        struct path path = { };
 991
 992        err = -EINVAL;
 993        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
 994            sunaddr->sun_family != AF_UNIX)
 995                goto out;
 996
            /* Only the family was supplied: pick a name automatically. */
 997        if (addr_len == sizeof(short)) {
 998                err = unix_autobind(sock);
 999                goto out;
1000        }
1001
            /*
             * A non-negative return from unix_mkname() is used as the
             * address length from here on; it also fills in 'hash'.
             */
1002        err = unix_mkname(sunaddr, addr_len, &hash);
1003        if (err < 0)
1004                goto out;
1005        addr_len = err;
1006
1007        if (sun_path[0]) {
                    /* Node permissions: socket inode mode masked by the umask. */
1008                umode_t mode = S_IFSOCK |
1009                       (SOCK_INODE(sock)->i_mode & ~current_umask());
1010                err = unix_mknod(sun_path, mode, &path);
1011                if (err) {
1012                        if (err == -EEXIST)
1013                                err = -EADDRINUSE;
1014                        goto out;
1015                }
1016        }
1017
            /*
             * NOTE(review): the node was created above, before bindlock is
             * taken.  If anything below fails, out_put only drops the path
             * reference; nothing here removes the node again - confirm this
             * is intended.
             */
1018        err = mutex_lock_interruptible(&u->bindlock);
1019        if (err)
1020                goto out_put;
1021
            /* A socket can only be bound once. */
1022        err = -EINVAL;
1023        if (u->addr)
1024                goto out_up;
1025
1026        err = -ENOMEM;
1027        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1028        if (!addr)
1029                goto out_up;
1030
1031        memcpy(addr->name, sunaddr, addr_len);
1032        addr->len = addr_len;
1033        addr->hash = hash ^ sk->sk_type;
1034        refcount_set(&addr->refcnt, 1);
1035
1036        if (sun_path[0]) {
                    /*
                     * Path-bound socket: addr->hash is set to UNIX_HASH_SIZE
                     * and the bucket is chosen from the inode number instead
                     * of the name hash.
                     */
1037                addr->hash = UNIX_HASH_SIZE;
1038                hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1039                spin_lock(&unix_table_lock);
1040                u->path = path;
1041                list = &unix_socket_table[hash];
1042        } else {
                    /* Name without a path: must not be in use already. */
1043                spin_lock(&unix_table_lock);
1044                err = -EADDRINUSE;
1045                if (__unix_find_socket_byname(net, sunaddr, addr_len,
1046                                              sk->sk_type, hash)) {
1047                        unix_release_addr(addr);
1048                        goto out_unlock;
1049                }
1050
1051                list = &unix_socket_table[addr->hash];
1052        }
1053
            /* Move the socket from its current hash list to the chosen one. */
1054        err = 0;
1055        __unix_remove_socket(sk);
1056        u->addr = addr;
1057        __unix_insert_socket(list, sk);
1058
1059out_unlock:
1060        spin_unlock(&unix_table_lock);
1061out_up:
1062        mutex_unlock(&u->bindlock);
1063out_put:
            /* On success the path reference now belongs to u->path. */
1064        if (err)
1065                path_put(&path);
1066out:
1067        return err;
1068}
1069
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * When @sk2 is NULL or equal to @sk1 only @sk1 is locked.  Otherwise the
 * socket at the lower address is locked first and the other one is taken
 * with unix_state_lock_nested().
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first;
        struct sock *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1084
/*
 * Counterpart of unix_state_double_lock(): always unlocks @sk1, and also
 * unlocks @sk2 when it is non-NULL and distinct from @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (sk2 && likely(sk1 != sk2))
                unix_state_unlock(sk2);
}
1094
    /*
     * Set (or, for an AF_UNSPEC address, clear) the peer of @sock.
     *
     * For any family other than AF_UNSPEC the target is looked up with
     * unix_find_other(), both sockets are state-locked, and the peer is
     * installed only if unix_may_send() and security_unix_may_send() allow
     * it.  A previously set peer is replaced and its reference dropped.
     *
     * Returns 0 on success or a negative errno: -EINVAL if @alen does not
     * cover sa_family, -EPERM if unix_may_send() refuses, or an error from
     * unix_mkname(), unix_autobind(), unix_find_other() or the security hook.
     */
1095static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1096                              int alen, int flags)
1097{
1098        struct sock *sk = sock->sk;
1099        struct net *net = sock_net(sk);
1100        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1101        struct sock *other;
1102        unsigned int hash;
1103        int err;
1104
1105        err = -EINVAL;
1106        if (alen < offsetofend(struct sockaddr, sa_family))
1107                goto out;
1108
1109        if (addr->sa_family != AF_UNSPEC) {
1110                err = unix_mkname(sunaddr, alen, &hash);
1111                if (err < 0)
1112                        goto out;
1113                alen = err;
1114
                    /* SOCK_PASSCRED set but no address yet: autobind first. */
1115                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1116                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1117                        goto out;
1118
1119restart:
1120                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1121                if (!other)
1122                        goto out;
1123
1124                unix_state_double_lock(sk, other);
1125
1126                /* Apparently VFS overslept socket death. Retry. */
1127                if (sock_flag(other, SOCK_DEAD)) {
1128                        unix_state_double_unlock(sk, other);
1129                        sock_put(other);
1130                        goto restart;
1131                }
1132
1133                err = -EPERM;
1134                if (!unix_may_send(sk, other))
1135                        goto out_unlock;
1136
1137                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1138                if (err)
1139                        goto out_unlock;
1140
1141        } else {
1142                /*
1143                 *      1003.1g breaking connected state with AF_UNSPEC
1144                 */
1145                other = NULL;
                    /* With a NULL second argument only sk gets locked. */
1146                unix_state_double_lock(sk, other);
1147        }
1148
1149        /*
1150         * If it was connected, reconnect.
1151         */
1152        if (unix_peer(sk)) {
1153                struct sock *old_peer = unix_peer(sk);
1154                unix_peer(sk) = other;
1155                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1156
1157                unix_state_double_unlock(sk, other);
1158
                    /* Disconnect handling runs only if the peer really changed. */
1159                if (other != old_peer)
1160                        unix_dgram_disconnected(sk, old_peer);
1161                sock_put(old_peer);
1162        } else {
1163                unix_peer(sk) = other;
1164                unix_state_double_unlock(sk, other);
1165        }
1166        return 0;
1167
1168out_unlock:
1169        unix_state_double_unlock(sk, other);
1170        sock_put(other);
1171out:
1172        return err;
1173}
1174
1175static long unix_wait_for_peer(struct sock *other, long timeo)
1176{
1177        struct unix_sock *u = unix_sk(other);
1178        int sched;
1179        DEFINE_WAIT(wait);
1180
1181        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1182
1183        sched = !sock_flag(other, SOCK_DEAD) &&
1184                !(other->sk_shutdown & RCV_SHUTDOWN) &&
1185                unix_recvq_full(other);
1186
1187        unix_state_unlock(other);
1188
1189        if (sched)
1190                timeo = schedule_timeout(timeo);
1191
1192        finish_wait(&u->peer_wait, &wait);
1193        return timeo;
1194}
1195
    /*
     * Connect @sock to the listening socket named by @uaddr.
     *
     * A new sock (newsk) and a one-byte skb are allocated up front.  Once
     * the listener is found and both state locks are held, newsk is wired
     * up as the peer of sk, sk becomes TCP_ESTABLISHED, and the skb is
     * queued on the listener's receive queue.
     *
     * Returns 0 on success or a negative errno: -ENOMEM on allocation
     * failure, -ECONNREFUSED if the target is not listening or has shut
     * down receiving, -EAGAIN if its queue is full and no timeout is
     * allowed, -EISCONN / -EINVAL depending on sk's own state, or an error
     * from unix_mkname(), unix_autobind(), unix_find_other(),
     * sock_intr_errno() or the security hook.
     */
1196static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1197                               int addr_len, int flags)
1198{
1199        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1200        struct sock *sk = sock->sk;
1201        struct net *net = sock_net(sk);
1202        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1203        struct sock *newsk = NULL;
1204        struct sock *other = NULL;
1205        struct sk_buff *skb = NULL;
1206        unsigned int hash;
1207        int st;
1208        int err;
1209        long timeo;
1210
1211        err = unix_mkname(sunaddr, addr_len, &hash);
1212        if (err < 0)
1213                goto out;
1214        addr_len = err;
1215
            /* SOCK_PASSCRED set but no address yet: autobind first. */
1216        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1217            (err = unix_autobind(sock)) != 0)
1218                goto out;
1219
1220        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1221
1222        /* First of all allocate resources.
1223           If we will make it after state is locked,
1224           we will have to recheck all again in any case.
1225         */
1226
1227        err = -ENOMEM;
1228
1229        /* create new sock for complete connection */
1230        newsk = unix_create1(sock_net(sk), NULL, 0);
1231        if (newsk == NULL)
1232                goto out;
1233
1234        /* Allocate skb for sending to listening sock */
            /*
             * NOTE(review): allocated against newsk, presumably so that
             * unix_accept() can recover newsk through skb->sk - confirm.
             */
1235        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1236        if (skb == NULL)
1237                goto out;
1238
1239restart:
1240        /*  Find listening sock. */
1241        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1242        if (!other)
1243                goto out;
1244
1245        /* Latch state of peer */
1246        unix_state_lock(other);
1247
1248        /* Apparently VFS overslept socket death. Retry. */
1249        if (sock_flag(other, SOCK_DEAD)) {
1250                unix_state_unlock(other);
1251                sock_put(other);
1252                goto restart;
1253        }
1254
1255        err = -ECONNREFUSED;
1256        if (other->sk_state != TCP_LISTEN)
1257                goto out_unlock;
1258        if (other->sk_shutdown & RCV_SHUTDOWN)
1259                goto out_unlock;
1260
1261        if (unix_recvq_full(other)) {
1262                err = -EAGAIN;
1263                if (!timeo)
1264                        goto out_unlock;
1265
                    /* unix_wait_for_peer() drops other's state lock. */
1266                timeo = unix_wait_for_peer(other, timeo);
1267
1268                err = sock_intr_errno(timeo);
1269                if (signal_pending(current))
1270                        goto out;
1271                sock_put(other);
1272                goto restart;
1273        }
1274
1275        /* Latch our state.
1276
1277           It is tricky place. We need to grab our state lock and cannot
1278           drop lock on peer. It is dangerous because deadlock is
1279           possible. Connect to self case and simultaneous
1280           attempt to connect are eliminated by checking socket
1281           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1282           check this before attempt to grab lock.
1283
1284           Well, and we have to recheck the state after socket locked.
1285         */
1286        st = sk->sk_state;
1287
1288        switch (st) {
1289        case TCP_CLOSE:
1290                /* This is ok... continue with connect */
1291                break;
1292        case TCP_ESTABLISHED:
1293                /* Socket is already connected */
1294                err = -EISCONN;
1295                goto out_unlock;
1296        default:
1297                err = -EINVAL;
1298                goto out_unlock;
1299        }
1300
1301        unix_state_lock_nested(sk);
1302
            /* State changed while we were not holding sk's lock: start over. */
1303        if (sk->sk_state != st) {
1304                unix_state_unlock(sk);
1305                unix_state_unlock(other);
1306                sock_put(other);
1307                goto restart;
1308        }
1309
1310        err = security_unix_stream_connect(sk, other, newsk);
1311        if (err) {
1312                unix_state_unlock(sk);
1313                goto out_unlock;
1314        }
1315
1316        /* The way is open! Fastly set all the necessary fields... */
1317
            /* newsk's peer pointer holds a reference on sk. */
1318        sock_hold(sk);
1319        unix_peer(newsk)        = sk;
1320        newsk->sk_state         = TCP_ESTABLISHED;
1321        newsk->sk_type          = sk->sk_type;
1322        init_peercred(newsk);
1323        newu = unix_sk(newsk);
1324        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1325        otheru = unix_sk(other);
1326
1327        /* copy address information from listening to new sock*/
1328        if (otheru->addr) {
1329                refcount_inc(&otheru->addr->refcnt);
1330                newu->addr = otheru->addr;
1331        }
1332        if (otheru->path.dentry) {
1333                path_get(&otheru->path);
1334                newu->path = otheru->path;
1335        }
1336
1337        /* Set credentials */
1338        copy_peercred(sk, other);
1339
1340        sock->state     = SS_CONNECTED;
1341        sk->sk_state    = TCP_ESTABLISHED;
            /* sk's peer pointer holds a reference on newsk. */
1342        sock_hold(newsk);
1343
1344        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1345        unix_peer(sk)   = newsk;
1346
1347        unix_state_unlock(sk);
1348
1349        /* take ten and send info to listening sock */
1350        spin_lock(&other->sk_receive_queue.lock);
1351        __skb_queue_tail(&other->sk_receive_queue, skb);
1352        spin_unlock(&other->sk_receive_queue.lock);
1353        unix_state_unlock(other);
1354        other->sk_data_ready(other);
1355        sock_put(other);
1356        return 0;
1357
1358out_unlock:
1359        if (other)
1360                unix_state_unlock(other);
1361
1362out:
            /* Common cleanup: each resource is released only if it was obtained. */
1363        kfree_skb(skb);
1364        if (newsk)
1365                unix_release_sock(newsk, 0);
1366        if (other)
1367                sock_put(other);
1368        return err;
1369}
1370
1371static int unix_socketpair(struct socket *socka, struct socket *sockb)
1372{
1373        struct sock *ska = socka->sk, *skb = sockb->sk;
1374
1375        /* Join our sockets back to back */
1376        sock_hold(ska);
1377        sock_hold(skb);
1378        unix_peer(ska) = skb;
1379        unix_peer(skb) = ska;
1380        init_peercred(ska);
1381        init_peercred(skb);
1382
1383        if (ska->sk_type != SOCK_DGRAM) {
1384                ska->sk_state = TCP_ESTABLISHED;
1385                skb->sk_state = TCP_ESTABLISHED;
1386                socka->state  = SS_CONNECTED;
1387                sockb->state  = SS_CONNECTED;
1388        }
1389        return 0;
1390}
1391
1392static void unix_sock_inherit_flags(const struct socket *old,
1393                                    struct socket *new)
1394{
1395        if (test_bit(SOCK_PASSCRED, &old->flags))
1396                set_bit(SOCK_PASSCRED, &new->flags);
1397        if (test_bit(SOCK_PASSSEC, &old->flags))
1398                set_bit(SOCK_PASSSEC, &new->flags);
1399}
1400
1401static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1402                       bool kern)
1403{
1404        struct sock *sk = sock->sk;
1405        struct sock *tsk;
1406        struct sk_buff *skb;
1407        int err;
1408
1409        err = -EOPNOTSUPP;
1410        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1411                goto out;
1412
1413        err = -EINVAL;
1414        if (sk->sk_state != TCP_LISTEN)
1415                goto out;
1416
1417        /* If socket state is TCP_LISTEN it cannot change (for now...),
1418         * so that no locks are necessary.
1419         */
1420
1421        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1422        if (!skb) {
1423                /* This means receive shutdown. */
1424                if (err == 0)
1425                        err = -EINVAL;
1426                goto out;
1427        }
1428
1429        tsk = skb->sk;
1430        skb_free_datagram(sk, skb);
1431        wake_up_interruptible(&unix_sk(sk)->peer_wait);
1432
1433        /* attach accepted sock to socket */
1434        unix_state_lock(tsk);
1435        newsock->state = SS_CONNECTED;
1436        unix_sock_inherit_flags(sock, newsock);
1437        sock_graft(tsk, newsock);
1438        unix_state_unlock(tsk);
1439        return 0;
1440
1441out:
1442        return err;
1443}
1444
1445
1446static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1447{
1448        struct sock *sk = sock->sk;
1449        struct unix_sock *u;
1450        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1451        int err = 0;
1452
1453        if (peer) {
1454                sk = unix_peer_get(sk);
1455
1456                err = -ENOTCONN;
1457                if (!sk)
1458                        goto out;
1459                err = 0;
1460        } else {
1461                sock_hold(sk);
1462        }
1463
1464        u = unix_sk(sk);
1465        unix_state_lock(sk);
1466        if (!u->addr) {
1467                sunaddr->sun_family = AF_UNIX;
1468                sunaddr->sun_path[0] = 0;
1469                err = sizeof(short);
1470        } else {
1471                struct unix_address *addr = u->addr;
1472
1473                err = addr->len;
1474                memcpy(sunaddr, addr->name, addr->len);
1475        }
1476        unix_state_unlock(sk);
1477        sock_put(sk);
1478out:
1479        return err;
1480}
1481
1482static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1483{
1484        int i;
1485
1486        scm->fp = UNIXCB(skb).fp;
1487        UNIXCB(skb).fp = NULL;
1488
1489        for (i = scm->fp->count-1; i >= 0; i--)
1490                unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1491}
1492
1493static void unix_destruct_scm(struct sk_buff *skb)
1494{
1495        struct scm_cookie scm;
1496        memset(&scm, 0, sizeof(scm));
1497        scm.pid  = UNIXCB(skb).pid;
1498        if (UNIXCB(skb).fp)
1499                unix_detach_fds(&scm, skb);
1500
1501        /* Alas, it calls VFS */
1502        /* So fscking what? fput() had been SMP-safe since the last Summer */
1503        scm_destroy(&scm);
1504        sock_wfree(skb);
1505}
1506
1507/*
1508 * The "user->unix_inflight" variable is protected by the garbage
1509 * collection lock, and we just read it locklessly here. If you go
1510 * over the limit, there might be a tiny race in actually noticing
1511 * it across threads. Tough.
1512 */
1513static inline bool too_many_unix_fds(struct task_struct *p)
1514{
1515        struct user_struct *user = current_user();
1516
1517        if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1518                return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1519        return false;
1520}
1521
1522static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1523{
1524        int i;
1525
1526        if (too_many_unix_fds(current))
1527                return -ETOOMANYREFS;
1528
1529        /*
1530         * Need to duplicate file references for the sake of garbage
1531         * collection.  Otherwise a socket in the fps might become a
1532         * candidate for GC while the skb is not yet queued.
1533         */
1534        UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1535        if (!UNIXCB(skb).fp)
1536                return -ENOMEM;
1537
1538        for (i = scm->fp->count - 1; i >= 0; i--)
1539                unix_inflight(scm->fp->user, scm->fp->fp[i]);
1540        return 0;
1541}
1542
1543static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1544{
1545        int err = 0;
1546
1547        UNIXCB(skb).pid  = get_pid(scm->pid);
1548        UNIXCB(skb).uid = scm->creds.uid;
1549        UNIXCB(skb).gid = scm->creds.gid;
1550        UNIXCB(skb).fp = NULL;
1551        unix_get_secdata(scm, skb);
1552        if (scm->fp && send_fds)
1553                err = unix_attach_fds(scm, skb);
1554
1555        skb->destructor = unix_destruct_scm;
1556        return err;
1557}
1558
1559static bool unix_passcred_enabled(const struct socket *sock,
1560                                  const struct sock *other)
1561{
1562        return test_bit(SOCK_PASSCRED, &sock->flags) ||
1563               !other->sk_socket ||
1564               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1565}
1566
1567/*
1568 * Some apps rely on write() giving SCM_CREDENTIALS
1569 * We include credentials if source or destination socket
1570 * asserted SOCK_PASSCRED.
1571 */
1572static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1573                            const struct sock *other)
1574{
1575        if (UNIXCB(skb).pid)
1576                return;
1577        if (unix_passcred_enabled(sock, other)) {
1578                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1579                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1580        }
1581}
1582
1583static int maybe_init_creds(struct scm_cookie *scm,
1584                            struct socket *socket,
1585                            const struct sock *other)
1586{
1587        int err;
1588        struct msghdr msg = { .msg_controllen = 0 };
1589
1590        err = scm_send(socket, &msg, scm, false);
1591        if (err)
1592                return err;
1593
1594        if (unix_passcred_enabled(socket, other)) {
1595                scm->pid = get_pid(task_tgid(current));
1596                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1597        }
1598        return err;
1599}
1600
1601static bool unix_skb_scm_eq(struct sk_buff *skb,
1602                            struct scm_cookie *scm)
1603{
1604        const struct unix_skb_parms *u = &UNIXCB(skb);
1605
1606        return u->pid == scm->pid &&
1607               uid_eq(u->uid, scm->creds.uid) &&
1608               gid_eq(u->gid, scm->creds.gid) &&
1609               unix_secdata_eq(scm, skb);
1610}
1611
1612/*
1613 *      Send AF_UNIX data.
     *
     *      Sends one datagram of @len bytes from @msg.  The destination is
     *      msg->msg_name when a name is given, otherwise the socket's peer.
     *      The skb is built and filled before the destination is locked;
     *      the tail of the function then checks permissions, handles a dead
     *      or full receiver, and queues the skb.
     *
     *      Returns @len on success (also when sk_filter() drops the packet)
     *      or a negative errno.
1614 */
1615
1616static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1617                              size_t len)
1618{
1619        struct sock *sk = sock->sk;
1620        struct net *net = sock_net(sk);
1621        struct unix_sock *u = unix_sk(sk);
1622        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1623        struct sock *other = NULL;
1624        int namelen = 0; /* fake GCC */
1625        int err;
1626        unsigned int hash;
1627        struct sk_buff *skb;
1628        long timeo;
1629        struct scm_cookie scm;
1630        int data_len = 0;
            /* Set to 1 once sk's own state lock is held in addition to other's. */
1631        int sk_locked;
1632
1633        wait_for_unix_gc();
1634        err = scm_send(sock, msg, &scm, false);
1635        if (err < 0)
1636                return err;
1637
1638        err = -EOPNOTSUPP;
1639        if (msg->msg_flags&MSG_OOB)
1640                goto out;
1641
1642        if (msg->msg_namelen) {
1643                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1644                if (err < 0)
1645                        goto out;
1646                namelen = err;
1647        } else {
                    /* No destination given: the socket must have a peer. */
1648                sunaddr = NULL;
1649                err = -ENOTCONN;
1650                other = unix_peer_get(sk);
1651                if (!other)
1652                        goto out;
1653        }
1654
            /* SOCK_PASSCRED set but no address yet: autobind first. */
1655        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1656            && (err = unix_autobind(sock)) != 0)
1657                goto out;
1658
1659        err = -EMSGSIZE;
1660        if (len > sk->sk_sndbuf - 32)
1661                goto out;
1662
            /*
             * Anything beyond SKB_MAX_ALLOC goes into page fragments
             * (data_len), capped at MAX_SKB_FRAGS pages.
             */
1663        if (len > SKB_MAX_ALLOC) {
1664                data_len = min_t(size_t,
1665                                 len - SKB_MAX_ALLOC,
1666                                 MAX_SKB_FRAGS * PAGE_SIZE);
1667                data_len = PAGE_ALIGN(data_len);
1668
1669                BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1670        }
1671
1672        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1673                                   msg->msg_flags & MSG_DONTWAIT, &err,
1674                                   PAGE_ALLOC_COSTLY_ORDER);
1675        if (skb == NULL)
1676                goto out;
1677
1678        err = unix_scm_to_skb(&scm, skb, true);
1679        if (err < 0)
1680                goto out_free;
1681
1682        skb_put(skb, len - data_len);
1683        skb->data_len = data_len;
1684        skb->len = len;
1685        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1686        if (err)
1687                goto out_free;
1688
1689        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1690
1691restart:
            /*
             * other is NULL here on the first pass with an explicit name, or
             * after a dead receiver was dropped below.
             */
1692        if (!other) {
1693                err = -ECONNRESET;
1694                if (sunaddr == NULL)
1695                        goto out_free;
1696
1697                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1698                                        hash, &err);
1699                if (other == NULL)
1700                        goto out_free;
1701        }
1702
1703        if (sk_filter(other, skb) < 0) {
1704                /* Toss the packet but do not return any error to the sender */
1705                err = len;
1706                goto out_free;
1707        }
1708
1709        sk_locked = 0;
1710        unix_state_lock(other);
1711restart_locked:
1712        err = -EPERM;
1713        if (!unix_may_send(sk, other))
1714                goto out_unlock;
1715
1716        if (unlikely(sock_flag(other, SOCK_DEAD))) {
1717                /*
1718                 *      Check with 1003.1g - what should
1719                 *      datagram error
1720                 */
1721                unix_state_unlock(other);
1722                sock_put(other);
1723
1724                if (!sk_locked)
1725                        unix_state_lock(sk);
1726
1727                err = 0;
                    /*
                     * If the dead socket was our peer, disconnect from it and
                     * fail with -ECONNREFUSED; otherwise look the name up again.
                     */
1728                if (unix_peer(sk) == other) {
1729                        unix_peer(sk) = NULL;
1730                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1731
1732                        unix_state_unlock(sk);
1733
1734                        unix_dgram_disconnected(sk, other);
                            /* Second put: drops the reference the peer pointer held. */
1735                        sock_put(other);
1736                        err = -ECONNREFUSED;
1737                } else {
1738                        unix_state_unlock(sk);
1739                }
1740
1741                other = NULL;
1742                if (err)
1743                        goto out_free;
1744                goto restart;
1745        }
1746
1747        err = -EPIPE;
1748        if (other->sk_shutdown & RCV_SHUTDOWN)
1749                goto out_unlock;
1750
1751        if (sk->sk_type != SOCK_SEQPACKET) {
1752                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1753                if (err)
1754                        goto out_unlock;
1755        }
1756
1757        /* other == sk && unix_peer(other) != sk if
1758         * - unix_peer(sk) == NULL, destination address bound to sk
1759         * - unix_peer(sk) == sk by time of get but disconnected before lock
1760         */
1761        if (other != sk &&
1762            unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1763                if (timeo) {
                            /* Blocking send: unix_wait_for_peer() drops other's lock. */
1764                        timeo = unix_wait_for_peer(other, timeo);
1765
1766                        err = sock_intr_errno(timeo);
1767                        if (signal_pending(current))
1768                                goto out_free;
1769
1770                        goto restart;
1771                }
1772
                    /*
                     * Non-blocking send to a full receiver: both state locks are
                     * needed, so drop other's and retake the pair in order.
                     */
1773                if (!sk_locked) {
1774                        unix_state_unlock(other);
1775                        unix_state_double_lock(sk, other);
1776                }
1777
1778                if (unix_peer(sk) != other ||
1779                    unix_dgram_peer_wake_me(sk, other)) {
1780                        err = -EAGAIN;
1781                        sk_locked = 1;
1782                        goto out_unlock;
1783                }
1784
                    /* other's lock was dropped above, so redo the checks. */
1785                if (!sk_locked) {
1786                        sk_locked = 1;
1787                        goto restart_locked;
1788                }
1789        }
1790
1791        if (unlikely(sk_locked))
1792                unix_state_unlock(sk);
1793
1794        if (sock_flag(other, SOCK_RCVTSTAMP))
1795                __net_timestamp(skb);
1796        maybe_add_creds(skb, sock, other);
1797        skb_queue_tail(&other->sk_receive_queue, skb);
1798        unix_state_unlock(other);
1799        other->sk_data_ready(other);
1800        sock_put(other);
1801        scm_destroy(&scm);
1802        return len;
1803
1804out_unlock:
1805        if (sk_locked)
1806                unix_state_unlock(sk);
1807        unix_state_unlock(other);
1808out_free:
1809        kfree_skb(skb);
1810out:
1811        if (other)
1812                sock_put(other);
1813        scm_destroy(&scm);
1814        return err;
1815}
1816
1817/* We use paged skbs for stream sockets, and limit occupancy to 32768
1818 * bytes, and a minimum of a full page.
1819 */
/* unix_stream_sendmsg() caps every chunk it builds at
 * SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ and passes
 * get_order(UNIX_SKB_FRAGS_SZ) as the last argument of
 * sock_alloc_send_pskb().
 */
1820#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1821
/*
 * unix_stream_sendmsg - queue the payload of @msg on the connected peer.
 * @sock: sending socket
 * @msg:  message header; msg->msg_iter supplies the data
 * @len:  number of bytes to send
 *
 * The payload is cut into chunks.  Each chunk is copied into a freshly
 * allocated skb which is appended to the peer's receive queue while the
 * peer's state lock is held.  unix_scm_to_skb() is applied to every skb,
 * with fd passing enabled for the first one only.
 *
 * MSG_OOB is rejected with -EOPNOTSUPP.  A non-empty destination address
 * fails with -EISCONN when the socket is TCP_ESTABLISHED and with
 * -EOPNOTSUPP otherwise.  A socket without a peer gets -ENOTCONN.
 *
 * Returns the number of bytes queued when that is non-zero, otherwise a
 * negative errno.  -EPIPE is used when this side is shut down for sending
 * or the peer is dead / shut down for receiving; in that case SIGPIPE is
 * raised as well, but only if nothing was sent and MSG_NOSIGNAL is clear.
 */
1822static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1823                               size_t len)
1824{
1825        struct sock *sk = sock->sk;
1826        struct sock *other = NULL;
1827        int err, size;
1828        struct sk_buff *skb;
1829        int sent = 0;
1830        struct scm_cookie scm;
1831        bool fds_sent = false;
1832        int data_len;
1833
1834        wait_for_unix_gc();
1835        err = scm_send(sock, msg, &scm, false);
1836        if (err < 0)
1837                return err;
1838
        /* From here on every exit has to go through scm_destroy(). */
1839        err = -EOPNOTSUPP;
1840        if (msg->msg_flags&MSG_OOB)
1841                goto out_err;
1842
        /* A destination address is never accepted on this path. */
1843        if (msg->msg_namelen) {
1844                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1845                goto out_err;
1846        } else {
1847                err = -ENOTCONN;
1848                other = unix_peer(sk);
1849                if (!other)
1850                        goto out_err;
1851        }
1852
        /* Our own sending direction has been shut down. */
1853        if (sk->sk_shutdown & SEND_SHUTDOWN)
1854                goto pipe_err;
1855
1856        while (sent < len) {
1857                size = len - sent;
1858
1859                /* Keep two messages in the pipe so it schedules better */
1860                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1861
1862                /* allow fallback to order-0 allocations */
1863                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1864
                /* data_len is the part of the chunk beyond SKB_MAX_HEAD(0),
                 * rounded up to a page multiple but never larger than the
                 * chunk itself; it is recorded in skb->data_len below.
                 */
1865                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1866
1867                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1868
1869                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1870                                           msg->msg_flags & MSG_DONTWAIT, &err,
1871                                           get_order(UNIX_SKB_FRAGS_SZ));
1872                if (!skb)
1873                        goto out_err;
1874
1875                /* Only send the fds in the first buffer */
1876                err = unix_scm_to_skb(&scm, skb, !fds_sent);
1877                if (err < 0) {
1878                        kfree_skb(skb);
1879                        goto out_err;
1880                }
1881                fds_sent = true;
1882
                /* Set the linear length with skb_put(), then record the
                 * paged length and the total length by hand before copying
                 * the user data in.
                 */
1883                skb_put(skb, size - data_len);
1884                skb->data_len = data_len;
1885                skb->len = size;
1886                err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1887                if (err) {
1888                        kfree_skb(skb);
1889                        goto out_err;
1890                }
1891
                /* The peer is re-checked under its state lock for every
                 * chunk before the skb is queued.
                 */
1892                unix_state_lock(other);
1893
1894                if (sock_flag(other, SOCK_DEAD) ||
1895                    (other->sk_shutdown & RCV_SHUTDOWN))
1896                        goto pipe_err_free;
1897
1898                maybe_add_creds(skb, sock, other);
1899                skb_queue_tail(&other->sk_receive_queue, skb);
1900                unix_state_unlock(other);
1901                other->sk_data_ready(other);
1902                sent += size;
1903        }
1904
1905        scm_destroy(&scm);
1906
1907        return sent;
1908
        /* Entered with the peer's state lock held and an unqueued skb. */
1909pipe_err_free:
1910        unix_state_unlock(other);
1911        kfree_skb(skb);
1912pipe_err:
1913        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1914                send_sig(SIGPIPE, current, 0);
1915        err = -EPIPE;
1916out_err:
1917        scm_destroy(&scm);
        /* A partial send reports the byte count rather than the error. */
1918        return sent ? : err;
1919}
1920
1921static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1922                                    int offset, size_t size, int flags)
1923{
1924        int err;
1925        bool send_sigpipe = false;
1926        bool init_scm = true;
1927        struct scm_cookie scm;
1928        struct sock *other, *sk = socket->sk;
1929        struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1930
1931        if (flags & MSG_OOB)
1932                return -EOPNOTSUPP;
1933
1934        other = unix_peer(sk);
1935        if (!other || sk->sk_state != TCP_ESTABLISHED)
1936                return -ENOTCONN;
1937
1938        if (false) {
1939alloc_skb:
1940                unix_state_unlock(other);
1941                mutex_unlock(&unix_sk(other)->iolock);
1942                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1943                                              &err, 0);
1944                if (!newskb)
1945                        goto err;
1946        }
1947
1948        /* we must acquire iolock as we modify already present
1949         * skbs in the sk_receive_queue and mess with skb->len
1950         */
1951        err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1952        if (err) {
1953                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1954                goto err;
1955        }
1956
1957        if (sk->sk_shutdown & SEND_SHUTDOWN) {
1958                err = -EPIPE;
1959                send_sigpipe = true;
1960                goto err_unlock;
1961        }
1962
1963        unix_state_lock(other);
1964
1965        if (sock_flag(other, SOCK_DEAD) ||
1966            other->sk_shutdown & RCV_SHUTDOWN) {
1967                err = -EPIPE;
1968                send_sigpipe = true;
1969                goto err_state_unlock;
1970        }
1971
1972        if (init_scm) {
1973                err = maybe_init_creds(&scm, socket, other);
1974                if (err)
1975                        goto err_state_unlock;
1976                init_scm = false;
1977        }
1978
1979        skb = skb_peek_tail(&other->sk_receive_queue);
1980        if (tail && tail == skb) {
1981                skb = newskb;
1982        } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1983                if (newskb) {
1984                        skb = newskb;
1985                } else {
1986                        tail = skb;
1987                        goto alloc_skb;
1988                }
1989        } else if (newskb) {
1990                /* this is fast path, we don't necessarily need to
1991                 * call to kfree_skb even though with newskb == NULL
1992                 * this - does no harm
1993                 */
1994                consume_skb(newskb);
1995                newskb = NULL;
1996        }
1997
1998        if (skb_append_pagefrags(skb, page, offset, size)) {
1999                tail = skb;
2000                goto alloc_skb;
2001        }
2002
2003        skb->len += size;
2004        skb->data_len += size;
2005        skb->truesize += size;
2006        refcount_add(size, &sk->sk_wmem_alloc);
2007
2008        if (newskb) {
2009                err = unix_scm_to_skb(&scm, skb, false);
2010                if (err)
2011                        goto err_state_unlock;
2012                spin_lock(&other->sk_receive_queue.lock);
2013                __skb_queue_tail(&other->sk_receive_queue, newskb);
2014                spin_unlock(&other->sk_receive_queue.lock);
2015        }
2016
2017        unix_state_unlock(other);
2018        mutex_unlock(&unix_sk(other)->iolock);
2019
2020        other->sk_data_ready(other);
2021        scm_destroy(&scm);
2022        return size;
2023
2024err_state_unlock:
2025        unix_state_unlock(other);
2026err_unlock:
2027        mutex_unlock(&unix_sk(other)->iolock);
2028err:
2029        kfree_skb(newskb);
2030        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2031                send_sig(SIGPIPE, current, 0);
2032        if (!init_scm)
2033                scm_destroy(&scm);
2034        return err;
2035}
2036
2037static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2038                                  size_t len)
2039{
2040        int err;
2041        struct sock *sk = sock->sk;
2042
2043        err = sock_error(sk);
2044        if (err)
2045                return err;
2046
2047        if (sk->sk_state != TCP_ESTABLISHED)
2048                return -ENOTCONN;
2049
2050        if (msg->msg_namelen)
2051                msg->msg_namelen = 0;
2052
2053        return unix_dgram_sendmsg(sock, msg, len);
2054}
2055
2056static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2057                                  size_t size, int flags)
2058{
2059        struct sock *sk = sock->sk;
2060
2061        if (sk->sk_state != TCP_ESTABLISHED)
2062                return -ENOTCONN;
2063
2064        return unix_dgram_recvmsg(sock, msg, size, flags);
2065}
2066
2067static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2068{
2069        struct unix_sock *u = unix_sk(sk);
2070
2071        if (u->addr) {
2072                msg->msg_namelen = u->addr->len;
2073                memcpy(msg->msg_name, u->addr->name, u->addr->len);
2074        }
2075}
2076
/*
 * unix_dgram_recvmsg - receive one datagram (unix_seqpacket_recvmsg()
 * ends up here as well).
 * @sock:  receiving socket
 * @msg:   destination message header
 * @size:  maximum number of payload bytes to copy
 * @flags: MSG_* flags; MSG_OOB is rejected with -EOPNOTSUPP
 *
 * The next skb is fetched with __skb_try_recv_datagram() while u->iolock
 * is held; between attempts the function sleeps in
 * __skb_wait_for_more_packets() for as long as the timeout obtained from
 * sock_rcvtimeo() allows.  Up to @size bytes, starting at the peek offset,
 * are copied into @msg and MSG_TRUNC is set in msg->msg_flags when the
 * datagram did not fit.  The sender address (if msg->msg_name is set),
 * credentials, security data and passed file descriptors are delivered
 * through @msg too.
 *
 * Returns the number of bytes copied - or, when MSG_TRUNC was passed in
 * @flags, the datagram length past the peek offset - or a negative errno.
 * On a SOCK_SEQPACKET socket whose receive side is shut down, -EAGAIN is
 * replaced by 0.
 */
2077static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2078                              size_t size, int flags)
2079{
2080        struct scm_cookie scm;
2081        struct sock *sk = sock->sk;
2082        struct unix_sock *u = unix_sk(sk);
2083        struct sk_buff *skb, *last;
2084        long timeo;
2085        int err;
2086        int peeked, skip;
2087
2088        err = -EOPNOTSUPP;
2089        if (flags&MSG_OOB)
2090                goto out;
2091
2092        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2093
        /* Leaves the loop with iolock held if and only if skb != NULL. */
2094        do {
2095                mutex_lock(&u->iolock);
2096
2097                skip = sk_peek_offset(sk, flags);
2098                skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2099                                              &err, &last);
2100                if (skb)
2101                        break;
2102
2103                mutex_unlock(&u->iolock);
2104
2105                if (err != -EAGAIN)
2106                        break;
2107        } while (timeo &&
2108                 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2109
2110        if (!skb) { /* implies iolock unlocked */
2111                unix_state_lock(sk);
2112                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2113                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2114                    (sk->sk_shutdown & RCV_SHUTDOWN))
2115                        err = 0;
2116                unix_state_unlock(sk);
2117                goto out;
2118        }
2119
        /* Wake sleepers on our peer_wait queue with a "writable" poll mask. */
2120        if (wq_has_sleeper(&u->peer_wait))
2121                wake_up_interruptible_sync_poll(&u->peer_wait,
2122                                                EPOLLOUT | EPOLLWRNORM |
2123                                                EPOLLWRBAND);
2124
2125        if (msg->msg_name)
2126                unix_copy_addr(msg, skb->sk);
2127
        /* Clamp to what is left after the peek offset, or flag truncation. */
2128        if (size > skb->len - skip)
2129                size = skb->len - skip;
2130        else if (size < skb->len - skip)
2131                msg->msg_flags |= MSG_TRUNC;
2132
2133        err = skb_copy_datagram_msg(skb, skip, msg, size);
2134        if (err)
2135                goto out_free;
2136
2137        if (sock_flag(sk, SOCK_RCVTSTAMP))
2138                __sock_recv_timestamp(msg, sk, skb);
2139
2140        memset(&scm, 0, sizeof(scm));
2141
2142        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2143        unix_set_secdata(&scm, skb);
2144
2145        if (!(flags & MSG_PEEK)) {
                /* Real read: move any passed fds from the skb into scm and
                 * rewind the peek offset by the full datagram length.
                 */
2146                if (UNIXCB(skb).fp)
2147                        unix_detach_fds(&scm, skb);
2148
2149                sk_peek_offset_bwd(sk, skb->len);
2150        } else {
2151                /* It is questionable: on PEEK we could:
2152                   - do not return fds - good, but too simple 8)
2153                   - return fds, and do not return them on read (old strategy,
2154                     apparently wrong)
2155                   - clone fds (I chose it for now, it is the most universal
2156                     solution)
2157
2158                   POSIX 1003.1g does not actually define this clearly
2159                   at all. POSIX 1003.1g doesn't define a lot of things
2160                   clearly however!
2161
2162                */
2163
2164                sk_peek_offset_fwd(sk, size);
2165
2166                if (UNIXCB(skb).fp)
2167                        scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2168        }
        /* With MSG_TRUNC the caller gets the real length, not the copy size. */
2169        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2170
2171        scm_recv(sock, msg, &scm, flags);
2172
        /* Reached with iolock held and a reference on skb. */
2173out_free:
2174        skb_free_datagram(sk, skb);
2175        mutex_unlock(&u->iolock);
2176out:
2177        return err;
2178}
2179
2180/*
2181 *      Sleep until more data has arrived. But check for races..
2182 */
/*
 * @last and @last_len are the caller's snapshot; they are compared with
 * the current tail of the receive queue and its length.  The wait ends as
 * soon as the tail or its length differs from the snapshot, sk->sk_err is
 * set, the receive side is shut down, a signal is pending, the timeout
 * has reached zero, or the socket is found SOCK_DEAD after waking up.
 * @freezable selects freezable_schedule_timeout() over
 * schedule_timeout().
 *
 * The socket state lock is taken and released here; it is dropped around
 * the actual sleep.  Returns the remaining timeout.
 */
2183static long unix_stream_data_wait(struct sock *sk, long timeo,
2184                                  struct sk_buff *last, unsigned int last_len,
2185                                  bool freezable)
2186{
2187        struct sk_buff *tail;
2188        DEFINE_WAIT(wait);
2189
2190        unix_state_lock(sk);
2191
2192        for (;;) {
2193                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2194
                /* All wake-up conditions are evaluated under the state lock. */
2195                tail = skb_peek_tail(&sk->sk_receive_queue);
2196                if (tail != last ||
2197                    (tail && tail->len != last_len) ||
2198                    sk->sk_err ||
2199                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2200                    signal_pending(current) ||
2201                    !timeo)
2202                        break;
2203
2204                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2205                unix_state_unlock(sk);
2206                if (freezable)
2207                        timeo = freezable_schedule_timeout(timeo);
2208                else
2209                        timeo = schedule_timeout(timeo);
2210                unix_state_lock(sk);
2211
                /* Note: this exit leaves SOCKWQ_ASYNC_WAITDATA set. */
2212                if (sock_flag(sk, SOCK_DEAD))
2213                        break;
2214
2215                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2216        }
2217
2218        finish_wait(sk_sleep(sk), &wait);
2219        unix_state_unlock(sk);
2220        return timeo;
2221}
2222
2223static unsigned int unix_skb_len(const struct sk_buff *skb)
2224{
2225        return skb->len - UNIXCB(skb).consumed;
2226}
2227
/*
 * Parameters of one stream read, shared by the recvmsg and splice paths
 * and handed to unix_stream_read_generic().
 */
2228struct unix_stream_read_state {
        /* Called as recv_actor(skb, skip, chunk, state); returns the number
         * of bytes handled, or a negative value on failure.
         */
2229        int (*recv_actor)(struct sk_buff *, int, int,
2230                          struct unix_stream_read_state *);
        /* Socket being read from. */
2231        struct socket *socket;
        /* Destination for recvmsg; left unset (NULL) by the splice path. */
2232        struct msghdr *msg;
        /* Destination pipe, used by the splice actor only. */
2233        struct pipe_inode_info *pipe;
        /* Number of bytes requested. */
2234        size_t size;
        /* MSG_* flags. */
2235        int flags;
        /* Flags forwarded to skb_splice_bits() by the splice actor. */
2236        unsigned int splice_flags;
2237};
2238
/*
 * unix_stream_read_generic - receive loop shared by recvmsg and splice.
 * @state:     actor callback, socket, size, flags and the msghdr or pipe
 *             the actor delivers to
 * @freezable: forwarded to unix_stream_data_wait()
 *
 * Fails with -EINVAL unless the socket is TCP_ESTABLISHED and with
 * -EOPNOTSUPP for MSG_OOB.  Otherwise the receive queue is walked under
 * u->iolock and each skb - minus the bytes already consumed and the peek
 * offset - is handed to state->recv_actor.  The loop ends when
 * state->size bytes have been delivered, when the queue is empty and the
 * target computed by sock_rcvlowat() is met, or on one of the stop
 * conditions commented in the body (error, shutdown, timeout, signal,
 * credential change, passed fds, partially read skb).
 *
 * Returns the number of bytes delivered when that is non-zero, otherwise
 * err (0 or a negative errno).  An actor failure before anything was
 * delivered is reported as -EFAULT.
 */
2239static int unix_stream_read_generic(struct unix_stream_read_state *state,
2240                                    bool freezable)
2241{
2242        struct scm_cookie scm;
2243        struct socket *sock = state->socket;
2244        struct sock *sk = sock->sk;
2245        struct unix_sock *u = unix_sk(sk);
2246        int copied = 0;
2247        int flags = state->flags;
2248        int noblock = flags & MSG_DONTWAIT;
2249        bool check_creds = false;
2250        int target;
2251        int err = 0;
2252        long timeo;
2253        int skip;
2254        size_t size = state->size;
2255        unsigned int last_len;
2256
2257        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2258                err = -EINVAL;
2259                goto out;
2260        }
2261
2262        if (unlikely(flags & MSG_OOB)) {
2263                err = -EOPNOTSUPP;
2264                goto out;
2265        }
2266
2267        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2268        timeo = sock_rcvtimeo(sk, noblock);
2269
2270        memset(&scm, 0, sizeof(scm));
2271
2272        /* Lock the socket to prevent queue disordering
2273         * while sleeps in memcpy_tomsg
2274         */
2275        mutex_lock(&u->iolock);
2276
2277        skip = max(sk_peek_offset(sk, flags), 0);
2278
2279        do {
2280                int chunk;
2281                bool drop_skb;
2282                struct sk_buff *skb, *last;
2283
                /* redo: start again from the head of the queue. */
2284redo:
2285                unix_state_lock(sk);
2286                if (sock_flag(sk, SOCK_DEAD)) {
2287                        err = -ECONNRESET;
2288                        goto unlock;
2289                }
2290                last = skb = skb_peek(&sk->sk_receive_queue);
2291                last_len = last ? last->len : 0;
                /* again: entered with the state lock held and skb/last set. */
2292again:
2293                if (skb == NULL) {
2294                        if (copied >= target)
2295                                goto unlock;
2296
2297                        /*
2298                         *      POSIX 1003.1g mandates this order.
2299                         */
2300
2301                        err = sock_error(sk);
2302                        if (err)
2303                                goto unlock;
2304                        if (sk->sk_shutdown & RCV_SHUTDOWN)
2305                                goto unlock;
2306
2307                        unix_state_unlock(sk);
2308                        if (!timeo) {
2309                                err = -EAGAIN;
2310                                break;
2311                        }
2312
                        /* iolock is released for the duration of the sleep. */
2313                        mutex_unlock(&u->iolock);
2314
2315                        timeo = unix_stream_data_wait(sk, timeo, last,
2316                                                      last_len, freezable);
2317
                        /* iolock is not held here, so this exit bypasses
                         * the common unlock/scm_recv tail and cleans up scm
                         * itself.
                         */
2318                        if (signal_pending(current)) {
2319                                err = sock_intr_errno(timeo);
2320                                scm_destroy(&scm);
2321                                goto out;
2322                        }
2323
2324                        mutex_lock(&u->iolock);
2325                        goto redo;
                        /* unlock: shared exit for paths that still hold the
                         * state lock; leaves the do/while loop.
                         */
2326unlock:
2327                        unix_state_unlock(sk);
2328                        break;
2329                }
2330
                /* Step over skbs that lie entirely before the peek offset. */
2331                while (skip >= unix_skb_len(skb)) {
2332                        skip -= unix_skb_len(skb);
2333                        last = skb;
2334                        last_len = skb->len;
2335                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2336                        if (!skb)
2337                                goto again;
2338                }
2339
2340                unix_state_unlock(sk);
2341
2342                if (check_creds) {
2343                        /* Never glue messages from different writers */
2344                        if (!unix_skb_scm_eq(skb, &scm))
2345                                break;
2346                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2347                        /* Copy credentials */
2348                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2349                        unix_set_secdata(&scm, skb);
2350                        check_creds = true;
2351                }
2352
2353                /* Copy address just once */
                /* NOTE(review): sunaddr is local to this block, so setting
                 * it to NULL does not stop the copy on later iterations.
                 */
2354                if (state->msg && state->msg->msg_name) {
2355                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2356                                         state->msg->msg_name);
2357                        unix_copy_addr(state->msg, skb->sk);
2358                        sunaddr = NULL;
2359                }
2360
                /* An extra reference keeps skb valid across the actor call;
                 * the state lock is not held while the actor runs.
                 */
2361                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2362                skb_get(skb);
2363                chunk = state->recv_actor(skb, skip, chunk, state);
2364                drop_skb = !unix_skb_len(skb);
2365                /* skb is only safe to use if !drop_skb */
2366                consume_skb(skb);
2367                if (chunk < 0) {
2368                        if (copied == 0)
2369                                copied = -EFAULT;
2370                        break;
2371                }
2372                copied += chunk;
2373                size -= chunk;
2374
2375                if (drop_skb) {
2376                        /* the skb was touched by a concurrent reader;
2377                         * we should not expect anything from this skb
2378                         * anymore and assume it invalid - we can be
2379                         * sure it was dropped from the socket queue
2380                         *
2381                         * let's report a short read
2382                         */
2383                        err = 0;
2384                        break;
2385                }
2386
2387                /* Mark read part of skb as used */
2388                if (!(flags & MSG_PEEK)) {
2389                        UNIXCB(skb).consumed += chunk;
2390
2391                        sk_peek_offset_bwd(sk, chunk);
2392
2393                        if (UNIXCB(skb).fp)
2394                                unix_detach_fds(&scm, skb);
2395
                        /* skb still has unread data: the request is done. */
2396                        if (unix_skb_len(skb))
2397                                break;
2398
                        /* Fully read: take it off the queue and release it. */
2399                        skb_unlink(skb, &sk->sk_receive_queue);
2400                        consume_skb(skb);
2401
                        /* Stop once scm carries file descriptors. */
2402                        if (scm.fp)
2403                                break;
2404                } else {
2405                        /* It is questionable, see note in unix_dgram_recvmsg.
2406                         */
2407                        if (UNIXCB(skb).fp)
2408                                scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2409
2410                        sk_peek_offset_fwd(sk, chunk);
2411
2412                        if (UNIXCB(skb).fp)
2413                                break;
2414
                        /* Peeking: move on to the next skb without
                         * touching the queue; the peek ends when there is
                         * none.
                         */
2415                        skip = 0;
2416                        last = skb;
2417                        last_len = skb->len;
2418                        unix_state_lock(sk);
2419                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2420                        if (skb)
2421                                goto again;
2422                        unix_state_unlock(sk);
2423                        break;
2424                }
2425        } while (size);
2426
2427        mutex_unlock(&u->iolock);
        /* Without a msghdr (splice path) scm is simply released. */
2428        if (state->msg)
2429                scm_recv(sock, state->msg, &scm, flags);
2430        else
2431                scm_destroy(&scm);
2432out:
2433        return copied ? : err;
2434}
2435
2436static int unix_stream_read_actor(struct sk_buff *skb,
2437                                  int skip, int chunk,
2438                                  struct unix_stream_read_state *state)
2439{
2440        int ret;
2441
2442        ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2443                                    state->msg, chunk);
2444        return ret ?: chunk;
2445}
2446
2447static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2448                               size_t size, int flags)
2449{
2450        struct unix_stream_read_state state = {
2451                .recv_actor = unix_stream_read_actor,
2452                .socket = sock,
2453                .msg = msg,
2454                .size = size,
2455                .flags = flags
2456        };
2457
2458        return unix_stream_read_generic(&state, true);
2459}
2460
2461static int unix_stream_splice_actor(struct sk_buff *skb,
2462                                    int skip, int chunk,
2463                                    struct unix_stream_read_state *state)
2464{
2465        return skb_splice_bits(skb, state->socket->sk,
2466                               UNIXCB(skb).consumed + skip,
2467                               state->pipe, chunk, state->splice_flags);
2468}
2469
2470static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2471                                       struct pipe_inode_info *pipe,
2472                                       size_t size, unsigned int flags)
2473{
2474        struct unix_stream_read_state state = {
2475                .recv_actor = unix_stream_splice_actor,
2476                .socket = sock,
2477                .pipe = pipe,
2478                .size = size,
2479                .splice_flags = flags,
2480        };
2481
2482        if (unlikely(*ppos))
2483                return -ESPIPE;
2484
2485        if (sock->file->f_flags & O_NONBLOCK ||
2486            flags & SPLICE_F_NONBLOCK)
2487                state.flags = MSG_DONTWAIT;
2488
2489        return unix_stream_read_generic(&state, false);
2490}
2491
2492static int unix_shutdown(struct socket *sock, int mode)
2493{
2494        struct sock *sk = sock->sk;
2495        struct sock *other;
2496
2497        if (mode < SHUT_RD || mode > SHUT_RDWR)
2498                return -EINVAL;
2499        /* This maps:
2500         * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2501         * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2502         * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2503         */
2504        ++mode;
2505
2506        unix_state_lock(sk);
2507        sk->sk_shutdown |= mode;
2508        other = unix_peer(sk);
2509        if (other)
2510                sock_hold(other);
2511        unix_state_unlock(sk);
2512        sk->sk_state_change(sk);
2513
2514        if (other &&
2515                (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2516
2517                int peer_mode = 0;
2518
2519                if (mode&RCV_SHUTDOWN)
2520                        peer_mode |= SEND_SHUTDOWN;
2521                if (mode&SEND_SHUTDOWN)
2522                        peer_mode |= RCV_SHUTDOWN;
2523                unix_state_lock(other);
2524                other->sk_shutdown |= peer_mode;
2525                unix_state_unlock(other);
2526                other->sk_state_change(other);
2527                if (peer_mode == SHUTDOWN_MASK)
2528                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2529                else if (peer_mode & RCV_SHUTDOWN)
2530                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2531        }
2532        if (other)
2533                sock_put(other);
2534
2535        return 0;
2536}
2537
2538long unix_inq_len(struct sock *sk)
2539{
2540        struct sk_buff *skb;
2541        long amount = 0;
2542
2543        if (sk->sk_state == TCP_LISTEN)
2544                return -EINVAL;
2545
2546        spin_lock(&sk->sk_receive_queue.lock);
2547        if (sk->sk_type == SOCK_STREAM ||
2548            sk->sk_type == SOCK_SEQPACKET) {
2549                skb_queue_walk(&sk->sk_receive_queue, skb)
2550                        amount += unix_skb_len(skb);
2551        } else {
2552                skb = skb_peek(&sk->sk_receive_queue);
2553                if (skb)
2554                        amount = skb->len;
2555        }
2556        spin_unlock(&sk->sk_receive_queue.lock);
2557
2558        return amount;
2559}
2560EXPORT_SYMBOL_GPL(unix_inq_len);
2561
/* unix_outq_len - value reported by sk_wmem_alloc_get() for @sk. */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2567
2568static int unix_open_file(struct sock *sk)
2569{
2570        struct path path;
2571        struct file *f;
2572        int fd;
2573
2574        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2575                return -EPERM;
2576
2577        unix_state_lock(sk);
2578        path = unix_sk(sk)->path;
2579        if (!path.dentry) {
2580                unix_state_unlock(sk);
2581                return -ENOENT;
2582        }
2583
2584        path_get(&path);
2585        unix_state_unlock(sk);
2586
2587        fd = get_unused_fd_flags(O_CLOEXEC);
2588        if (fd < 0)
2589                goto out;
2590
2591        f = dentry_open(&path, O_PATH, current_cred());
2592        if (IS_ERR(f)) {
2593                put_unused_fd(fd);
2594                fd = PTR_ERR(f);
2595                goto out;
2596        }
2597
2598        fd_install(fd, f);
2599out:
2600        path_put(&path);
2601
2602        return fd;
2603}
2604
2605static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2606{
2607        struct sock *sk = sock->sk;
2608        long amount = 0;
2609        int err;
2610
2611        switch (cmd) {
2612        case SIOCOUTQ:
2613                amount = unix_outq_len(sk);
2614                err = put_user(amount, (int __user *)arg);
2615                break;
2616        case SIOCINQ:
2617                amount = unix_inq_len(sk);
2618                if (amount < 0)
2619                        err = amount;
2620                else
2621                        err = put_user(amount, (int __user *)arg);
2622                break;
2623        case SIOCUNIXFILE:
2624                err = unix_open_file(sk);
2625                break;
2626        default:
2627                err = -ENOIOCTLCMD;
2628                break;
2629        }
2630        return err;
2631}
2632
2633static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2634{
2635        struct sock *sk = sock->sk;
2636        __poll_t mask;
2637
2638        sock_poll_wait(file, sk_sleep(sk), wait);
2639        mask = 0;
2640
2641        /* exceptional events? */
2642        if (sk->sk_err)
2643                mask |= EPOLLERR;
2644        if (sk->sk_shutdown == SHUTDOWN_MASK)
2645                mask |= EPOLLHUP;
2646        if (sk->sk_shutdown & RCV_SHUTDOWN)
2647                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2648
2649        /* readable? */
2650        if (!skb_queue_empty(&sk->sk_receive_queue))
2651                mask |= EPOLLIN | EPOLLRDNORM;
2652
2653        /* Connection-based need to check for termination and startup */
2654        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2655            sk->sk_state == TCP_CLOSE)
2656                mask |= EPOLLHUP;
2657
2658        /*
2659         * we set writable also when the other side has shut down the
2660         * connection. This prevents stuck sockets.
2661         */
2662        if (unix_writable(sk))
2663                mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2664
2665        return mask;
2666}
2667
2668static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2669                                    poll_table *wait)
2670{
2671        struct sock *sk = sock->sk, *other;
2672        unsigned int writable;
2673        __poll_t mask;
2674
2675        sock_poll_wait(file, sk_sleep(sk), wait);
2676        mask = 0;
2677
2678        /* exceptional events? */
2679        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2680                mask |= EPOLLERR |
2681                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2682
2683        if (sk->sk_shutdown & RCV_SHUTDOWN)
2684                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2685        if (sk->sk_shutdown == SHUTDOWN_MASK)
2686                mask |= EPOLLHUP;
2687
2688        /* readable? */
2689        if (!skb_queue_empty(&sk->sk_receive_queue))
2690                mask |= EPOLLIN | EPOLLRDNORM;
2691
2692        /* Connection-based need to check for termination and startup */
2693        if (sk->sk_type == SOCK_SEQPACKET) {
2694                if (sk->sk_state == TCP_CLOSE)
2695                        mask |= EPOLLHUP;
2696                /* connection hasn't started yet? */
2697                if (sk->sk_state == TCP_SYN_SENT)
2698                        return mask;
2699        }
2700
2701        /* No write status requested, avoid expensive OUT tests. */
2702        if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2703                return mask;
2704
2705        writable = unix_writable(sk);
2706        if (writable) {
2707                unix_state_lock(sk);
2708
2709                other = unix_peer(sk);
2710                if (other && unix_peer(other) != sk &&
2711                    unix_recvq_full(other) &&
2712                    unix_dgram_peer_wake_me(sk, other))
2713                        writable = 0;
2714
2715                unix_state_unlock(sk);
2716        }
2717
2718        if (writable)
2719                mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2720        else
2721                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2722
2723        return mask;
2724}
2725
2726#ifdef CONFIG_PROC_FS
2727
/*
 * The seq_file position packs two values into one number: the hash
 * bucket index in the high bits and a 1-based offset within that
 * bucket in the low BUCKET_SPACE bits.
 *
 * BUCKET_SPACE leaves UNIX_HASH_BITS + 1 bits for the bucket index plus
 * one more bit at the top (presumably so the packed value stays
 * non-negative when stored in a loff_t -- TODO confirm).
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index / the in-bucket offset from a position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a position from bucket index b and in-bucket offset o. */
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2733
2734static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2735{
2736        unsigned long offset = get_offset(*pos);
2737        unsigned long bucket = get_bucket(*pos);
2738        struct sock *sk;
2739        unsigned long count = 0;
2740
2741        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2742                if (sock_net(sk) != seq_file_net(seq))
2743                        continue;
2744                if (++count == offset)
2745                        break;
2746        }
2747
2748        return sk;
2749}
2750
/*
 * unix_next_socket - find the next socket to show for this seq_file.
 * @seq: seq_file being read; selects the network namespace
 * @sk:  current iterator value: a socket, SEQ_START_TOKEN, or NULL
 * @pos: packed position (bucket index plus offset); rewritten to
 *       (next bucket, offset 1) each time a bucket is exhausted
 *
 * Returns the next socket belonging to the seq_file's network
 * namespace, or NULL once every bucket of unix_socket_table has been
 * scanned.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
                                     struct sock *sk,
                                     loff_t *pos)
{
        unsigned long bucket;

        /*
         * Only entered when sk compares greater than SEQ_START_TOKEN,
         * i.e. for a real socket: continue along its hash chain until a
         * socket of our namespace shows up or the chain ends.
         */
        while (sk > (struct sock *)SEQ_START_TOKEN) {
                sk = sk_next(sk);
                if (!sk)
                        goto next_bucket;
                if (sock_net(sk) == seq_file_net(seq))
                        return sk;
        }

        /*
         * Look the position up from scratch; each time a bucket yields
         * nothing, move on to the first entry of the following bucket.
         */
        do {
                sk = unix_from_bucket(seq, pos);
                if (sk)
                        return sk;

                /* Also the entry point when the chain walk above ran out. */
next_bucket:
                bucket = get_bucket(*pos) + 1;
                *pos = set_bucket_offset(bucket, 1);
        } while (bucket < ARRAY_SIZE(unix_socket_table));

        return NULL;
}
2777
2778static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2779        __acquires(unix_table_lock)
2780{
2781        spin_lock(&unix_table_lock);
2782
2783        if (!*pos)
2784                return SEQ_START_TOKEN;
2785
2786        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2787                return NULL;
2788
2789        return unix_next_socket(seq, NULL, pos);
2790}
2791
2792static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2793{
2794        ++*pos;
2795        return unix_next_socket(seq, v, pos);
2796}
2797
/*
 * unix_seq_stop - seq_file ->stop callback.
 * @seq: seq_file being read (unused)
 * @v:   current iterator value (unused)
 *
 * Drops unix_table_lock, which unix_seq_start() acquires.
 */
static void unix_seq_stop(struct seq_file *seq, void *v)
        __releases(unix_table_lock)
{
        spin_unlock(&unix_table_lock);
}
2803
2804static int unix_seq_show(struct seq_file *seq, void *v)
2805{
2806
2807        if (v == SEQ_START_TOKEN)
2808                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2809                         "Inode Path\n");
2810        else {
2811                struct sock *s = v;
2812                struct unix_sock *u = unix_sk(s);
2813                unix_state_lock(s);
2814
2815                seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2816                        s,
2817                        refcount_read(&s->sk_refcnt),
2818                        0,
2819                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2820                        s->sk_type,
2821                        s->sk_socket ?
2822                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2823                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2824                        sock_i_ino(s));
2825
2826                if (u->addr) {
2827                        int i, len;
2828                        seq_putc(seq, ' ');
2829
2830                        i = 0;
2831                        len = u->addr->len - sizeof(short);
2832                        if (!UNIX_ABSTRACT(s))
2833                                len--;
2834                        else {
2835                                seq_putc(seq, '@');
2836                                i++;
2837                        }
2838                        for ( ; i < len; i++)
2839                                seq_putc(seq, u->addr->name->sun_path[i] ?:
2840                                         '@');
2841                }
2842                unix_state_unlock(s);
2843                seq_putc(seq, '\n');
2844        }
2845
2846        return 0;
2847}
2848
/*
 * Iterator callbacks for the socket listing; installed by
 * unix_seq_open().
 */
static const struct seq_operations unix_seq_ops = {
        .start  = unix_seq_start,
        .next   = unix_seq_next,
        .stop   = unix_seq_stop,
        .show   = unix_seq_show,
};
2855
/*
 * unix_seq_open - ->open handler of unix_seq_fops.
 *
 * Opens a per-net seq_file using unix_seq_ops, with a private area the
 * size of struct seq_net_private. Returns seq_open_net()'s result.
 */
static int unix_seq_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &unix_seq_ops,
                            sizeof(struct seq_net_private));
}
2861
/*
 * File operations for the per-net "unix" proc entry created in
 * unix_net_init().
 */
static const struct file_operations unix_seq_fops = {
        .open           = unix_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};
2868
2869#endif
2870
/*
 * Protocol family descriptor for PF_UNIX; passed to sock_register()
 * from af_unix_init(). Socket creation goes through unix_create().
 */
static const struct net_proto_family unix_family_ops = {
        .family = PF_UNIX,
        .create = unix_create,
        .owner  = THIS_MODULE,
};
2876
2877
2878static int __net_init unix_net_init(struct net *net)
2879{
2880        int error = -ENOMEM;
2881
2882        net->unx.sysctl_max_dgram_qlen = 10;
2883        if (unix_sysctl_register(net))
2884                goto out;
2885
2886#ifdef CONFIG_PROC_FS
2887        if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2888                unix_sysctl_unregister(net);
2889                goto out;
2890        }
2891#endif
2892        error = 0;
2893out:
2894        return error;
2895}
2896
/*
 * unix_net_exit - per-network-namespace teardown.
 * @net: namespace being destroyed
 *
 * Undoes unix_net_init(): unregisters the sysctl table and removes the
 * "unix" proc entry.
 */
static void __net_exit unix_net_exit(struct net *net)
{
        unix_sysctl_unregister(net);
        remove_proc_entry("unix", net->proc_net);
}
2902
/* Per-namespace init/exit hooks, registered from af_unix_init(). */
static struct pernet_operations unix_net_ops = {
        .init = unix_net_init,
        .exit = unix_net_exit,
};
2907
2908static int __init af_unix_init(void)
2909{
2910        int rc = -1;
2911
2912        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2913
2914        rc = proto_register(&unix_proto, 1);
2915        if (rc != 0) {
2916                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2917                goto out;
2918        }
2919
2920        sock_register(&unix_family_ops);
2921        register_pernet_subsys(&unix_net_ops);
2922out:
2923        return rc;
2924}
2925
/*
 * af_unix_exit - module teardown.
 *
 * Unregisters the PF_UNIX socket family, the unix_proto protocol and
 * the per-namespace operations that af_unix_init() registered.
 */
static void __exit af_unix_exit(void)
{
        sock_unregister(PF_UNIX);
        proto_unregister(&unix_proto);
        unregister_pernet_subsys(&unix_net_ops);
}
2932
/*
 * Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket. But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2942