linux/net/unix/af_unix.c
<<
>>
Prefs
   1/*
   2 * NET4:        Implementation of BSD Unix domain sockets.
   3 *
   4 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Fixes:
  12 *              Linus Torvalds  :       Assorted bug cures.
  13 *              Niibe Yutaka    :       async I/O support.
  14 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15 *              Alan Cox        :       Limit size of allocated blocks.
  16 *              Alan Cox        :       Fixed the stupid socketpair bug.
  17 *              Alan Cox        :       BSD compatibility fine tuning.
  18 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19 *              Alan Cox        :       Sorted out a proper draft version of
  20 *                                      file descriptor passing hacked up from
  21 *                                      Mike Shaver's work.
  22 *              Marty Leisner   :       Fixes to fd passing
  23 *              Nick Nevin      :       recvmsg bugfix.
  24 *              Alan Cox        :       Started proper garbage collector
  25 *              Heiko EiBfeldt  :       Missing verify_area check
  26 *              Alan Cox        :       Started POSIXisms
  27 *              Andreas Schwab  :       Replace inode by dentry for proper
  28 *                                      reference counting
  29 *              Kirk Petersen   :       Made this a module
  30 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31 *                                      Lots of bug fixes.
   32 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
   33 *                                      by the above two patches.
   34 *           Andrea Arcangeli   :       If possible we block in connect(2)
   35 *                                      if the max backlog of the listen socket
   36 *                                      has been reached. This won't break
   37 *                                      old apps and it will avoid a huge number
   38 *                                      of socks hashed (this is for unix_gc()
   39 *                                      performance reasons).
  40 *                                      Security fix that limits the max
  41 *                                      number of socks to 2*max_files and
  42 *                                      the number of skb queueable in the
  43 *                                      dgram receiver.
  44 *              Artur Skawina   :       Hash function optimizations
  45 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46 *            Malcolm Beattie   :       Set peercred for socketpair
  47 *           Michal Ostrowski   :       Module initialization cleanup.
  48 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49 *                                      the core infrastructure is doing that
  50 *                                      for all net proto families now (2.5.69+)
  51 *
  52 *
  53 * Known differences from reference BSD that was tested:
  54 *
  55 *      [TO FIX]
  56 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57 *              other the moment one end closes.
  58 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60 *      [NOT TO FIX]
  61 *      accept() returns a path name even if the connecting socket has closed
  62 *              in the meantime (BSD loses the path and gives up).
  63 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66 *      BSD af_unix apparently has connect forgetting to block properly.
  67 *              (need to check this with the POSIX spec in detail)
  68 *
  69 * Differences from 2.0.0-11-... (ANK)
  70 *      Bug fixes and improvements.
  71 *              - client shutdown killed server socket.
  72 *              - removed all useless cli/sti pairs.
  73 *
  74 *      Semantic changes/extensions.
  75 *              - generic control message passing.
  76 *              - SCM_CREDENTIALS control message.
  77 *              - "Abstract" (not FS based) socket bindings.
  78 *                Abstract names are sequences of bytes (not zero terminated)
  79 *                started by 0, so that this name space does not intersect
  80 *                with BSD names.
  81 */
  82
  83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85#include <linux/module.h>
  86#include <linux/kernel.h>
  87#include <linux/signal.h>
  88#include <linux/sched/signal.h>
  89#include <linux/errno.h>
  90#include <linux/string.h>
  91#include <linux/stat.h>
  92#include <linux/dcache.h>
  93#include <linux/namei.h>
  94#include <linux/socket.h>
  95#include <linux/un.h>
  96#include <linux/fcntl.h>
  97#include <linux/termios.h>
  98#include <linux/sockios.h>
  99#include <linux/net.h>
 100#include <linux/in.h>
 101#include <linux/fs.h>
 102#include <linux/slab.h>
 103#include <linux/uaccess.h>
 104#include <linux/skbuff.h>
 105#include <linux/netdevice.h>
 106#include <net/net_namespace.h>
 107#include <net/sock.h>
 108#include <net/tcp_states.h>
 109#include <net/af_unix.h>
 110#include <linux/proc_fs.h>
 111#include <linux/seq_file.h>
 112#include <net/scm.h>
 113#include <linux/init.h>
 114#include <linux/poll.h>
 115#include <linux/rtnetlink.h>
 116#include <linux/mount.h>
 117#include <net/checksum.h>
 118#include <linux/security.h>
 119#include <linux/freezer.h>
 120#include <linux/file.h>
 121
 122struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 123EXPORT_SYMBOL_GPL(unix_socket_table);
 124DEFINE_SPINLOCK(unix_table_lock);
 125EXPORT_SYMBOL_GPL(unix_table_lock);
 126static atomic_long_t unix_nr_socks;
 127
 128
 129static struct hlist_head *unix_sockets_unbound(void *addr)
 130{
 131        unsigned long hash = (unsigned long)addr;
 132
 133        hash ^= hash >> 16;
 134        hash ^= hash >> 8;
 135        hash %= UNIX_HASH_SIZE;
 136        return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137}
 138
 139#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141#ifdef CONFIG_SECURITY_NETWORK
 142static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143{
 144        UNIXCB(skb).secid = scm->secid;
 145}
 146
 147static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148{
 149        scm->secid = UNIXCB(skb).secid;
 150}
 151
 152static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153{
 154        return (scm->secid == UNIXCB(skb).secid);
 155}
 156#else
 157static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158{ }
 159
 160static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161{ }
 162
 163static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164{
 165        return true;
 166}
 167#endif /* CONFIG_SECURITY_NETWORK */
 168
 169/*
 170 *  SMP locking strategy:
 171 *    hash table is protected with spinlock unix_table_lock
 172 *    each socket state is protected by separate spin lock.
 173 */
 174
 175static inline unsigned int unix_hash_fold(__wsum n)
 176{
 177        unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179        hash ^= hash>>8;
 180        return hash&(UNIX_HASH_SIZE-1);
 181}
 182
 183#define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186{
 187        return unix_peer(osk) == sk;
 188}
 189
 190static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191{
 192        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193}
 194
 195static inline int unix_recvq_full(struct sock const *sk)
 196{
 197        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198}
 199
 200struct sock *unix_peer_get(struct sock *s)
 201{
 202        struct sock *peer;
 203
 204        unix_state_lock(s);
 205        peer = unix_peer(s);
 206        if (peer)
 207                sock_hold(peer);
 208        unix_state_unlock(s);
 209        return peer;
 210}
 211EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213static inline void unix_release_addr(struct unix_address *addr)
 214{
 215        if (refcount_dec_and_test(&addr->refcnt))
 216                kfree(addr);
 217}
 218
 219/*
 220 *      Check unix socket name:
 221 *              - should be not zero length.
 222 *              - if started by not zero, should be NULL terminated (FS object)
 223 *              - if started by zero, it is abstract name.
 224 */
 225
 226static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227{
 228        if (len <= sizeof(short) || len > sizeof(*sunaddr))
 229                return -EINVAL;
 230        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 231                return -EINVAL;
 232        if (sunaddr->sun_path[0]) {
 233                /*
 234                 * This may look like an off by one error but it is a bit more
 235                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
 236                 * sun_path[108] doesn't as such exist.  However in kernel space
 237                 * we are guaranteed that it is a valid memory location in our
 238                 * kernel address buffer.
 239                 */
 240                ((char *)sunaddr)[len] = 0;
 241                len = strlen(sunaddr->sun_path)+1+sizeof(short);
 242                return len;
 243        }
 244
 245        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 246        return len;
 247}
 248
 249static void __unix_remove_socket(struct sock *sk)
 250{
 251        sk_del_node_init(sk);
 252}
 253
 254static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 255{
 256        WARN_ON(!sk_unhashed(sk));
 257        sk_add_node(sk, list);
 258}
 259
 260static inline void unix_remove_socket(struct sock *sk)
 261{
 262        spin_lock(&unix_table_lock);
 263        __unix_remove_socket(sk);
 264        spin_unlock(&unix_table_lock);
 265}
 266
 267static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 268{
 269        spin_lock(&unix_table_lock);
 270        __unix_insert_socket(list, sk);
 271        spin_unlock(&unix_table_lock);
 272}
 273
 274static struct sock *__unix_find_socket_byname(struct net *net,
 275                                              struct sockaddr_un *sunname,
 276                                              int len, int type, unsigned int hash)
 277{
 278        struct sock *s;
 279
 280        sk_for_each(s, &unix_socket_table[hash ^ type]) {
 281                struct unix_sock *u = unix_sk(s);
 282
 283                if (!net_eq(sock_net(s), net))
 284                        continue;
 285
 286                if (u->addr->len == len &&
 287                    !memcmp(u->addr->name, sunname, len))
 288                        goto found;
 289        }
 290        s = NULL;
 291found:
 292        return s;
 293}
 294
 295static inline struct sock *unix_find_socket_byname(struct net *net,
 296                                                   struct sockaddr_un *sunname,
 297                                                   int len, int type,
 298                                                   unsigned int hash)
 299{
 300        struct sock *s;
 301
 302        spin_lock(&unix_table_lock);
 303        s = __unix_find_socket_byname(net, sunname, len, type, hash);
 304        if (s)
 305                sock_hold(s);
 306        spin_unlock(&unix_table_lock);
 307        return s;
 308}
 309
 310static struct sock *unix_find_socket_byinode(struct inode *i)
 311{
 312        struct sock *s;
 313
 314        spin_lock(&unix_table_lock);
 315        sk_for_each(s,
 316                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 317                struct dentry *dentry = unix_sk(s)->path.dentry;
 318
 319                if (dentry && d_backing_inode(dentry) == i) {
 320                        sock_hold(s);
 321                        goto found;
 322                }
 323        }
 324        s = NULL;
 325found:
 326        spin_unlock(&unix_table_lock);
 327        return s;
 328}
 329
 330/* Support code for asymmetrically connected dgram sockets
 331 *
 332 * If a datagram socket is connected to a socket not itself connected
 333 * to the first socket (eg, /dev/log), clients may only enqueue more
 334 * messages if the present receive queue of the server socket is not
 335 * "too large". This means there's a second writeability condition
 336 * poll and sendmsg need to test. The dgram recv code will do a wake
 337 * up on the peer_wait wait queue of a socket upon reception of a
 338 * datagram which needs to be propagated to sleeping would-be writers
 339 * since these might not have sent anything so far. This can't be
 340 * accomplished via poll_wait because the lifetime of the server
 341 * socket might be less than that of its clients if these break their
 342 * association with it or if the server socket is closed while clients
 343 * are still connected to it and there's no way to inform "a polling
 344 * implementation" that it should let go of a certain wait queue
 345 *
 346 * In order to propagate a wake up, a wait_queue_entry_t of the client
 347 * socket is enqueued on the peer_wait queue of the server socket
 348 * whose wake function does a wake_up on the ordinary client socket
 349 * wait queue. This connection is established whenever a write (or
 350 * poll for write) hit the flow control condition and broken when the
 351 * association to the server socket is dissolved or after a wake up
 352 * was relayed.
 353 */
 354
 355static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 356                                      void *key)
 357{
 358        struct unix_sock *u;
 359        wait_queue_head_t *u_sleep;
 360
 361        u = container_of(q, struct unix_sock, peer_wake);
 362
 363        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 364                            q);
 365        u->peer_wake.private = NULL;
 366
 367        /* relaying can only happen while the wq still exists */
 368        u_sleep = sk_sleep(&u->sk);
 369        if (u_sleep)
 370                wake_up_interruptible_poll(u_sleep, key);
 371
 372        return 0;
 373}
 374
 375static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 376{
 377        struct unix_sock *u, *u_other;
 378        int rc;
 379
 380        u = unix_sk(sk);
 381        u_other = unix_sk(other);
 382        rc = 0;
 383        spin_lock(&u_other->peer_wait.lock);
 384
 385        if (!u->peer_wake.private) {
 386                u->peer_wake.private = other;
 387                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 388
 389                rc = 1;
 390        }
 391
 392        spin_unlock(&u_other->peer_wait.lock);
 393        return rc;
 394}
 395
 396static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 397                                            struct sock *other)
 398{
 399        struct unix_sock *u, *u_other;
 400
 401        u = unix_sk(sk);
 402        u_other = unix_sk(other);
 403        spin_lock(&u_other->peer_wait.lock);
 404
 405        if (u->peer_wake.private == other) {
 406                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 407                u->peer_wake.private = NULL;
 408        }
 409
 410        spin_unlock(&u_other->peer_wait.lock);
 411}
 412
 413static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 414                                                   struct sock *other)
 415{
 416        unix_dgram_peer_wake_disconnect(sk, other);
 417        wake_up_interruptible_poll(sk_sleep(sk),
 418                                   POLLOUT |
 419                                   POLLWRNORM |
 420                                   POLLWRBAND);
 421}
 422
 423/* preconditions:
 424 *      - unix_peer(sk) == other
 425 *      - association is stable
 426 */
 427static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 428{
 429        int connected;
 430
 431        connected = unix_dgram_peer_wake_connect(sk, other);
 432
 433        if (unix_recvq_full(other))
 434                return 1;
 435
 436        if (connected)
 437                unix_dgram_peer_wake_disconnect(sk, other);
 438
 439        return 0;
 440}
 441
 442static int unix_writable(const struct sock *sk)
 443{
 444        return sk->sk_state != TCP_LISTEN &&
 445               (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 446}
 447
 448static void unix_write_space(struct sock *sk)
 449{
 450        struct socket_wq *wq;
 451
 452        rcu_read_lock();
 453        if (unix_writable(sk)) {
 454                wq = rcu_dereference(sk->sk_wq);
 455                if (skwq_has_sleeper(wq))
 456                        wake_up_interruptible_sync_poll(&wq->wait,
 457                                POLLOUT | POLLWRNORM | POLLWRBAND);
 458                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 459        }
 460        rcu_read_unlock();
 461}
 462
 463/* When dgram socket disconnects (or changes its peer), we clear its receive
 464 * queue of packets arrived from previous peer. First, it allows to do
 465 * flow control based only on wmem_alloc; second, sk connected to peer
 466 * may receive messages only from that peer. */
 467static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 468{
 469        if (!skb_queue_empty(&sk->sk_receive_queue)) {
 470                skb_queue_purge(&sk->sk_receive_queue);
 471                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 472
 473                /* If one link of bidirectional dgram pipe is disconnected,
 474                 * we signal error. Messages are lost. Do not make this,
 475                 * when peer was not connected to us.
 476                 */
 477                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 478                        other->sk_err = ECONNRESET;
 479                        other->sk_error_report(other);
 480                }
 481        }
 482}
 483
 484static void unix_sock_destructor(struct sock *sk)
 485{
 486        struct unix_sock *u = unix_sk(sk);
 487
 488        skb_queue_purge(&sk->sk_receive_queue);
 489
 490        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 491        WARN_ON(!sk_unhashed(sk));
 492        WARN_ON(sk->sk_socket);
 493        if (!sock_flag(sk, SOCK_DEAD)) {
 494                pr_info("Attempt to release alive unix socket: %p\n", sk);
 495                return;
 496        }
 497
 498        if (u->addr)
 499                unix_release_addr(u->addr);
 500
 501        atomic_long_dec(&unix_nr_socks);
 502        local_bh_disable();
 503        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 504        local_bh_enable();
 505#ifdef UNIX_REFCNT_DEBUG
 506        pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 507                atomic_long_read(&unix_nr_socks));
 508#endif
 509}
 510
 511static void unix_release_sock(struct sock *sk, int embrion)
 512{
 513        struct unix_sock *u = unix_sk(sk);
 514        struct path path;
 515        struct sock *skpair;
 516        struct sk_buff *skb;
 517        int state;
 518
 519        unix_remove_socket(sk);
 520
 521        /* Clear state */
 522        unix_state_lock(sk);
 523        sock_orphan(sk);
 524        sk->sk_shutdown = SHUTDOWN_MASK;
 525        path         = u->path;
 526        u->path.dentry = NULL;
 527        u->path.mnt = NULL;
 528        state = sk->sk_state;
 529        sk->sk_state = TCP_CLOSE;
 530        unix_state_unlock(sk);
 531
 532        wake_up_interruptible_all(&u->peer_wait);
 533
 534        skpair = unix_peer(sk);
 535
 536        if (skpair != NULL) {
 537                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 538                        unix_state_lock(skpair);
 539                        /* No more writes */
 540                        skpair->sk_shutdown = SHUTDOWN_MASK;
 541                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 542                                skpair->sk_err = ECONNRESET;
 543                        unix_state_unlock(skpair);
 544                        skpair->sk_state_change(skpair);
 545                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 546                }
 547
 548                unix_dgram_peer_wake_disconnect(sk, skpair);
 549                sock_put(skpair); /* It may now die */
 550                unix_peer(sk) = NULL;
 551        }
 552
 553        /* Try to flush out this socket. Throw out buffers at least */
 554
 555        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 556                if (state == TCP_LISTEN)
 557                        unix_release_sock(skb->sk, 1);
 558                /* passed fds are erased in the kfree_skb hook        */
 559                UNIXCB(skb).consumed = skb->len;
 560                kfree_skb(skb);
 561        }
 562
 563        if (path.dentry)
 564                path_put(&path);
 565
 566        sock_put(sk);
 567
 568        /* ---- Socket is dead now and most probably destroyed ---- */
 569
 570        /*
 571         * Fixme: BSD difference: In BSD all sockets connected to us get
 572         *        ECONNRESET and we die on the spot. In Linux we behave
 573         *        like files and pipes do and wait for the last
 574         *        dereference.
 575         *
 576         * Can't we simply set sock->err?
 577         *
 578         *        What the above comment does talk about? --ANK(980817)
 579         */
 580
 581        if (unix_tot_inflight)
 582                unix_gc();              /* Garbage collect fds */
 583}
 584
 585static void init_peercred(struct sock *sk)
 586{
 587        put_pid(sk->sk_peer_pid);
 588        if (sk->sk_peer_cred)
 589                put_cred(sk->sk_peer_cred);
 590        sk->sk_peer_pid  = get_pid(task_tgid(current));
 591        sk->sk_peer_cred = get_current_cred();
 592}
 593
 594static void copy_peercred(struct sock *sk, struct sock *peersk)
 595{
 596        put_pid(sk->sk_peer_pid);
 597        if (sk->sk_peer_cred)
 598                put_cred(sk->sk_peer_cred);
 599        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 600        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 601}
 602
 603static int unix_listen(struct socket *sock, int backlog)
 604{
 605        int err;
 606        struct sock *sk = sock->sk;
 607        struct unix_sock *u = unix_sk(sk);
 608        struct pid *old_pid = NULL;
 609
 610        err = -EOPNOTSUPP;
 611        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 612                goto out;       /* Only stream/seqpacket sockets accept */
 613        err = -EINVAL;
 614        if (!u->addr)
 615                goto out;       /* No listens on an unbound socket */
 616        unix_state_lock(sk);
 617        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 618                goto out_unlock;
 619        if (backlog > sk->sk_max_ack_backlog)
 620                wake_up_interruptible_all(&u->peer_wait);
 621        sk->sk_max_ack_backlog  = backlog;
 622        sk->sk_state            = TCP_LISTEN;
 623        /* set credentials so connect can copy them */
 624        init_peercred(sk);
 625        err = 0;
 626
 627out_unlock:
 628        unix_state_unlock(sk);
 629        put_pid(old_pid);
 630out:
 631        return err;
 632}
 633
 634static int unix_release(struct socket *);
 635static int unix_bind(struct socket *, struct sockaddr *, int);
 636static int unix_stream_connect(struct socket *, struct sockaddr *,
 637                               int addr_len, int flags);
 638static int unix_socketpair(struct socket *, struct socket *);
 639static int unix_accept(struct socket *, struct socket *, int, bool);
 640static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 641static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 642static unsigned int unix_dgram_poll(struct file *, struct socket *,
 643                                    poll_table *);
 644static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 645static int unix_shutdown(struct socket *, int);
 646static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 647static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 648static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 649                                    size_t size, int flags);
 650static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 651                                       struct pipe_inode_info *, size_t size,
 652                                       unsigned int flags);
 653static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 654static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 655static int unix_dgram_connect(struct socket *, struct sockaddr *,
 656                              int, int);
 657static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 658static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 659                                  int);
 660
 661static int unix_set_peek_off(struct sock *sk, int val)
 662{
 663        struct unix_sock *u = unix_sk(sk);
 664
 665        if (mutex_lock_interruptible(&u->iolock))
 666                return -EINTR;
 667
 668        sk->sk_peek_off = val;
 669        mutex_unlock(&u->iolock);
 670
 671        return 0;
 672}
 673
 674
 675static const struct proto_ops unix_stream_ops = {
 676        .family =       PF_UNIX,
 677        .owner =        THIS_MODULE,
 678        .release =      unix_release,
 679        .bind =         unix_bind,
 680        .connect =      unix_stream_connect,
 681        .socketpair =   unix_socketpair,
 682        .accept =       unix_accept,
 683        .getname =      unix_getname,
 684        .poll =         unix_poll,
 685        .ioctl =        unix_ioctl,
 686        .listen =       unix_listen,
 687        .shutdown =     unix_shutdown,
 688        .setsockopt =   sock_no_setsockopt,
 689        .getsockopt =   sock_no_getsockopt,
 690        .sendmsg =      unix_stream_sendmsg,
 691        .recvmsg =      unix_stream_recvmsg,
 692        .mmap =         sock_no_mmap,
 693        .sendpage =     unix_stream_sendpage,
 694        .splice_read =  unix_stream_splice_read,
 695        .set_peek_off = unix_set_peek_off,
 696};
 697
 698static const struct proto_ops unix_dgram_ops = {
 699        .family =       PF_UNIX,
 700        .owner =        THIS_MODULE,
 701        .release =      unix_release,
 702        .bind =         unix_bind,
 703        .connect =      unix_dgram_connect,
 704        .socketpair =   unix_socketpair,
 705        .accept =       sock_no_accept,
 706        .getname =      unix_getname,
 707        .poll =         unix_dgram_poll,
 708        .ioctl =        unix_ioctl,
 709        .listen =       sock_no_listen,
 710        .shutdown =     unix_shutdown,
 711        .setsockopt =   sock_no_setsockopt,
 712        .getsockopt =   sock_no_getsockopt,
 713        .sendmsg =      unix_dgram_sendmsg,
 714        .recvmsg =      unix_dgram_recvmsg,
 715        .mmap =         sock_no_mmap,
 716        .sendpage =     sock_no_sendpage,
 717        .set_peek_off = unix_set_peek_off,
 718};
 719
 720static const struct proto_ops unix_seqpacket_ops = {
 721        .family =       PF_UNIX,
 722        .owner =        THIS_MODULE,
 723        .release =      unix_release,
 724        .bind =         unix_bind,
 725        .connect =      unix_stream_connect,
 726        .socketpair =   unix_socketpair,
 727        .accept =       unix_accept,
 728        .getname =      unix_getname,
 729        .poll =         unix_dgram_poll,
 730        .ioctl =        unix_ioctl,
 731        .listen =       unix_listen,
 732        .shutdown =     unix_shutdown,
 733        .setsockopt =   sock_no_setsockopt,
 734        .getsockopt =   sock_no_getsockopt,
 735        .sendmsg =      unix_seqpacket_sendmsg,
 736        .recvmsg =      unix_seqpacket_recvmsg,
 737        .mmap =         sock_no_mmap,
 738        .sendpage =     sock_no_sendpage,
 739        .set_peek_off = unix_set_peek_off,
 740};
 741
 742static struct proto unix_proto = {
 743        .name                   = "UNIX",
 744        .owner                  = THIS_MODULE,
 745        .obj_size               = sizeof(struct unix_sock),
 746};
 747
 748/*
 749 * AF_UNIX sockets do not interact with hardware, hence they
 750 * dont trigger interrupts - so it's safe for them to have
 751 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 752 * this special lock-class by reinitializing the spinlock key:
 753 */
 754static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 755
 756static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 757{
 758        struct sock *sk = NULL;
 759        struct unix_sock *u;
 760
 761        atomic_long_inc(&unix_nr_socks);
 762        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 763                goto out;
 764
 765        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 766        if (!sk)
 767                goto out;
 768
 769        sock_init_data(sock, sk);
 770        lockdep_set_class(&sk->sk_receive_queue.lock,
 771                                &af_unix_sk_receive_queue_lock_key);
 772
 773        sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 774        sk->sk_write_space      = unix_write_space;
 775        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 776        sk->sk_destruct         = unix_sock_destructor;
 777        u         = unix_sk(sk);
 778        u->path.dentry = NULL;
 779        u->path.mnt = NULL;
 780        spin_lock_init(&u->lock);
 781        atomic_long_set(&u->inflight, 0);
 782        INIT_LIST_HEAD(&u->link);
 783        mutex_init(&u->iolock); /* single task reading lock */
 784        mutex_init(&u->bindlock); /* single task binding lock */
 785        init_waitqueue_head(&u->peer_wait);
 786        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 787        unix_insert_socket(unix_sockets_unbound(sk), sk);
 788out:
 789        if (sk == NULL)
 790                atomic_long_dec(&unix_nr_socks);
 791        else {
 792                local_bh_disable();
 793                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 794                local_bh_enable();
 795        }
 796        return sk;
 797}
 798
 799static int unix_create(struct net *net, struct socket *sock, int protocol,
 800                       int kern)
 801{
 802        if (protocol && protocol != PF_UNIX)
 803                return -EPROTONOSUPPORT;
 804
 805        sock->state = SS_UNCONNECTED;
 806
 807        switch (sock->type) {
 808        case SOCK_STREAM:
 809                sock->ops = &unix_stream_ops;
 810                break;
 811                /*
 812                 *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 813                 *      nothing uses it.
 814                 */
 815        case SOCK_RAW:
 816                sock->type = SOCK_DGRAM;
 817        case SOCK_DGRAM:
 818                sock->ops = &unix_dgram_ops;
 819                break;
 820        case SOCK_SEQPACKET:
 821                sock->ops = &unix_seqpacket_ops;
 822                break;
 823        default:
 824                return -ESOCKTNOSUPPORT;
 825        }
 826
 827        return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 828}
 829
 830static int unix_release(struct socket *sock)
 831{
 832        struct sock *sk = sock->sk;
 833
 834        if (!sk)
 835                return 0;
 836
 837        unix_release_sock(sk, 0);
 838        sock->sk = NULL;
 839
 840        return 0;
 841}
 842
 843static int unix_autobind(struct socket *sock)
 844{
 845        struct sock *sk = sock->sk;
 846        struct net *net = sock_net(sk);
 847        struct unix_sock *u = unix_sk(sk);
 848        static u32 ordernum = 1;
 849        struct unix_address *addr;
 850        int err;
 851        unsigned int retries = 0;
 852
 853        err = mutex_lock_interruptible(&u->bindlock);
 854        if (err)
 855                return err;
 856
 857        err = 0;
 858        if (u->addr)
 859                goto out;
 860
 861        err = -ENOMEM;
 862        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 863        if (!addr)
 864                goto out;
 865
 866        addr->name->sun_family = AF_UNIX;
 867        refcount_set(&addr->refcnt, 1);
 868
 869retry:
 870        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 871        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 872
 873        spin_lock(&unix_table_lock);
 874        ordernum = (ordernum+1)&0xFFFFF;
 875
 876        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 877                                      addr->hash)) {
 878                spin_unlock(&unix_table_lock);
 879                /*
 880                 * __unix_find_socket_byname() may take long time if many names
 881                 * are already in use.
 882                 */
 883                cond_resched();
 884                /* Give up if all names seems to be in use. */
 885                if (retries++ == 0xFFFFF) {
 886                        err = -ENOSPC;
 887                        kfree(addr);
 888                        goto out;
 889                }
 890                goto retry;
 891        }
 892        addr->hash ^= sk->sk_type;
 893
 894        __unix_remove_socket(sk);
 895        u->addr = addr;
 896        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 897        spin_unlock(&unix_table_lock);
 898        err = 0;
 899
 900out:    mutex_unlock(&u->bindlock);
 901        return err;
 902}
 903
 904static struct sock *unix_find_other(struct net *net,
 905                                    struct sockaddr_un *sunname, int len,
 906                                    int type, unsigned int hash, int *error)
 907{
 908        struct sock *u;
 909        struct path path;
 910        int err = 0;
 911
 912        if (sunname->sun_path[0]) {
 913                struct inode *inode;
 914                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 915                if (err)
 916                        goto fail;
 917                inode = d_backing_inode(path.dentry);
 918                err = inode_permission(inode, MAY_WRITE);
 919                if (err)
 920                        goto put_fail;
 921
 922                err = -ECONNREFUSED;
 923                if (!S_ISSOCK(inode->i_mode))
 924                        goto put_fail;
 925                u = unix_find_socket_byinode(inode);
 926                if (!u)
 927                        goto put_fail;
 928
 929                if (u->sk_type == type)
 930                        touch_atime(&path);
 931
 932                path_put(&path);
 933
 934                err = -EPROTOTYPE;
 935                if (u->sk_type != type) {
 936                        sock_put(u);
 937                        goto fail;
 938                }
 939        } else {
 940                err = -ECONNREFUSED;
 941                u = unix_find_socket_byname(net, sunname, len, type, hash);
 942                if (u) {
 943                        struct dentry *dentry;
 944                        dentry = unix_sk(u)->path.dentry;
 945                        if (dentry)
 946                                touch_atime(&unix_sk(u)->path);
 947                } else
 948                        goto fail;
 949        }
 950        return u;
 951
 952put_fail:
 953        path_put(&path);
 954fail:
 955        *error = err;
 956        return NULL;
 957}
 958
 959static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 960{
 961        struct dentry *dentry;
 962        struct path path;
 963        int err = 0;
 964        /*
 965         * Get the parent directory, calculate the hash for last
 966         * component.
 967         */
 968        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 969        err = PTR_ERR(dentry);
 970        if (IS_ERR(dentry))
 971                return err;
 972
 973        /*
 974         * All right, let's create it.
 975         */
 976        err = security_path_mknod(&path, dentry, mode, 0);
 977        if (!err) {
 978                err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 979                if (!err) {
 980                        res->mnt = mntget(path.mnt);
 981                        res->dentry = dget(dentry);
 982                }
 983        }
 984        done_path_create(&path, dentry);
 985        return err;
 986}
 987
/*
 * Bind an AF_UNIX socket to an address.
 *
 * Three cases are distinguished by the supplied address:
 *  - addr_len == sizeof(short), i.e. only the family: the work is handed
 *    to unix_autobind().
 *  - sun_path[0] != 0: a filesystem name.  The socket node is created
 *    with unix_mknod() and the socket is hashed by that inode's number.
 *  - sun_path[0] == 0: the name is looked up in the name hash table and
 *    must not be present yet.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a too-short or
 * non-AF_UNIX address and for a socket that already has an address,
 * -EADDRINUSE when the name is taken, -ENOMEM when the address cannot be
 * allocated, or the error reported by unix_mkname(), unix_mknod() or
 * mutex_lock_interruptible().
 *
 * NOTE(review): when a step after a successful unix_mknod() fails, the
 * unwind below only drops the path reference; nothing in this function
 * removes the node that was created.  Confirm whether that is intended.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
        int err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;
        struct path path = { };

        err = -EINVAL;
        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
            sunaddr->sun_family != AF_UNIX)
                goto out;

        /* Family only, no name supplied: pick a name automatically. */
        if (addr_len == sizeof(short)) {
                err = unix_autobind(sock);
                goto out;
        }

        /* A non-negative return replaces addr_len from here on. */
        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /*
         * Filesystem name: create the node before taking bindlock.  The
         * mode is the socket inode's mode with the caller's umask applied.
         */
        if (sun_path[0]) {
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
                err = unix_mknod(sun_path, mode, &path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
                        goto out;
                }
        }

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                goto out_put;

        /* A socket can only be bound once. */
        err = -EINVAL;
        if (u->addr)
                goto out_up;

        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
                goto out_up;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash ^ sk->sk_type;
        refcount_set(&addr->refcnt, 1);

        if (sun_path[0]) {
                /*
                 * Filesystem name: addr->hash is overwritten with
                 * UNIX_HASH_SIZE and the bucket is chosen from the inode
                 * number instead of the name hash.
                 */
                addr->hash = UNIX_HASH_SIZE;
                hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
                u->path = path;
                list = &unix_socket_table[hash];
        } else {
                /* Check for a duplicate name under the table lock. */
                spin_lock(&unix_table_lock);
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                              sk->sk_type, hash)) {
                        unix_release_addr(addr);
                        goto out_unlock;
                }

                list = &unix_socket_table[addr->hash];
        }

        /* Move the socket from its current bucket to the chosen one. */
        err = 0;
        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(list, sk);

out_unlock:
        spin_unlock(&unix_table_lock);
out_up:
        mutex_unlock(&u->bindlock);
out_put:
        /* On failure drop the references unix_mknod() stored in path. */
        if (err)
                path_put(&path);
out:
        return err;
}
1078
/*
 * Take the state locks of two sockets.
 *
 * When @sk2 is NULL or is the same socket as @sk1, only @sk1 is locked.
 * Otherwise the socket at the lower address is locked first and the
 * other one is taken with the nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1093
/*
 * Release the locks taken by unix_state_double_lock().  @sk1 is always
 * unlocked; @sk2 only when it is non-NULL and a different socket.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (unlikely(sk1 == sk2) || !sk2)
                return;

        unix_state_unlock(sk2);
}
1103
/*
 * Connect a datagram socket to a peer, or break an existing association
 * when the address family is AF_UNSPEC.
 *
 * Returns 0 on success.  Failures are -EINVAL for a too-short address,
 * -EPERM when unix_may_send() refuses the peer, or the error reported by
 * unix_mkname(), unix_autobind(), unix_find_other() or
 * security_unix_may_send().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        err = -EINVAL;
        if (alen < offsetofend(struct sockaddr, sa_family))
                goto out;

        if (addr->sa_family != AF_UNSPEC) {
                /* A non-negative return replaces alen from here on. */
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

        } else {
                /*
                 *      1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                /* With other == NULL this locks sk only. */
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         *
         * On this success path the reference obtained from
         * unix_find_other() is not dropped; the pointer is stored as
         * the new peer.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
                unix_peer(sk) = other;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                /* The disconnect handling runs after the locks are released. */
                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }
        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}
1183
1184static long unix_wait_for_peer(struct sock *other, long timeo)
1185{
1186        struct unix_sock *u = unix_sk(other);
1187        int sched;
1188        DEFINE_WAIT(wait);
1189
1190        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1191
1192        sched = !sock_flag(other, SOCK_DEAD) &&
1193                !(other->sk_shutdown & RCV_SHUTDOWN) &&
1194                unix_recvq_full(other);
1195
1196        unix_state_unlock(other);
1197
1198        if (sched)
1199                timeo = schedule_timeout(timeo);
1200
1201        finish_wait(&u->peer_wait, &wait);
1202        return timeo;
1203}
1204
/*
 * Connect a stream or seqpacket socket to a listening socket.
 *
 * A new sock (newsk) is created up front to become the listener's side
 * of the connection, together with a one-byte skb that is queued on the
 * listener's receive queue to announce it.  Once both state locks are
 * held and the checks pass, sk and newsk are made peers of each other.
 *
 * Returns 0 on success.  Failures are -ENOMEM when newsk or the skb
 * cannot be allocated, -ECONNREFUSED when the target is not listening or
 * has shut down receiving, -EAGAIN when its queue is full and no timeout
 * is allowed, -EISCONN / -EINVAL depending on sk's own state, or the
 * error from unix_mkname(), unix_autobind(), unix_find_other(),
 * sock_intr_errno() or security_unix_stream_connect().
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        /* A non-negative return replaces addr_len from here on. */
        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we will make it after state is locked,
           we will have to recheck all again in any case.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /*
                 * unix_wait_for_peer() releases other's state lock, so
                 * the exits below must not unlock it again.
                 */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           It is tricky place. We need to grab our state lock and cannot
           drop lock on peer. It is dangerous because deadlock is
           possible. Connect to self case and simultaneous
           attempt to connect are eliminated by checking socket
           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
           check this before attempt to grab lock.

           Well, and we have to recheck the state after socket locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* sk's state changed before its lock was taken: start over. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Quickly set all the necessary fields... */

        /* newsk's peer pointer to sk gets its own reference on sk. */
        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock*/
        if (otheru->addr) {
                refcount_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        /* sk's peer pointer to newsk gets its own reference on newsk. */
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        /* Release whatever was set up: the skb, newsk and the ref on other. */
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1379
1380static int unix_socketpair(struct socket *socka, struct socket *sockb)
1381{
1382        struct sock *ska = socka->sk, *skb = sockb->sk;
1383
1384        /* Join our sockets back to back */
1385        sock_hold(ska);
1386        sock_hold(skb);
1387        unix_peer(ska) = skb;
1388        unix_peer(skb) = ska;
1389        init_peercred(ska);
1390        init_peercred(skb);
1391
1392        if (ska->sk_type != SOCK_DGRAM) {
1393                ska->sk_state = TCP_ESTABLISHED;
1394                skb->sk_state = TCP_ESTABLISHED;
1395                socka->state  = SS_CONNECTED;
1396                sockb->state  = SS_CONNECTED;
1397        }
1398        return 0;
1399}
1400
1401static void unix_sock_inherit_flags(const struct socket *old,
1402                                    struct socket *new)
1403{
1404        if (test_bit(SOCK_PASSCRED, &old->flags))
1405                set_bit(SOCK_PASSCRED, &new->flags);
1406        if (test_bit(SOCK_PASSSEC, &old->flags))
1407                set_bit(SOCK_PASSSEC, &new->flags);
1408}
1409
1410static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1411                       bool kern)
1412{
1413        struct sock *sk = sock->sk;
1414        struct sock *tsk;
1415        struct sk_buff *skb;
1416        int err;
1417
1418        err = -EOPNOTSUPP;
1419        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1420                goto out;
1421
1422        err = -EINVAL;
1423        if (sk->sk_state != TCP_LISTEN)
1424                goto out;
1425
1426        /* If socket state is TCP_LISTEN it cannot change (for now...),
1427         * so that no locks are necessary.
1428         */
1429
1430        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1431        if (!skb) {
1432                /* This means receive shutdown. */
1433                if (err == 0)
1434                        err = -EINVAL;
1435                goto out;
1436        }
1437
1438        tsk = skb->sk;
1439        skb_free_datagram(sk, skb);
1440        wake_up_interruptible(&unix_sk(sk)->peer_wait);
1441
1442        /* attach accepted sock to socket */
1443        unix_state_lock(tsk);
1444        newsock->state = SS_CONNECTED;
1445        unix_sock_inherit_flags(sock, newsock);
1446        sock_graft(tsk, newsock);
1447        unix_state_unlock(tsk);
1448        return 0;
1449
1450out:
1451        return err;
1452}
1453
1454
1455static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1456{
1457        struct sock *sk = sock->sk;
1458        struct unix_sock *u;
1459        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1460        int err = 0;
1461
1462        if (peer) {
1463                sk = unix_peer_get(sk);
1464
1465                err = -ENOTCONN;
1466                if (!sk)
1467                        goto out;
1468                err = 0;
1469        } else {
1470                sock_hold(sk);
1471        }
1472
1473        u = unix_sk(sk);
1474        unix_state_lock(sk);
1475        if (!u->addr) {
1476                sunaddr->sun_family = AF_UNIX;
1477                sunaddr->sun_path[0] = 0;
1478                *uaddr_len = sizeof(short);
1479        } else {
1480                struct unix_address *addr = u->addr;
1481
1482                *uaddr_len = addr->len;
1483                memcpy(sunaddr, addr->name, *uaddr_len);
1484        }
1485        unix_state_unlock(sk);
1486        sock_put(sk);
1487out:
1488        return err;
1489}
1490
1491static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1492{
1493        int i;
1494
1495        scm->fp = UNIXCB(skb).fp;
1496        UNIXCB(skb).fp = NULL;
1497
1498        for (i = scm->fp->count-1; i >= 0; i--)
1499                unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1500}
1501
1502static void unix_destruct_scm(struct sk_buff *skb)
1503{
1504        struct scm_cookie scm;
1505        memset(&scm, 0, sizeof(scm));
1506        scm.pid  = UNIXCB(skb).pid;
1507        if (UNIXCB(skb).fp)
1508                unix_detach_fds(&scm, skb);
1509
1510        /* Alas, it calls VFS */
1511        /* So fscking what? fput() had been SMP-safe since the last Summer */
1512        scm_destroy(&scm);
1513        sock_wfree(skb);
1514}
1515
1516/*
1517 * The "user->unix_inflight" variable is protected by the garbage
1518 * collection lock, and we just read it locklessly here. If you go
1519 * over the limit, there might be a tiny race in actually noticing
1520 * it across threads. Tough.
1521 */
1522static inline bool too_many_unix_fds(struct task_struct *p)
1523{
1524        struct user_struct *user = current_user();
1525
1526        if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1527                return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1528        return false;
1529}
1530
1531#define MAX_RECURSION_LEVEL 4
1532
1533static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1534{
1535        int i;
1536        unsigned char max_level = 0;
1537
1538        if (too_many_unix_fds(current))
1539                return -ETOOMANYREFS;
1540
1541        for (i = scm->fp->count - 1; i >= 0; i--) {
1542                struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1543
1544                if (sk)
1545                        max_level = max(max_level,
1546                                        unix_sk(sk)->recursion_level);
1547        }
1548        if (unlikely(max_level > MAX_RECURSION_LEVEL))
1549                return -ETOOMANYREFS;
1550
1551        /*
1552         * Need to duplicate file references for the sake of garbage
1553         * collection.  Otherwise a socket in the fps might become a
1554         * candidate for GC while the skb is not yet queued.
1555         */
1556        UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1557        if (!UNIXCB(skb).fp)
1558                return -ENOMEM;
1559
1560        for (i = scm->fp->count - 1; i >= 0; i--)
1561                unix_inflight(scm->fp->user, scm->fp->fp[i]);
1562        return max_level;
1563}
1564
1565static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1566{
1567        int err = 0;
1568
1569        UNIXCB(skb).pid  = get_pid(scm->pid);
1570        UNIXCB(skb).uid = scm->creds.uid;
1571        UNIXCB(skb).gid = scm->creds.gid;
1572        UNIXCB(skb).fp = NULL;
1573        unix_get_secdata(scm, skb);
1574        if (scm->fp && send_fds)
1575                err = unix_attach_fds(scm, skb);
1576
1577        skb->destructor = unix_destruct_scm;
1578        return err;
1579}
1580
1581static bool unix_passcred_enabled(const struct socket *sock,
1582                                  const struct sock *other)
1583{
1584        return test_bit(SOCK_PASSCRED, &sock->flags) ||
1585               !other->sk_socket ||
1586               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1587}
1588
1589/*
1590 * Some apps rely on write() giving SCM_CREDENTIALS
1591 * We include credentials if source or destination socket
1592 * asserted SOCK_PASSCRED.
1593 */
1594static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1595                            const struct sock *other)
1596{
1597        if (UNIXCB(skb).pid)
1598                return;
1599        if (unix_passcred_enabled(sock, other)) {
1600                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1601                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1602        }
1603}
1604
1605static int maybe_init_creds(struct scm_cookie *scm,
1606                            struct socket *socket,
1607                            const struct sock *other)
1608{
1609        int err;
1610        struct msghdr msg = { .msg_controllen = 0 };
1611
1612        err = scm_send(socket, &msg, scm, false);
1613        if (err)
1614                return err;
1615
1616        if (unix_passcred_enabled(socket, other)) {
1617                scm->pid = get_pid(task_tgid(current));
1618                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1619        }
1620        return err;
1621}
1622
1623static bool unix_skb_scm_eq(struct sk_buff *skb,
1624                            struct scm_cookie *scm)
1625{
1626        const struct unix_skb_parms *u = &UNIXCB(skb);
1627
1628        return u->pid == scm->pid &&
1629               uid_eq(u->uid, scm->creds.uid) &&
1630               gid_eq(u->gid, scm->creds.gid) &&
1631               unix_secdata_eq(scm, skb);
1632}
1633
1634/*
1635 *      Send AF_UNIX data.
1636 */
1637
1638static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1639                              size_t len)
1640{
1641        struct sock *sk = sock->sk;
1642        struct net *net = sock_net(sk);
1643        struct unix_sock *u = unix_sk(sk);
1644        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1645        struct sock *other = NULL;
1646        int namelen = 0; /* fake GCC */
1647        int err;
1648        unsigned int hash;
1649        struct sk_buff *skb;
1650        long timeo;
1651        struct scm_cookie scm;
1652        int max_level;
1653        int data_len = 0;
1654        int sk_locked;
1655
1656        wait_for_unix_gc();
1657        err = scm_send(sock, msg, &scm, false);
1658        if (err < 0)
1659                return err;
1660
1661        err = -EOPNOTSUPP;
1662        if (msg->msg_flags&MSG_OOB)
1663                goto out;
1664
1665        if (msg->msg_namelen) {
1666                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1667                if (err < 0)
1668                        goto out;
1669                namelen = err;
1670        } else {
1671                sunaddr = NULL;
1672                err = -ENOTCONN;
1673                other = unix_peer_get(sk);
1674                if (!other)
1675                        goto out;
1676        }
1677
1678        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1679            && (err = unix_autobind(sock)) != 0)
1680                goto out;
1681
1682        err = -EMSGSIZE;
1683        if (len > sk->sk_sndbuf - 32)
1684                goto out;
1685
1686        if (len > SKB_MAX_ALLOC) {
1687                data_len = min_t(size_t,
1688                                 len - SKB_MAX_ALLOC,
1689                                 MAX_SKB_FRAGS * PAGE_SIZE);
1690                data_len = PAGE_ALIGN(data_len);
1691
1692                BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1693        }
1694
1695        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1696                                   msg->msg_flags & MSG_DONTWAIT, &err,
1697                                   PAGE_ALLOC_COSTLY_ORDER);
1698        if (skb == NULL)
1699                goto out;
1700
1701        err = unix_scm_to_skb(&scm, skb, true);
1702        if (err < 0)
1703                goto out_free;
1704        max_level = err + 1;
1705
1706        skb_put(skb, len - data_len);
1707        skb->data_len = data_len;
1708        skb->len = len;
1709        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1710        if (err)
1711                goto out_free;
1712
1713        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1714
1715restart:
1716        if (!other) {
1717                err = -ECONNRESET;
1718                if (sunaddr == NULL)
1719                        goto out_free;
1720
1721                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1722                                        hash, &err);
1723                if (other == NULL)
1724                        goto out_free;
1725        }
1726
1727        if (sk_filter(other, skb) < 0) {
1728                /* Toss the packet but do not return any error to the sender */
1729                err = len;
1730                goto out_free;
1731        }
1732
1733        sk_locked = 0;
1734        unix_state_lock(other);
1735restart_locked:
1736        err = -EPERM;
1737        if (!unix_may_send(sk, other))
1738                goto out_unlock;
1739
1740        if (unlikely(sock_flag(other, SOCK_DEAD))) {
1741                /*
1742                 *      Check with 1003.1g - what should
1743                 *      datagram error
1744                 */
1745                unix_state_unlock(other);
1746                sock_put(other);
1747
1748                if (!sk_locked)
1749                        unix_state_lock(sk);
1750
1751                err = 0;
1752                if (unix_peer(sk) == other) {
1753                        unix_peer(sk) = NULL;
1754                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1755
1756                        unix_state_unlock(sk);
1757
1758                        unix_dgram_disconnected(sk, other);
1759                        sock_put(other);
1760                        err = -ECONNREFUSED;
1761                } else {
1762                        unix_state_unlock(sk);
1763                }
1764
1765                other = NULL;
1766                if (err)
1767                        goto out_free;
1768                goto restart;
1769        }
1770
1771        err = -EPIPE;
1772        if (other->sk_shutdown & RCV_SHUTDOWN)
1773                goto out_unlock;
1774
1775        if (sk->sk_type != SOCK_SEQPACKET) {
1776                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1777                if (err)
1778                        goto out_unlock;
1779        }
1780
1781        /* other == sk && unix_peer(other) != sk if
1782         * - unix_peer(sk) == NULL, destination address bound to sk
1783         * - unix_peer(sk) == sk by time of get but disconnected before lock
1784         */
1785        if (other != sk &&
1786            unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1787                if (timeo) {
1788                        timeo = unix_wait_for_peer(other, timeo);
1789
1790                        err = sock_intr_errno(timeo);
1791                        if (signal_pending(current))
1792                                goto out_free;
1793
1794                        goto restart;
1795                }
1796
1797                if (!sk_locked) {
1798                        unix_state_unlock(other);
1799                        unix_state_double_lock(sk, other);
1800                }
1801
1802                if (unix_peer(sk) != other ||
1803                    unix_dgram_peer_wake_me(sk, other)) {
1804                        err = -EAGAIN;
1805                        sk_locked = 1;
1806                        goto out_unlock;
1807                }
1808
1809                if (!sk_locked) {
1810                        sk_locked = 1;
1811                        goto restart_locked;
1812                }
1813        }
1814
1815        if (unlikely(sk_locked))
1816                unix_state_unlock(sk);
1817
1818        if (sock_flag(other, SOCK_RCVTSTAMP))
1819                __net_timestamp(skb);
1820        maybe_add_creds(skb, sock, other);
1821        skb_queue_tail(&other->sk_receive_queue, skb);
1822        if (max_level > unix_sk(other)->recursion_level)
1823                unix_sk(other)->recursion_level = max_level;
1824        unix_state_unlock(other);
1825        other->sk_data_ready(other);
1826        sock_put(other);
1827        scm_destroy(&scm);
1828        return len;
1829
1830out_unlock:
1831        if (sk_locked)
1832                unix_state_unlock(sk);
1833        unix_state_unlock(other);
1834out_free:
1835        kfree_skb(skb);
1836out:
1837        if (other)
1838                sock_put(other);
1839        scm_destroy(&scm);
1840        return err;
1841}
1842
1843/* We use paged skbs for stream sockets, and limit occupancy to 32768
1844 * bytes, and a minimum of a full page.
     *
     * The value is PAGE_SIZE shifted left by get_order(32768).
1845 */
1846#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1847
/*
 * unix_stream_sendmsg - queue the bytes of @msg on the connected peer.
 *
 * The payload is cut into skbs whose size is bounded by half of
 * sk->sk_sndbuf minus 64 and by SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ.  Each
 * skb is appended to the peer's sk_receive_queue under
 * unix_state_lock(other), after which the peer's sk_data_ready() is called.
 *
 * Returns the number of bytes queued when that is non-zero.  Otherwise a
 * negative errno: -EOPNOTSUPP for MSG_OOB, -EISCONN or -EOPNOTSUPP when an
 * address is supplied, -ENOTCONN without a peer, -EPIPE (with SIGPIPE
 * unless MSG_NOSIGNAL is set) when this socket has SEND_SHUTDOWN set or the
 * peer is dead or has RCV_SHUTDOWN set, or the error reported by scm_send(),
 * the skb allocation, unix_scm_to_skb() or the data copy.
 */
1848static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1849                               size_t len)
1850{
1851        struct sock *sk = sock->sk;
1852        struct sock *other = NULL;
1853        int err, size;
1854        struct sk_buff *skb;
1855        int sent = 0;
1856        struct scm_cookie scm;
1857        bool fds_sent = false;
1858        int max_level;
1859        int data_len;
1860
1861        wait_for_unix_gc();
1862        err = scm_send(sock, msg, &scm, false);
1863        if (err < 0)
1864                return err;
1865
1866        err = -EOPNOTSUPP;
1867        if (msg->msg_flags&MSG_OOB)
1868                goto out_err;
1869
            /* An explicit destination is always rejected; the peer is taken
             * from unix_peer(sk) instead.
             */
1870        if (msg->msg_namelen) {
1871                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1872                goto out_err;
1873        } else {
1874                err = -ENOTCONN;
1875                other = unix_peer(sk);
1876                if (!other)
1877                        goto out_err;
1878        }
1879
1880        if (sk->sk_shutdown & SEND_SHUTDOWN)
1881                goto pipe_err;
1882
1883        while (sent < len) {
1884                size = len - sent;
1885
1886                /* Keep two messages in the pipe so it schedules better */
1887                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1888
1889                /* allow fallback to order-0 allocations */
1890                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1891
                    /* data_len is the part of this chunk that goes into page
                     * fragments; the remaining size - data_len bytes go into
                     * the linear area (see skb_put() below).
                     */
1892                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1893
1894                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1895
1896                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1897                                           msg->msg_flags & MSG_DONTWAIT, &err,
1898                                           get_order(UNIX_SKB_FRAGS_SZ));
1899                if (!skb)
1900                        goto out_err;
1901
1902                /* Only send the fds in the first buffer */
1903                err = unix_scm_to_skb(&scm, skb, !fds_sent);
1904                if (err < 0) {
1905                        kfree_skb(skb);
1906                        goto out_err;
1907                }
                    /* The non-negative return value of unix_scm_to_skb() plus
                     * one is what gets compared against the peer's
                     * recursion_level below.
                     */
1908                max_level = err + 1;
1909                fds_sent = true;
1910
1911                skb_put(skb, size - data_len);
1912                skb->data_len = data_len;
1913                skb->len = size;
1914                err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1915                if (err) {
1916                        kfree_skb(skb);
1917                        goto out_err;
1918                }
1919
1920                unix_state_lock(other);
1921
1922                if (sock_flag(other, SOCK_DEAD) ||
1923                    (other->sk_shutdown & RCV_SHUTDOWN))
1924                        goto pipe_err_free;
1925
1926                maybe_add_creds(skb, sock, other);
1927                skb_queue_tail(&other->sk_receive_queue, skb);
1928                if (max_level > unix_sk(other)->recursion_level)
1929                        unix_sk(other)->recursion_level = max_level;
1930                unix_state_unlock(other);
1931                other->sk_data_ready(other);
1932                sent += size;
1933        }
1934
1935        scm_destroy(&scm);
1936
1937        return sent;
1938
1939pipe_err_free:
1940        unix_state_unlock(other);
1941        kfree_skb(skb);
1942pipe_err:
            /* SIGPIPE is raised only when nothing at all was queued. */
1943        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1944                send_sig(SIGPIPE, current, 0);
1945        err = -EPIPE;
1946out_err:
1947        scm_destroy(&scm);
            /* GNU "?:" extension: a partial send reports the byte count,
             * otherwise the error.
             */
1948        return sent ? : err;
1949}
1950
1951static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1952                                    int offset, size_t size, int flags)
1953{
1954        int err;
1955        bool send_sigpipe = false;
1956        bool init_scm = true;
1957        struct scm_cookie scm;
1958        struct sock *other, *sk = socket->sk;
1959        struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1960
1961        if (flags & MSG_OOB)
1962                return -EOPNOTSUPP;
1963
1964        other = unix_peer(sk);
1965        if (!other || sk->sk_state != TCP_ESTABLISHED)
1966                return -ENOTCONN;
1967
1968        if (false) {
1969alloc_skb:
1970                unix_state_unlock(other);
1971                mutex_unlock(&unix_sk(other)->iolock);
1972                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1973                                              &err, 0);
1974                if (!newskb)
1975                        goto err;
1976        }
1977
1978        /* we must acquire iolock as we modify already present
1979         * skbs in the sk_receive_queue and mess with skb->len
1980         */
1981        err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1982        if (err) {
1983                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1984                goto err;
1985        }
1986
1987        if (sk->sk_shutdown & SEND_SHUTDOWN) {
1988                err = -EPIPE;
1989                send_sigpipe = true;
1990                goto err_unlock;
1991        }
1992
1993        unix_state_lock(other);
1994
1995        if (sock_flag(other, SOCK_DEAD) ||
1996            other->sk_shutdown & RCV_SHUTDOWN) {
1997                err = -EPIPE;
1998                send_sigpipe = true;
1999                goto err_state_unlock;
2000        }
2001
2002        if (init_scm) {
2003                err = maybe_init_creds(&scm, socket, other);
2004                if (err)
2005                        goto err_state_unlock;
2006                init_scm = false;
2007        }
2008
2009        skb = skb_peek_tail(&other->sk_receive_queue);
2010        if (tail && tail == skb) {
2011                skb = newskb;
2012        } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2013                if (newskb) {
2014                        skb = newskb;
2015                } else {
2016                        tail = skb;
2017                        goto alloc_skb;
2018                }
2019        } else if (newskb) {
2020                /* this is fast path, we don't necessarily need to
2021                 * call to kfree_skb even though with newskb == NULL
2022                 * this - does no harm
2023                 */
2024                consume_skb(newskb);
2025                newskb = NULL;
2026        }
2027
2028        if (skb_append_pagefrags(skb, page, offset, size)) {
2029                tail = skb;
2030                goto alloc_skb;
2031        }
2032
2033        skb->len += size;
2034        skb->data_len += size;
2035        skb->truesize += size;
2036        refcount_add(size, &sk->sk_wmem_alloc);
2037
2038        if (newskb) {
2039                err = unix_scm_to_skb(&scm, skb, false);
2040                if (err)
2041                        goto err_state_unlock;
2042                spin_lock(&other->sk_receive_queue.lock);
2043                __skb_queue_tail(&other->sk_receive_queue, newskb);
2044                spin_unlock(&other->sk_receive_queue.lock);
2045        }
2046
2047        unix_state_unlock(other);
2048        mutex_unlock(&unix_sk(other)->iolock);
2049
2050        other->sk_data_ready(other);
2051        scm_destroy(&scm);
2052        return size;
2053
2054err_state_unlock:
2055        unix_state_unlock(other);
2056err_unlock:
2057        mutex_unlock(&unix_sk(other)->iolock);
2058err:
2059        kfree_skb(newskb);
2060        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2061                send_sig(SIGPIPE, current, 0);
2062        if (!init_scm)
2063                scm_destroy(&scm);
2064        return err;
2065}
2066
2067static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2068                                  size_t len)
2069{
2070        int err;
2071        struct sock *sk = sock->sk;
2072
2073        err = sock_error(sk);
2074        if (err)
2075                return err;
2076
2077        if (sk->sk_state != TCP_ESTABLISHED)
2078                return -ENOTCONN;
2079
2080        if (msg->msg_namelen)
2081                msg->msg_namelen = 0;
2082
2083        return unix_dgram_sendmsg(sock, msg, len);
2084}
2085
2086static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2087                                  size_t size, int flags)
2088{
2089        struct sock *sk = sock->sk;
2090
2091        if (sk->sk_state != TCP_ESTABLISHED)
2092                return -ENOTCONN;
2093
2094        return unix_dgram_recvmsg(sock, msg, size, flags);
2095}
2096
2097static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2098{
2099        struct unix_sock *u = unix_sk(sk);
2100
2101        if (u->addr) {
2102                msg->msg_namelen = u->addr->len;
2103                memcpy(msg->msg_name, u->addr->name, u->addr->len);
2104        }
2105}
2106
/*
 * unix_dgram_recvmsg - receive one datagram from @sock.
 *
 * Repeatedly tries __skb_try_recv_datagram() with u->iolock held.  While
 * that reports -EAGAIN and a receive timeout remains, the function waits in
 * __skb_wait_for_more_packets() with the iolock released.
 *
 * On success, up to @size bytes starting at the current peek offset are
 * copied to @msg; MSG_TRUNC is set in msg->msg_flags when the datagram is
 * longer than @size.  The sender address is filled in when msg->msg_name is
 * set, and credentials and passed file descriptors are handed over through
 * scm_recv().  With MSG_PEEK the descriptors are duplicated instead of
 * detached.
 *
 * Returns the number of bytes copied, or the remaining datagram length when
 * MSG_TRUNC is set in @flags.  Returns 0 for a SOCK_SEQPACKET socket with
 * RCV_SHUTDOWN set whose receive attempt ended in -EAGAIN, and a negative
 * errno otherwise (-EOPNOTSUPP for MSG_OOB).
 */
2107static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2108                              size_t size, int flags)
2109{
2110        struct scm_cookie scm;
2111        struct sock *sk = sock->sk;
2112        struct unix_sock *u = unix_sk(sk);
2113        struct sk_buff *skb, *last;
2114        long timeo;
2115        int err;
2116        int peeked, skip;
2117
2118        err = -EOPNOTSUPP;
2119        if (flags&MSG_OOB)
2120                goto out;
2121
2122        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2123
            /* The loop is left with the iolock held only when an skb was
             * obtained; every other exit has already dropped it.
             */
2124        do {
2125                mutex_lock(&u->iolock);
2126
2127                skip = sk_peek_offset(sk, flags);
2128                skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2129                                              &err, &last);
2130                if (skb)
2131                        break;
2132
2133                mutex_unlock(&u->iolock);
2134
2135                if (err != -EAGAIN)
2136                        break;
2137        } while (timeo &&
2138                 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2139
2140        if (!skb) { /* implies iolock unlocked */
2141                unix_state_lock(sk);
2142                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2143                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2144                    (sk->sk_shutdown & RCV_SHUTDOWN))
2145                        err = 0;
2146                unix_state_unlock(sk);
2147                goto out;
2148        }
2149
            /* Wake any task sleeping on this socket's peer_wait queue. */
2150        if (wq_has_sleeper(&u->peer_wait))
2151                wake_up_interruptible_sync_poll(&u->peer_wait,
2152                                                POLLOUT | POLLWRNORM |
2153                                                POLLWRBAND);
2154
2155        if (msg->msg_name)
2156                unix_copy_addr(msg, skb->sk);
2157
            /* Clamp the copy to what is left past the peek offset; flag
             * truncation when the caller's buffer is smaller than that.
             */
2158        if (size > skb->len - skip)
2159                size = skb->len - skip;
2160        else if (size < skb->len - skip)
2161                msg->msg_flags |= MSG_TRUNC;
2162
2163        err = skb_copy_datagram_msg(skb, skip, msg, size);
2164        if (err)
2165                goto out_free;
2166
2167        if (sock_flag(sk, SOCK_RCVTSTAMP))
2168                __sock_recv_timestamp(msg, sk, skb);
2169
2170        memset(&scm, 0, sizeof(scm));
2171
2172        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2173        unix_set_secdata(&scm, skb);
2174
2175        if (!(flags & MSG_PEEK)) {
2176                if (UNIXCB(skb).fp)
2177                        unix_detach_fds(&scm, skb);
2178
2179                sk_peek_offset_bwd(sk, skb->len);
2180        } else {
2181                /* It is questionable: on PEEK we could:
2182                   - do not return fds - good, but too simple 8)
2183                   - return fds, and do not return them on read (old strategy,
2184                     apparently wrong)
2185                   - clone fds (I chose it for now, it is the most universal
2186                     solution)
2187
2188                   POSIX 1003.1g does not actually define this clearly
2189                   at all. POSIX 1003.1g doesn't define a lot of things
2190                   clearly however!
2191
2192                */
2193
2194                sk_peek_offset_fwd(sk, size);
2195
2196                if (UNIXCB(skb).fp)
2197                        scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2198        }
            /* With MSG_TRUNC in @flags the full remaining length is reported
             * rather than the number of bytes copied.
             */
2199        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2200
2201        scm_recv(sock, msg, &scm, flags);
2202
2203out_free:
2204        skb_free_datagram(sk, skb);
2205        mutex_unlock(&u->iolock);
2206out:
2207        return err;
2208}
2209
2210/*
2211 *      Sleep until more data has arrived. But check for races..
     *
     *      @last and @last_len describe the tail skb (and its length) the
     *      caller saw; the wait ends as soon as the queue tail differs from
     *      that snapshot, sk_err or RCV_SHUTDOWN is set, a signal is
     *      pending, or the timeout is exhausted.  It also ends when the
     *      socket is found SOCK_DEAD after waking up.  @freezable selects
     *      freezable_schedule_timeout() over schedule_timeout().
     *
     *      Returns the remaining timeout.
2212 */
2213static long unix_stream_data_wait(struct sock *sk, long timeo,
2214                                  struct sk_buff *last, unsigned int last_len,
2215                                  bool freezable)
2216{
2217        struct sk_buff *tail;
2218        DEFINE_WAIT(wait);
2219
2220        unix_state_lock(sk);
2221
2222        for (;;) {
2223                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2224
                    /* All wake-up conditions are evaluated with the state
                     * lock held, after the task is already on the wait queue.
                     */
2225                tail = skb_peek_tail(&sk->sk_receive_queue);
2226                if (tail != last ||
2227                    (tail && tail->len != last_len) ||
2228                    sk->sk_err ||
2229                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2230                    signal_pending(current) ||
2231                    !timeo)
2232                        break;
2233
2234                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2235                unix_state_unlock(sk);
2236                if (freezable)
2237                        timeo = freezable_schedule_timeout(timeo);
2238                else
2239                        timeo = schedule_timeout(timeo);
2240                unix_state_lock(sk);
2241
                    /* Note: this exit leaves SOCKWQ_ASYNC_WAITDATA set. */
2242                if (sock_flag(sk, SOCK_DEAD))
2243                        break;
2244
2245                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2246        }
2247
2248        finish_wait(sk_sleep(sk), &wait);
2249        unix_state_unlock(sk);
2250        return timeo;
2251}
2252
2253static unsigned int unix_skb_len(const struct sk_buff *skb)
2254{
2255        return skb->len - UNIXCB(skb).consumed;
2256}
2257
/*
 * Parameters shared by the stream read paths (recvmsg and splice) and handed
 * to unix_stream_read_generic().
 */
2258struct unix_stream_read_state {
            /* Called as recv_actor(skb, skip, chunk, state) for each skb;
             * returns the number of bytes handled, or a negative value on
             * failure.
             */
2259        int (*recv_actor)(struct sk_buff *, int, int,
2260                          struct unix_stream_read_state *);
            /* Socket being read. */
2261        struct socket *socket;
            /* Destination message; left unset by the splice path. */
2262        struct msghdr *msg;
            /* Destination pipe, used by the splice actor. */
2263        struct pipe_inode_info *pipe;
            /* Maximum number of bytes to read. */
2264        size_t size;
            /* MSG_* flags for the read. */
2265        int flags;
            /* Flags passed through to skb_splice_bits(). */
2266        unsigned int splice_flags;
2267};
2268
/*
 * unix_stream_read_generic - common body of stream recvmsg and splice reads.
 * @state:     socket, destination, size, flags and the per-skb actor
 * @freezable: passed through to unix_stream_data_wait()
 *
 * Walks the receive queue with u->iolock held, skipping the bytes covered by
 * the peek offset, and hands each skb to state->recv_actor().  Without
 * MSG_PEEK the consumed count of the skb is advanced and fully consumed skbs
 * are unlinked; with MSG_PEEK the peek offset is advanced instead and the
 * data stays queued.  When the queue runs dry before the low-water target is
 * met, the iolock is dropped and the function sleeps in
 * unix_stream_data_wait().
 *
 * Returns the number of bytes read when that is non-zero, otherwise err:
 * -EINVAL when the socket is not established, -EOPNOTSUPP for MSG_OOB,
 * -ECONNRESET for a dead socket, -EAGAIN when no timeout remains, the value
 * of sock_error() or sock_intr_errno(), or 0.  A failing actor with nothing
 * read so far yields -EFAULT.
 */
2269static int unix_stream_read_generic(struct unix_stream_read_state *state,
2270                                    bool freezable)
2271{
2272        struct scm_cookie scm;
2273        struct socket *sock = state->socket;
2274        struct sock *sk = sock->sk;
2275        struct unix_sock *u = unix_sk(sk);
2276        int copied = 0;
2277        int flags = state->flags;
2278        int noblock = flags & MSG_DONTWAIT;
2279        bool check_creds = false;
2280        int target;
2281        int err = 0;
2282        long timeo;
2283        int skip;
2284        size_t size = state->size;
2285        unsigned int last_len;
2286
2287        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2288                err = -EINVAL;
2289                goto out;
2290        }
2291
2292        if (unlikely(flags & MSG_OOB)) {
2293                err = -EOPNOTSUPP;
2294                goto out;
2295        }
2296
2297        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2298        timeo = sock_rcvtimeo(sk, noblock);
2299
2300        memset(&scm, 0, sizeof(scm));
2301
2302        /* Lock the socket to prevent queue disordering
2303         * while sleeps in memcpy_tomsg
2304         */
2305        mutex_lock(&u->iolock);
2306
2307        skip = max(sk_peek_offset(sk, flags), 0);
2308
2309        do {
2310                int chunk;
2311                bool drop_skb;
2312                struct sk_buff *skb, *last;
2313
2314redo:
2315                unix_state_lock(sk);
2316                if (sock_flag(sk, SOCK_DEAD)) {
2317                        err = -ECONNRESET;
2318                        goto unlock;
2319                }
2320                last = skb = skb_peek(&sk->sk_receive_queue);
2321                last_len = last ? last->len : 0;
2322again:
                    /* Entered with the state lock held.  last/last_len record
                     * the last skb seen so the wait below can detect new data.
                     */
2323                if (skb == NULL) {
2324                        unix_sk(sk)->recursion_level = 0;
2325                        if (copied >= target)
2326                                goto unlock;
2327
2328                        /*
2329                         *      POSIX 1003.1g mandates this order.
2330                         */
2331
2332                        err = sock_error(sk);
2333                        if (err)
2334                                goto unlock;
2335                        if (sk->sk_shutdown & RCV_SHUTDOWN)
2336                                goto unlock;
2337
2338                        unix_state_unlock(sk);
2339                        if (!timeo) {
2340                                err = -EAGAIN;
2341                                break;
2342                        }
2343
2344                        mutex_unlock(&u->iolock);
2345
2346                        timeo = unix_stream_data_wait(sk, timeo, last,
2347                                                      last_len, freezable);
2348
                            /* The iolock is not held here, so this exit
                             * bypasses the common unlock and scm_recv() below.
                             */
2349                        if (signal_pending(current)) {
2350                                err = sock_intr_errno(timeo);
2351                                scm_destroy(&scm);
2352                                goto out;
2353                        }
2354
2355                        mutex_lock(&u->iolock);
2356                        goto redo;
2357unlock:
2358                        unix_state_unlock(sk);
2359                        break;
2360                }
2361
                    /* Step over skbs that lie entirely before the peek
                     * offset.
                     */
2362                while (skip >= unix_skb_len(skb)) {
2363                        skip -= unix_skb_len(skb);
2364                        last = skb;
2365                        last_len = skb->len;
2366                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2367                        if (!skb)
2368                                goto again;
2369                }
2370
2371                unix_state_unlock(sk);
2372
2373                if (check_creds) {
2374                        /* Never glue messages from different writers */
2375                        if (!unix_skb_scm_eq(skb, &scm))
2376                                break;
2377                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2378                        /* Copy credentials */
2379                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2380                        unix_set_secdata(&scm, skb);
2381                        check_creds = true;
2382                }
2383
2384                /* Copy address just once */
                    /* NOTE(review): sunaddr looks like a block-local declared
                     * by DECLARE_SOCKADDR; clearing it would not change
                     * state->msg->msg_name, so this branch may run again on
                     * later iterations - confirm.
                     */
2385                if (state->msg && state->msg->msg_name) {
2386                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2387                                         state->msg->msg_name);
2388                        unix_copy_addr(state->msg, skb->sk);
2389                        sunaddr = NULL;
2390                }
2391
2392                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                    /* Take an extra reference for the duration of the actor
                     * call; it is dropped by consume_skb() right after.
                     */
2393                skb_get(skb);
2394                chunk = state->recv_actor(skb, skip, chunk, state);
2395                drop_skb = !unix_skb_len(skb);
2396                /* skb is only safe to use if !drop_skb */
2397                consume_skb(skb);
2398                if (chunk < 0) {
2399                        if (copied == 0)
2400                                copied = -EFAULT;
2401                        break;
2402                }
2403                copied += chunk;
2404                size -= chunk;
2405
2406                if (drop_skb) {
2407                        /* the skb was touched by a concurrent reader;
2408                         * we should not expect anything from this skb
2409                         * anymore and assume it invalid - we can be
2410                         * sure it was dropped from the socket queue
2411                         *
2412                         * let's report a short read
2413                         */
2414                        err = 0;
2415                        break;
2416                }
2417
2418                /* Mark read part of skb as used */
2419                if (!(flags & MSG_PEEK)) {
2420                        UNIXCB(skb).consumed += chunk;
2421
2422                        sk_peek_offset_bwd(sk, chunk);
2423
2424                        if (UNIXCB(skb).fp)
2425                                unix_detach_fds(&scm, skb);
2426
2427                        if (unix_skb_len(skb))
2428                                break;
2429
2430                        skb_unlink(skb, &sk->sk_receive_queue);
2431                        consume_skb(skb);
2432
                            /* Stop after an skb that carried descriptors. */
2433                        if (scm.fp)
2434                                break;
2435                } else {
2436                        /* It is questionable, see note in unix_dgram_recvmsg.
2437                         */
2438                        if (UNIXCB(skb).fp)
2439                                scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2440
2441                        sk_peek_offset_fwd(sk, chunk);
2442
2443                        if (UNIXCB(skb).fp)
2444                                break;
2445
2446                        skip = 0;
2447                        last = skb;
2448                        last_len = skb->len;
2449                        unix_state_lock(sk);
2450                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2451                        if (skb)
2452                                goto again;
2453                        unix_state_unlock(sk);
2454                        break;
2455                }
2456        } while (size);
2457
2458        mutex_unlock(&u->iolock);
            /* Without a msghdr (splice path) there is nowhere to deliver
             * control data, so it is released instead.
             */
2459        if (state->msg)
2460                scm_recv(sock, state->msg, &scm, flags);
2461        else
2462                scm_destroy(&scm);
2463out:
2464        return copied ? : err;
2465}
2466
2467static int unix_stream_read_actor(struct sk_buff *skb,
2468                                  int skip, int chunk,
2469                                  struct unix_stream_read_state *state)
2470{
2471        int ret;
2472
2473        ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2474                                    state->msg, chunk);
2475        return ret ?: chunk;
2476}
2477
2478static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2479                               size_t size, int flags)
2480{
2481        struct unix_stream_read_state state = {
2482                .recv_actor = unix_stream_read_actor,
2483                .socket = sock,
2484                .msg = msg,
2485                .size = size,
2486                .flags = flags
2487        };
2488
2489        return unix_stream_read_generic(&state, true);
2490}
2491
2492static int unix_stream_splice_actor(struct sk_buff *skb,
2493                                    int skip, int chunk,
2494                                    struct unix_stream_read_state *state)
2495{
2496        return skb_splice_bits(skb, state->socket->sk,
2497                               UNIXCB(skb).consumed + skip,
2498                               state->pipe, chunk, state->splice_flags);
2499}
2500
2501static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2502                                       struct pipe_inode_info *pipe,
2503                                       size_t size, unsigned int flags)
2504{
2505        struct unix_stream_read_state state = {
2506                .recv_actor = unix_stream_splice_actor,
2507                .socket = sock,
2508                .pipe = pipe,
2509                .size = size,
2510                .splice_flags = flags,
2511        };
2512
2513        if (unlikely(*ppos))
2514                return -ESPIPE;
2515
2516        if (sock->file->f_flags & O_NONBLOCK ||
2517            flags & SPLICE_F_NONBLOCK)
2518                state.flags = MSG_DONTWAIT;
2519
2520        return unix_stream_read_generic(&state, false);
2521}
2522
2523static int unix_shutdown(struct socket *sock, int mode)
2524{
2525        struct sock *sk = sock->sk;
2526        struct sock *other;
2527
2528        if (mode < SHUT_RD || mode > SHUT_RDWR)
2529                return -EINVAL;
2530        /* This maps:
2531         * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2532         * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2533         * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2534         */
2535        ++mode;
2536
2537        unix_state_lock(sk);
2538        sk->sk_shutdown |= mode;
2539        other = unix_peer(sk);
2540        if (other)
2541                sock_hold(other);
2542        unix_state_unlock(sk);
2543        sk->sk_state_change(sk);
2544
2545        if (other &&
2546                (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2547
2548                int peer_mode = 0;
2549
2550                if (mode&RCV_SHUTDOWN)
2551                        peer_mode |= SEND_SHUTDOWN;
2552                if (mode&SEND_SHUTDOWN)
2553                        peer_mode |= RCV_SHUTDOWN;
2554                unix_state_lock(other);
2555                other->sk_shutdown |= peer_mode;
2556                unix_state_unlock(other);
2557                other->sk_state_change(other);
2558                if (peer_mode == SHUTDOWN_MASK)
2559                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2560                else if (peer_mode & RCV_SHUTDOWN)
2561                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2562        }
2563        if (other)
2564                sock_put(other);
2565
2566        return 0;
2567}
2568
2569long unix_inq_len(struct sock *sk)
2570{
2571        struct sk_buff *skb;
2572        long amount = 0;
2573
2574        if (sk->sk_state == TCP_LISTEN)
2575                return -EINVAL;
2576
2577        spin_lock(&sk->sk_receive_queue.lock);
2578        if (sk->sk_type == SOCK_STREAM ||
2579            sk->sk_type == SOCK_SEQPACKET) {
2580                skb_queue_walk(&sk->sk_receive_queue, skb)
2581                        amount += unix_skb_len(skb);
2582        } else {
2583                skb = skb_peek(&sk->sk_receive_queue);
2584                if (skb)
2585                        amount = skb->len;
2586        }
2587        spin_unlock(&sk->sk_receive_queue.lock);
2588
2589        return amount;
2590}
2591EXPORT_SYMBOL_GPL(unix_inq_len);
2592
/*
 * Amount of send-side memory currently charged to @sk, as reported by
 * sk_wmem_alloc_get().
 */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2598
2599static int unix_open_file(struct sock *sk)
2600{
2601        struct path path;
2602        struct file *f;
2603        int fd;
2604
2605        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2606                return -EPERM;
2607
2608        unix_state_lock(sk);
2609        path = unix_sk(sk)->path;
2610        if (!path.dentry) {
2611                unix_state_unlock(sk);
2612                return -ENOENT;
2613        }
2614
2615        path_get(&path);
2616        unix_state_unlock(sk);
2617
2618        fd = get_unused_fd_flags(O_CLOEXEC);
2619        if (fd < 0)
2620                goto out;
2621
2622        f = dentry_open(&path, O_PATH, current_cred());
2623        if (IS_ERR(f)) {
2624                put_unused_fd(fd);
2625                fd = PTR_ERR(f);
2626                goto out;
2627        }
2628
2629        fd_install(fd, f);
2630out:
2631        path_put(&path);
2632
2633        return fd;
2634}
2635
2636static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2637{
2638        struct sock *sk = sock->sk;
2639        long amount = 0;
2640        int err;
2641
2642        switch (cmd) {
2643        case SIOCOUTQ:
2644                amount = unix_outq_len(sk);
2645                err = put_user(amount, (int __user *)arg);
2646                break;
2647        case SIOCINQ:
2648                amount = unix_inq_len(sk);
2649                if (amount < 0)
2650                        err = amount;
2651                else
2652                        err = put_user(amount, (int __user *)arg);
2653                break;
2654        case SIOCUNIXFILE:
2655                err = unix_open_file(sk);
2656                break;
2657        default:
2658                err = -ENOIOCTLCMD;
2659                break;
2660        }
2661        return err;
2662}
2663
2664static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2665{
2666        struct sock *sk = sock->sk;
2667        unsigned int mask;
2668
2669        sock_poll_wait(file, sk_sleep(sk), wait);
2670        mask = 0;
2671
2672        /* exceptional events? */
2673        if (sk->sk_err)
2674                mask |= POLLERR;
2675        if (sk->sk_shutdown == SHUTDOWN_MASK)
2676                mask |= POLLHUP;
2677        if (sk->sk_shutdown & RCV_SHUTDOWN)
2678                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2679
2680        /* readable? */
2681        if (!skb_queue_empty(&sk->sk_receive_queue))
2682                mask |= POLLIN | POLLRDNORM;
2683
2684        /* Connection-based need to check for termination and startup */
2685        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2686            sk->sk_state == TCP_CLOSE)
2687                mask |= POLLHUP;
2688
2689        /*
2690         * we set writable also when the other side has shut down the
2691         * connection. This prevents stuck sockets.
2692         */
2693        if (unix_writable(sk))
2694                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2695
2696        return mask;
2697}
2698
2699static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2700                                    poll_table *wait)
2701{
2702        struct sock *sk = sock->sk, *other;
2703        unsigned int mask, writable;
2704
2705        sock_poll_wait(file, sk_sleep(sk), wait);
2706        mask = 0;
2707
2708        /* exceptional events? */
2709        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2710                mask |= POLLERR |
2711                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2712
2713        if (sk->sk_shutdown & RCV_SHUTDOWN)
2714                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2715        if (sk->sk_shutdown == SHUTDOWN_MASK)
2716                mask |= POLLHUP;
2717
2718        /* readable? */
2719        if (!skb_queue_empty(&sk->sk_receive_queue))
2720                mask |= POLLIN | POLLRDNORM;
2721
2722        /* Connection-based need to check for termination and startup */
2723        if (sk->sk_type == SOCK_SEQPACKET) {
2724                if (sk->sk_state == TCP_CLOSE)
2725                        mask |= POLLHUP;
2726                /* connection hasn't started yet? */
2727                if (sk->sk_state == TCP_SYN_SENT)
2728                        return mask;
2729        }
2730
2731        /* No write status requested, avoid expensive OUT tests. */
2732        if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2733                return mask;
2734
2735        writable = unix_writable(sk);
2736        if (writable) {
2737                unix_state_lock(sk);
2738
2739                other = unix_peer(sk);
2740                if (other && unix_peer(other) != sk &&
2741                    unix_recvq_full(other) &&
2742                    unix_dgram_peer_wake_me(sk, other))
2743                        writable = 0;
2744
2745                unix_state_unlock(sk);
2746        }
2747
2748        if (writable)
2749                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2750        else
2751                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2752
2753        return mask;
2754}
2755
2756#ifdef CONFIG_PROC_FS
2757
/*
 * A seq_file position packs two values: the hash bucket index lives in the
 * bits at and above BUCKET_SPACE, and the 1-based offset of the socket
 * within that bucket lives in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from bucket index b and in-bucket offset o. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2763
2764static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2765{
2766        unsigned long offset = get_offset(*pos);
2767        unsigned long bucket = get_bucket(*pos);
2768        struct sock *sk;
2769        unsigned long count = 0;
2770
2771        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2772                if (sock_net(sk) != seq_file_net(seq))
2773                        continue;
2774                if (++count == offset)
2775                        break;
2776        }
2777
2778        return sk;
2779}
2780
2781static struct sock *unix_next_socket(struct seq_file *seq,
2782                                     struct sock *sk,
2783                                     loff_t *pos)
2784{
2785        unsigned long bucket;
2786
2787        while (sk > (struct sock *)SEQ_START_TOKEN) {
2788                sk = sk_next(sk);
2789                if (!sk)
2790                        goto next_bucket;
2791                if (sock_net(sk) == seq_file_net(seq))
2792                        return sk;
2793        }
2794
2795        do {
2796                sk = unix_from_bucket(seq, pos);
2797                if (sk)
2798                        return sk;
2799
2800next_bucket:
2801                bucket = get_bucket(*pos) + 1;
2802                *pos = set_bucket_offset(bucket, 1);
2803        } while (bucket < ARRAY_SIZE(unix_socket_table));
2804
2805        return NULL;
2806}
2807
2808static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2809        __acquires(unix_table_lock)
2810{
2811        spin_lock(&unix_table_lock);
2812
2813        if (!*pos)
2814                return SEQ_START_TOKEN;
2815
2816        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2817                return NULL;
2818
2819        return unix_next_socket(seq, NULL, pos);
2820}
2821
2822static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2823{
2824        ++*pos;
2825        return unix_next_socket(seq, v, pos);
2826}
2827
2828static void unix_seq_stop(struct seq_file *seq, void *v)
2829        __releases(unix_table_lock)
2830{
2831        spin_unlock(&unix_table_lock);
2832}
2833
2834static int unix_seq_show(struct seq_file *seq, void *v)
2835{
2836
2837        if (v == SEQ_START_TOKEN)
2838                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2839                         "Inode Path\n");
2840        else {
2841                struct sock *s = v;
2842                struct unix_sock *u = unix_sk(s);
2843                unix_state_lock(s);
2844
2845                seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2846                        s,
2847                        refcount_read(&s->sk_refcnt),
2848                        0,
2849                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2850                        s->sk_type,
2851                        s->sk_socket ?
2852                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2853                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2854                        sock_i_ino(s));
2855
2856                if (u->addr) {
2857                        int i, len;
2858                        seq_putc(seq, ' ');
2859
2860                        i = 0;
2861                        len = u->addr->len - sizeof(short);
2862                        if (!UNIX_ABSTRACT(s))
2863                                len--;
2864                        else {
2865                                seq_putc(seq, '@');
2866                                i++;
2867                        }
2868                        for ( ; i < len; i++)
2869                                seq_putc(seq, u->addr->name->sun_path[i] ?:
2870                                         '@');
2871                }
2872                unix_state_unlock(s);
2873                seq_putc(seq, '\n');
2874        }
2875
2876        return 0;
2877}
2878
2879static const struct seq_operations unix_seq_ops = {
2880        .start  = unix_seq_start,
2881        .next   = unix_seq_next,
2882        .stop   = unix_seq_stop,
2883        .show   = unix_seq_show,
2884};
2885
2886static int unix_seq_open(struct inode *inode, struct file *file)
2887{
2888        return seq_open_net(inode, file, &unix_seq_ops,
2889                            sizeof(struct seq_net_private));
2890}
2891
2892static const struct file_operations unix_seq_fops = {
2893        .owner          = THIS_MODULE,
2894        .open           = unix_seq_open,
2895        .read           = seq_read,
2896        .llseek         = seq_lseek,
2897        .release        = seq_release_net,
2898};
2899
2900#endif
2901
2902static const struct net_proto_family unix_family_ops = {
2903        .family = PF_UNIX,
2904        .create = unix_create,
2905        .owner  = THIS_MODULE,
2906};
2907
2908
2909static int __net_init unix_net_init(struct net *net)
2910{
2911        int error = -ENOMEM;
2912
2913        net->unx.sysctl_max_dgram_qlen = 10;
2914        if (unix_sysctl_register(net))
2915                goto out;
2916
2917#ifdef CONFIG_PROC_FS
2918        if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2919                unix_sysctl_unregister(net);
2920                goto out;
2921        }
2922#endif
2923        error = 0;
2924out:
2925        return error;
2926}
2927
2928static void __net_exit unix_net_exit(struct net *net)
2929{
2930        unix_sysctl_unregister(net);
2931        remove_proc_entry("unix", net->proc_net);
2932}
2933
2934static struct pernet_operations unix_net_ops = {
2935        .init = unix_net_init,
2936        .exit = unix_net_exit,
2937};
2938
2939static int __init af_unix_init(void)
2940{
2941        int rc = -1;
2942
2943        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2944
2945        rc = proto_register(&unix_proto, 1);
2946        if (rc != 0) {
2947                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2948                goto out;
2949        }
2950
2951        sock_register(&unix_family_ops);
2952        register_pernet_subsys(&unix_net_ops);
2953out:
2954        return rc;
2955}
2956
2957static void __exit af_unix_exit(void)
2958{
2959        sock_unregister(PF_UNIX);
2960        proto_unregister(&unix_proto);
2961        unregister_pernet_subsys(&unix_net_ops);
2962}
2963
/*
 * Run earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries to use
 * a UNIX socket, but later than subsys_initcall() because we depend
 * on things initialised there.
 */
2968fs_initcall(af_unix_init);
2969module_exit(af_unix_exit);
2970
2971MODULE_LICENSE("GPL");
2972MODULE_ALIAS_NETPROTO(PF_UNIX);
2973