linux/net/unix/af_unix.c
<<
>>
Prefs
   1/*
   2 * NET4:        Implementation of BSD Unix domain sockets.
   3 *
   4 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Fixes:
  12 *              Linus Torvalds  :       Assorted bug cures.
  13 *              Niibe Yutaka    :       async I/O support.
  14 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15 *              Alan Cox        :       Limit size of allocated blocks.
  16 *              Alan Cox        :       Fixed the stupid socketpair bug.
  17 *              Alan Cox        :       BSD compatibility fine tuning.
  18 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19 *              Alan Cox        :       Sorted out a proper draft version of
  20 *                                      file descriptor passing hacked up from
  21 *                                      Mike Shaver's work.
  22 *              Marty Leisner   :       Fixes to fd passing
  23 *              Nick Nevin      :       recvmsg bugfix.
  24 *              Alan Cox        :       Started proper garbage collector
  25 *              Heiko EiBfeldt  :       Missing verify_area check
  26 *              Alan Cox        :       Started POSIXisms
  27 *              Andreas Schwab  :       Replace inode by dentry for proper
  28 *                                      reference counting
  29 *              Kirk Petersen   :       Made this a module
  30 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31 *                                      Lots of bug fixes.
  32 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  33 *                                      by above two patches.
  34 *           Andrea Arcangeli   :       If possible we block in connect(2)
  35 *                                      if the max backlog of the listen socket
  36 *                                      has been reached. This won't break
  37 *                                      old apps and it will avoid a huge number
  38 *                                      of socks hashed (this for unix_gc()
  39 *                                      performance reasons).
  40 *                                      Security fix that limits the max
  41 *                                      number of socks to 2*max_files and
  42 *                                      the number of skb queueable in the
  43 *                                      dgram receiver.
  44 *              Artur Skawina   :       Hash function optimizations
  45 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46 *            Malcolm Beattie   :       Set peercred for socketpair
  47 *           Michal Ostrowski   :       Module initialization cleanup.
  48 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49 *                                      the core infrastructure is doing that
  50 *                                      for all net proto families now (2.5.69+)
  51 *
  52 *
  53 * Known differences from reference BSD that was tested:
  54 *
  55 *      [TO FIX]
  56 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57 *              other the moment one end closes.
  58 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60 *      [NOT TO FIX]
  61 *      accept() returns a path name even if the connecting socket has closed
  62 *              in the meantime (BSD loses the path and gives up).
  63 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66 *      BSD af_unix apparently has connect forgetting to block properly.
  67 *              (need to check this with the POSIX spec in detail)
  68 *
  69 * Differences from 2.0.0-11-... (ANK)
  70 *      Bug fixes and improvements.
  71 *              - client shutdown killed server socket.
  72 *              - removed all useless cli/sti pairs.
  73 *
  74 *      Semantic changes/extensions.
  75 *              - generic control message passing.
  76 *              - SCM_CREDENTIALS control message.
  77 *              - "Abstract" (not FS based) socket bindings.
  78 *                Abstract names are sequences of bytes (not zero terminated)
  79 *                started by 0, so that this name space does not intersect
  80 *                with BSD names.
  81 */
  82
  83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85#include <linux/module.h>
  86#include <linux/kernel.h>
  87#include <linux/signal.h>
  88#include <linux/sched/signal.h>
  89#include <linux/errno.h>
  90#include <linux/string.h>
  91#include <linux/stat.h>
  92#include <linux/dcache.h>
  93#include <linux/namei.h>
  94#include <linux/socket.h>
  95#include <linux/un.h>
  96#include <linux/fcntl.h>
  97#include <linux/termios.h>
  98#include <linux/sockios.h>
  99#include <linux/net.h>
 100#include <linux/in.h>
 101#include <linux/fs.h>
 102#include <linux/slab.h>
 103#include <linux/uaccess.h>
 104#include <linux/skbuff.h>
 105#include <linux/netdevice.h>
 106#include <net/net_namespace.h>
 107#include <net/sock.h>
 108#include <net/tcp_states.h>
 109#include <net/af_unix.h>
 110#include <linux/proc_fs.h>
 111#include <linux/seq_file.h>
 112#include <net/scm.h>
 113#include <linux/init.h>
 114#include <linux/poll.h>
 115#include <linux/rtnetlink.h>
 116#include <linux/mount.h>
 117#include <net/checksum.h>
 118#include <linux/security.h>
 119#include <linux/freezer.h>
 120#include <linux/file.h>
 121
/*
 * Global hash table holding every AF_UNIX socket.  It has
 * 2 * UNIX_HASH_SIZE buckets; unix_sockets_unbound() places sockets that
 * have no address yet in the upper UNIX_HASH_SIZE buckets.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock taken around insertions, removals and lookups on the table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/*
 * Number of AF_UNIX sockets currently accounted: incremented in
 * unix_create1() and decremented in unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
 127
 128
 129static struct hlist_head *unix_sockets_unbound(void *addr)
 130{
 131        unsigned long hash = (unsigned long)addr;
 132
 133        hash ^= hash >> 16;
 134        hash ^= hash >> 8;
 135        hash %= UNIX_HASH_SIZE;
 136        return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137}
 138
/*
 * True when the hash stored in the address of @sk is below UNIX_HASH_SIZE.
 * The macro dereferences unix_sk(sk)->addr unconditionally, so it must
 * only be used on a socket that already has an address.
 * NOTE(review): presumably filesystem-bound sockets store a hash outside
 * this range - confirm at the bind site.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141#ifdef CONFIG_SECURITY_NETWORK
 142static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143{
 144        UNIXCB(skb).secid = scm->secid;
 145}
 146
 147static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148{
 149        scm->secid = UNIXCB(skb).secid;
 150}
 151
 152static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153{
 154        return (scm->secid == UNIXCB(skb).secid);
 155}
 156#else
 157static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158{ }
 159
 160static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161{ }
 162
 163static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164{
 165        return true;
 166}
 167#endif /* CONFIG_SECURITY_NETWORK */
 168
 169/*
 170 *  SMP locking strategy:
 171 *    hash table is protected with spinlock unix_table_lock
 172 *    each socket state is protected by separate spin lock.
 173 */
 174
 175static inline unsigned int unix_hash_fold(__wsum n)
 176{
 177        unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179        hash ^= hash>>8;
 180        return hash&(UNIX_HASH_SIZE-1);
 181}
 182
/*
 * Peer pointer of @sk.  Expands to the struct member itself, so it can be
 * read and also assigned to (unix_release_sock() does "unix_peer(sk) = NULL").
 */
#define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186{
 187        return unix_peer(osk) == sk;
 188}
 189
 190static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191{
 192        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193}
 194
 195static inline int unix_recvq_full(struct sock const *sk)
 196{
 197        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198}
 199
 200struct sock *unix_peer_get(struct sock *s)
 201{
 202        struct sock *peer;
 203
 204        unix_state_lock(s);
 205        peer = unix_peer(s);
 206        if (peer)
 207                sock_hold(peer);
 208        unix_state_unlock(s);
 209        return peer;
 210}
 211EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213static inline void unix_release_addr(struct unix_address *addr)
 214{
 215        if (refcount_dec_and_test(&addr->refcnt))
 216                kfree(addr);
 217}
 218
 219/*
 220 *      Check unix socket name:
 221 *              - should be not zero length.
 222 *              - if started by not zero, should be NULL terminated (FS object)
 223 *              - if started by zero, it is abstract name.
 224 */
 225
 226static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227{
 228        if (len <= sizeof(short) || len > sizeof(*sunaddr))
 229                return -EINVAL;
 230        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 231                return -EINVAL;
 232        if (sunaddr->sun_path[0]) {
 233                /*
 234                 * This may look like an off by one error but it is a bit more
 235                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
 236                 * sun_path[108] doesn't as such exist.  However in kernel space
 237                 * we are guaranteed that it is a valid memory location in our
 238                 * kernel address buffer.
 239                 */
 240                ((char *)sunaddr)[len] = 0;
 241                len = strlen(sunaddr->sun_path)+1+sizeof(short);
 242                return len;
 243        }
 244
 245        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 246        return len;
 247}
 248
 249static void __unix_remove_socket(struct sock *sk)
 250{
 251        sk_del_node_init(sk);
 252}
 253
 254static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 255{
 256        WARN_ON(!sk_unhashed(sk));
 257        sk_add_node(sk, list);
 258}
 259
 260static inline void unix_remove_socket(struct sock *sk)
 261{
 262        spin_lock(&unix_table_lock);
 263        __unix_remove_socket(sk);
 264        spin_unlock(&unix_table_lock);
 265}
 266
 267static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 268{
 269        spin_lock(&unix_table_lock);
 270        __unix_insert_socket(list, sk);
 271        spin_unlock(&unix_table_lock);
 272}
 273
 274static struct sock *__unix_find_socket_byname(struct net *net,
 275                                              struct sockaddr_un *sunname,
 276                                              int len, int type, unsigned int hash)
 277{
 278        struct sock *s;
 279
 280        sk_for_each(s, &unix_socket_table[hash ^ type]) {
 281                struct unix_sock *u = unix_sk(s);
 282
 283                if (!net_eq(sock_net(s), net))
 284                        continue;
 285
 286                if (u->addr->len == len &&
 287                    !memcmp(u->addr->name, sunname, len))
 288                        goto found;
 289        }
 290        s = NULL;
 291found:
 292        return s;
 293}
 294
 295static inline struct sock *unix_find_socket_byname(struct net *net,
 296                                                   struct sockaddr_un *sunname,
 297                                                   int len, int type,
 298                                                   unsigned int hash)
 299{
 300        struct sock *s;
 301
 302        spin_lock(&unix_table_lock);
 303        s = __unix_find_socket_byname(net, sunname, len, type, hash);
 304        if (s)
 305                sock_hold(s);
 306        spin_unlock(&unix_table_lock);
 307        return s;
 308}
 309
 310static struct sock *unix_find_socket_byinode(struct inode *i)
 311{
 312        struct sock *s;
 313
 314        spin_lock(&unix_table_lock);
 315        sk_for_each(s,
 316                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 317                struct dentry *dentry = unix_sk(s)->path.dentry;
 318
 319                if (dentry && d_backing_inode(dentry) == i) {
 320                        sock_hold(s);
 321                        goto found;
 322                }
 323        }
 324        s = NULL;
 325found:
 326        spin_unlock(&unix_table_lock);
 327        return s;
 328}
 329
 330/* Support code for asymmetrically connected dgram sockets
 331 *
 332 * If a datagram socket is connected to a socket not itself connected
 333 * to the first socket (eg, /dev/log), clients may only enqueue more
 334 * messages if the present receive queue of the server socket is not
 335 * "too large". This means there's a second writeability condition
 336 * poll and sendmsg need to test. The dgram recv code will do a wake
 337 * up on the peer_wait wait queue of a socket upon reception of a
 338 * datagram which needs to be propagated to sleeping would-be writers
 339 * since these might not have sent anything so far. This can't be
 340 * accomplished via poll_wait because the lifetime of the server
 341 * socket might be less than that of its clients if these break their
 342 * association with it or if the server socket is closed while clients
 343 * are still connected to it and there's no way to inform "a polling
 344 * implementation" that it should let go of a certain wait queue
 345 *
 346 * In order to propagate a wake up, a wait_queue_entry_t of the client
 347 * socket is enqueued on the peer_wait queue of the server socket
 348 * whose wake function does a wake_up on the ordinary client socket
 349 * wait queue. This connection is established whenever a write (or
 350 * poll for write) hit the flow control condition and broken when the
 351 * association to the server socket is dissolved or after a wake up
 352 * was relayed.
 353 */
 354
 355static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 356                                      void *key)
 357{
 358        struct unix_sock *u;
 359        wait_queue_head_t *u_sleep;
 360
 361        u = container_of(q, struct unix_sock, peer_wake);
 362
 363        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 364                            q);
 365        u->peer_wake.private = NULL;
 366
 367        /* relaying can only happen while the wq still exists */
 368        u_sleep = sk_sleep(&u->sk);
 369        if (u_sleep)
 370                wake_up_interruptible_poll(u_sleep, key);
 371
 372        return 0;
 373}
 374
 375static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 376{
 377        struct unix_sock *u, *u_other;
 378        int rc;
 379
 380        u = unix_sk(sk);
 381        u_other = unix_sk(other);
 382        rc = 0;
 383        spin_lock(&u_other->peer_wait.lock);
 384
 385        if (!u->peer_wake.private) {
 386                u->peer_wake.private = other;
 387                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 388
 389                rc = 1;
 390        }
 391
 392        spin_unlock(&u_other->peer_wait.lock);
 393        return rc;
 394}
 395
 396static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 397                                            struct sock *other)
 398{
 399        struct unix_sock *u, *u_other;
 400
 401        u = unix_sk(sk);
 402        u_other = unix_sk(other);
 403        spin_lock(&u_other->peer_wait.lock);
 404
 405        if (u->peer_wake.private == other) {
 406                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 407                u->peer_wake.private = NULL;
 408        }
 409
 410        spin_unlock(&u_other->peer_wait.lock);
 411}
 412
 413static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 414                                                   struct sock *other)
 415{
 416        unix_dgram_peer_wake_disconnect(sk, other);
 417        wake_up_interruptible_poll(sk_sleep(sk),
 418                                   POLLOUT |
 419                                   POLLWRNORM |
 420                                   POLLWRBAND);
 421}
 422
 423/* preconditions:
 424 *      - unix_peer(sk) == other
 425 *      - association is stable
 426 */
 427static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 428{
 429        int connected;
 430
 431        connected = unix_dgram_peer_wake_connect(sk, other);
 432
 433        if (unix_recvq_full(other))
 434                return 1;
 435
 436        if (connected)
 437                unix_dgram_peer_wake_disconnect(sk, other);
 438
 439        return 0;
 440}
 441
 442static int unix_writable(const struct sock *sk)
 443{
 444        return sk->sk_state != TCP_LISTEN &&
 445               (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 446}
 447
 448static void unix_write_space(struct sock *sk)
 449{
 450        struct socket_wq *wq;
 451
 452        rcu_read_lock();
 453        if (unix_writable(sk)) {
 454                wq = rcu_dereference(sk->sk_wq);
 455                if (skwq_has_sleeper(wq))
 456                        wake_up_interruptible_sync_poll(&wq->wait,
 457                                POLLOUT | POLLWRNORM | POLLWRBAND);
 458                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 459        }
 460        rcu_read_unlock();
 461}
 462
 463/* When dgram socket disconnects (or changes its peer), we clear its receive
 464 * queue of packets arrived from previous peer. First, it allows to do
 465 * flow control based only on wmem_alloc; second, sk connected to peer
 466 * may receive messages only from that peer. */
 467static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 468{
 469        if (!skb_queue_empty(&sk->sk_receive_queue)) {
 470                skb_queue_purge(&sk->sk_receive_queue);
 471                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 472
 473                /* If one link of bidirectional dgram pipe is disconnected,
 474                 * we signal error. Messages are lost. Do not make this,
 475                 * when peer was not connected to us.
 476                 */
 477                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 478                        other->sk_err = ECONNRESET;
 479                        other->sk_error_report(other);
 480                }
 481        }
 482}
 483
 484static void unix_sock_destructor(struct sock *sk)
 485{
 486        struct unix_sock *u = unix_sk(sk);
 487
 488        skb_queue_purge(&sk->sk_receive_queue);
 489
 490        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 491        WARN_ON(!sk_unhashed(sk));
 492        WARN_ON(sk->sk_socket);
 493        if (!sock_flag(sk, SOCK_DEAD)) {
 494                pr_info("Attempt to release alive unix socket: %p\n", sk);
 495                return;
 496        }
 497
 498        if (u->addr)
 499                unix_release_addr(u->addr);
 500
 501        atomic_long_dec(&unix_nr_socks);
 502        local_bh_disable();
 503        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 504        local_bh_enable();
 505#ifdef UNIX_REFCNT_DEBUG
 506        pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 507                atomic_long_read(&unix_nr_socks));
 508#endif
 509}
 510
/*
 * Tear down a unix socket.
 *
 * @sk:      socket to release.
 * @embrion: non-zero when called recursively for a socket found on the
 *           receive queue of a listening socket (see the flush loop
 *           below); unix_release() passes 0.  When set, a stream or
 *           seqpacket peer always gets ECONNRESET.
 *
 * The socket is unhashed, orphaned and moved to TCP_CLOSE, its peer (if
 * any) is notified and dropped, the receive queue is flushed, the bound
 * path is released and finally the reference on @sk is put.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        /* Take over the bound path; it is put after the lock is dropped. */
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        /* Remember the old state: the flush loop needs to know about TCP_LISTEN. */
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /* Unread data or an embryo socket: report a reset to the peer. */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /*
                 * On a socket that was listening, the socket attached to
                 * each queued skb is released as an embryo.
                 */
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}
 584
 585static void init_peercred(struct sock *sk)
 586{
 587        put_pid(sk->sk_peer_pid);
 588        if (sk->sk_peer_cred)
 589                put_cred(sk->sk_peer_cred);
 590        sk->sk_peer_pid  = get_pid(task_tgid(current));
 591        sk->sk_peer_cred = get_current_cred();
 592}
 593
 594static void copy_peercred(struct sock *sk, struct sock *peersk)
 595{
 596        put_pid(sk->sk_peer_pid);
 597        if (sk->sk_peer_cred)
 598                put_cred(sk->sk_peer_cred);
 599        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 600        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 601}
 602
 603static int unix_listen(struct socket *sock, int backlog)
 604{
 605        int err;
 606        struct sock *sk = sock->sk;
 607        struct unix_sock *u = unix_sk(sk);
 608        struct pid *old_pid = NULL;
 609
 610        err = -EOPNOTSUPP;
 611        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 612                goto out;       /* Only stream/seqpacket sockets accept */
 613        err = -EINVAL;
 614        if (!u->addr)
 615                goto out;       /* No listens on an unbound socket */
 616        unix_state_lock(sk);
 617        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 618                goto out_unlock;
 619        if (backlog > sk->sk_max_ack_backlog)
 620                wake_up_interruptible_all(&u->peer_wait);
 621        sk->sk_max_ack_backlog  = backlog;
 622        sk->sk_state            = TCP_LISTEN;
 623        /* set credentials so connect can copy them */
 624        init_peercred(sk);
 625        err = 0;
 626
 627out_unlock:
 628        unix_state_unlock(sk);
 629        put_pid(old_pid);
 630out:
 631        return err;
 632}
 633
/*
 * Forward declarations of the socket operation handlers that the
 * proto_ops tables below refer to.  All of them are static, i.e. they
 * are defined in this file.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);
 660
 661static int unix_set_peek_off(struct sock *sk, int val)
 662{
 663        struct unix_sock *u = unix_sk(sk);
 664
 665        if (mutex_lock_interruptible(&u->iolock))
 666                return -EINTR;
 667
 668        sk->sk_peek_off = val;
 669        mutex_unlock(&u->iolock);
 670
 671        return 0;
 672}
 673
 674
 675static const struct proto_ops unix_stream_ops = {
 676        .family =       PF_UNIX,
 677        .owner =        THIS_MODULE,
 678        .release =      unix_release,
 679        .bind =         unix_bind,
 680        .connect =      unix_stream_connect,
 681        .socketpair =   unix_socketpair,
 682        .accept =       unix_accept,
 683        .getname =      unix_getname,
 684        .poll =         unix_poll,
 685        .ioctl =        unix_ioctl,
 686        .listen =       unix_listen,
 687        .shutdown =     unix_shutdown,
 688        .setsockopt =   sock_no_setsockopt,
 689        .getsockopt =   sock_no_getsockopt,
 690        .sendmsg =      unix_stream_sendmsg,
 691        .recvmsg =      unix_stream_recvmsg,
 692        .mmap =         sock_no_mmap,
 693        .sendpage =     unix_stream_sendpage,
 694        .splice_read =  unix_stream_splice_read,
 695        .set_peek_off = unix_set_peek_off,
 696};
 697
 698static const struct proto_ops unix_dgram_ops = {
 699        .family =       PF_UNIX,
 700        .owner =        THIS_MODULE,
 701        .release =      unix_release,
 702        .bind =         unix_bind,
 703        .connect =      unix_dgram_connect,
 704        .socketpair =   unix_socketpair,
 705        .accept =       sock_no_accept,
 706        .getname =      unix_getname,
 707        .poll =         unix_dgram_poll,
 708        .ioctl =        unix_ioctl,
 709        .listen =       sock_no_listen,
 710        .shutdown =     unix_shutdown,
 711        .setsockopt =   sock_no_setsockopt,
 712        .getsockopt =   sock_no_getsockopt,
 713        .sendmsg =      unix_dgram_sendmsg,
 714        .recvmsg =      unix_dgram_recvmsg,
 715        .mmap =         sock_no_mmap,
 716        .sendpage =     sock_no_sendpage,
 717        .set_peek_off = unix_set_peek_off,
 718};
 719
 720static const struct proto_ops unix_seqpacket_ops = {
 721        .family =       PF_UNIX,
 722        .owner =        THIS_MODULE,
 723        .release =      unix_release,
 724        .bind =         unix_bind,
 725        .connect =      unix_stream_connect,
 726        .socketpair =   unix_socketpair,
 727        .accept =       unix_accept,
 728        .getname =      unix_getname,
 729        .poll =         unix_dgram_poll,
 730        .ioctl =        unix_ioctl,
 731        .listen =       unix_listen,
 732        .shutdown =     unix_shutdown,
 733        .setsockopt =   sock_no_setsockopt,
 734        .getsockopt =   sock_no_getsockopt,
 735        .sendmsg =      unix_seqpacket_sendmsg,
 736        .recvmsg =      unix_seqpacket_recvmsg,
 737        .mmap =         sock_no_mmap,
 738        .sendpage =     sock_no_sendpage,
 739        .set_peek_off = unix_set_peek_off,
 740};
 741
/*
 * Protocol descriptor passed to sk_alloc() in unix_create1(); obj_size
 * makes each allocated socket as large as a struct unix_sock.
 */
static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};
 747
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key
 * (done with lockdep_set_class() in unix_create1()):
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 755
 756static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 757{
 758        struct sock *sk = NULL;
 759        struct unix_sock *u;
 760
 761        atomic_long_inc(&unix_nr_socks);
 762        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 763                goto out;
 764
 765        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 766        if (!sk)
 767                goto out;
 768
 769        sock_init_data(sock, sk);
 770        lockdep_set_class(&sk->sk_receive_queue.lock,
 771                                &af_unix_sk_receive_queue_lock_key);
 772
 773        sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 774        sk->sk_write_space      = unix_write_space;
 775        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 776        sk->sk_destruct         = unix_sock_destructor;
 777        u         = unix_sk(sk);
 778        u->path.dentry = NULL;
 779        u->path.mnt = NULL;
 780        spin_lock_init(&u->lock);
 781        atomic_long_set(&u->inflight, 0);
 782        INIT_LIST_HEAD(&u->link);
 783        mutex_init(&u->iolock); /* single task reading lock */
 784        mutex_init(&u->bindlock); /* single task binding lock */
 785        init_waitqueue_head(&u->peer_wait);
 786        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 787        unix_insert_socket(unix_sockets_unbound(sk), sk);
 788out:
 789        if (sk == NULL)
 790                atomic_long_dec(&unix_nr_socks);
 791        else {
 792                local_bh_disable();
 793                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 794                local_bh_enable();
 795        }
 796        return sk;
 797}
 798
 799static int unix_create(struct net *net, struct socket *sock, int protocol,
 800                       int kern)
 801{
 802        if (protocol && protocol != PF_UNIX)
 803                return -EPROTONOSUPPORT;
 804
 805        sock->state = SS_UNCONNECTED;
 806
 807        switch (sock->type) {
 808        case SOCK_STREAM:
 809                sock->ops = &unix_stream_ops;
 810                break;
 811                /*
 812                 *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 813                 *      nothing uses it.
 814                 */
 815        case SOCK_RAW:
 816                sock->type = SOCK_DGRAM;
 817        case SOCK_DGRAM:
 818                sock->ops = &unix_dgram_ops;
 819                break;
 820        case SOCK_SEQPACKET:
 821                sock->ops = &unix_seqpacket_ops;
 822                break;
 823        default:
 824                return -ESOCKTNOSUPPORT;
 825        }
 826
 827        return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 828}
 829
 830static int unix_release(struct socket *sock)
 831{
 832        struct sock *sk = sock->sk;
 833
 834        if (!sk)
 835                return 0;
 836
 837        unix_release_sock(sk, 0);
 838        sock->sk = NULL;
 839
 840        return 0;
 841}
 842
 843static int unix_autobind(struct socket *sock)
 844{
 845        struct sock *sk = sock->sk;
 846        struct net *net = sock_net(sk);
 847        struct unix_sock *u = unix_sk(sk);
 848        static u32 ordernum = 1;
 849        struct unix_address *addr;
 850        int err;
 851        unsigned int retries = 0;
 852
 853        err = mutex_lock_interruptible(&u->bindlock);
 854        if (err)
 855                return err;
 856
 857        err = 0;
 858        if (u->addr)
 859                goto out;
 860
 861        err = -ENOMEM;
 862        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 863        if (!addr)
 864                goto out;
 865
 866        addr->name->sun_family = AF_UNIX;
 867        refcount_set(&addr->refcnt, 1);
 868
 869retry:
 870        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 871        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 872
 873        spin_lock(&unix_table_lock);
 874        ordernum = (ordernum+1)&0xFFFFF;
 875
 876        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 877                                      addr->hash)) {
 878                spin_unlock(&unix_table_lock);
 879                /*
 880                 * __unix_find_socket_byname() may take long time if many names
 881                 * are already in use.
 882                 */
 883                cond_resched();
 884                /* Give up if all names seems to be in use. */
 885                if (retries++ == 0xFFFFF) {
 886                        err = -ENOSPC;
 887                        kfree(addr);
 888                        goto out;
 889                }
 890                goto retry;
 891        }
 892        addr->hash ^= sk->sk_type;
 893
 894        __unix_remove_socket(sk);
 895        u->addr = addr;
 896        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 897        spin_unlock(&unix_table_lock);
 898        err = 0;
 899
 900out:    mutex_unlock(&u->bindlock);
 901        return err;
 902}
 903
 904static struct sock *unix_find_other(struct net *net,
 905                                    struct sockaddr_un *sunname, int len,
 906                                    int type, unsigned int hash, int *error)
 907{
 908        struct sock *u;
 909        struct path path;
 910        int err = 0;
 911
 912        if (sunname->sun_path[0]) {
 913                struct inode *inode;
 914                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 915                if (err)
 916                        goto fail;
 917                inode = d_backing_inode(path.dentry);
 918                err = inode_permission(inode, MAY_WRITE);
 919                if (err)
 920                        goto put_fail;
 921
 922                err = -ECONNREFUSED;
 923                if (!S_ISSOCK(inode->i_mode))
 924                        goto put_fail;
 925                u = unix_find_socket_byinode(inode);
 926                if (!u)
 927                        goto put_fail;
 928
 929                if (u->sk_type == type)
 930                        touch_atime(&path);
 931
 932                path_put(&path);
 933
 934                err = -EPROTOTYPE;
 935                if (u->sk_type != type) {
 936                        sock_put(u);
 937                        goto fail;
 938                }
 939        } else {
 940                err = -ECONNREFUSED;
 941                u = unix_find_socket_byname(net, sunname, len, type, hash);
 942                if (u) {
 943                        struct dentry *dentry;
 944                        dentry = unix_sk(u)->path.dentry;
 945                        if (dentry)
 946                                touch_atime(&unix_sk(u)->path);
 947                } else
 948                        goto fail;
 949        }
 950        return u;
 951
 952put_fail:
 953        path_put(&path);
 954fail:
 955        *error = err;
 956        return NULL;
 957}
 958
 959static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 960{
 961        struct dentry *dentry;
 962        struct path path;
 963        int err = 0;
 964        /*
 965         * Get the parent directory, calculate the hash for last
 966         * component.
 967         */
 968        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 969        err = PTR_ERR(dentry);
 970        if (IS_ERR(dentry))
 971                return err;
 972
 973        /*
 974         * All right, let's create it.
 975         */
 976        err = security_path_mknod(&path, dentry, mode, 0);
 977        if (!err) {
 978                err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 979                if (!err) {
 980                        res->mnt = mntget(path.mnt);
 981                        res->dentry = dget(dentry);
 982                }
 983        }
 984        done_path_create(&path, dentry);
 985        return err;
 986}
 987
 988static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 989{
 990        struct sock *sk = sock->sk;
 991        struct net *net = sock_net(sk);
 992        struct unix_sock *u = unix_sk(sk);
 993        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 994        char *sun_path = sunaddr->sun_path;
 995        int err;
 996        unsigned int hash;
 997        struct unix_address *addr;
 998        struct hlist_head *list;
 999        struct path path = { };
1000
1001        err = -EINVAL;
1002        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1003            sunaddr->sun_family != AF_UNIX)
1004                goto out;
1005
1006        if (addr_len == sizeof(short)) {
1007                err = unix_autobind(sock);
1008                goto out;
1009        }
1010
1011        err = unix_mkname(sunaddr, addr_len, &hash);
1012        if (err < 0)
1013                goto out;
1014        addr_len = err;
1015
1016        if (sun_path[0]) {
1017                umode_t mode = S_IFSOCK |
1018                       (SOCK_INODE(sock)->i_mode & ~current_umask());
1019                err = unix_mknod(sun_path, mode, &path);
1020                if (err) {
1021                        if (err == -EEXIST)
1022                                err = -EADDRINUSE;
1023                        goto out;
1024                }
1025        }
1026
1027        err = mutex_lock_interruptible(&u->bindlock);
1028        if (err)
1029                goto out_put;
1030
1031        err = -EINVAL;
1032        if (u->addr)
1033                goto out_up;
1034
1035        err = -ENOMEM;
1036        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1037        if (!addr)
1038                goto out_up;
1039
1040        memcpy(addr->name, sunaddr, addr_len);
1041        addr->len = addr_len;
1042        addr->hash = hash ^ sk->sk_type;
1043        refcount_set(&addr->refcnt, 1);
1044
1045        if (sun_path[0]) {
1046                addr->hash = UNIX_HASH_SIZE;
1047                hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1048                spin_lock(&unix_table_lock);
1049                u->path = path;
1050                list = &unix_socket_table[hash];
1051        } else {
1052                spin_lock(&unix_table_lock);
1053                err = -EADDRINUSE;
1054                if (__unix_find_socket_byname(net, sunaddr, addr_len,
1055                                              sk->sk_type, hash)) {
1056                        unix_release_addr(addr);
1057                        goto out_unlock;
1058                }
1059
1060                list = &unix_socket_table[addr->hash];
1061        }
1062
1063        err = 0;
1064        __unix_remove_socket(sk);
1065        u->addr = addr;
1066        __unix_insert_socket(list, sk);
1067
1068out_unlock:
1069        spin_unlock(&unix_table_lock);
1070out_up:
1071        mutex_unlock(&u->bindlock);
1072out_put:
1073        if (err)
1074                path_put(&path);
1075out:
1076        return err;
1077}
1078
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * If @sk2 is NULL or the same socket as @sk1, only @sk1 is locked.
 * Otherwise the socket with the lower address is locked first and the
 * other one is taken with the nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1093
/*
 * Release the locks taken by unix_state_double_lock(): @sk1 is always
 * unlocked, @sk2 only when it is non-NULL and a different socket.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && sk2 != sk1)
		unix_state_unlock(sk2);
}
1103
1104static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1105                              int alen, int flags)
1106{
1107        struct sock *sk = sock->sk;
1108        struct net *net = sock_net(sk);
1109        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1110        struct sock *other;
1111        unsigned int hash;
1112        int err;
1113
1114        err = -EINVAL;
1115        if (alen < offsetofend(struct sockaddr, sa_family))
1116                goto out;
1117
1118        if (addr->sa_family != AF_UNSPEC) {
1119                err = unix_mkname(sunaddr, alen, &hash);
1120                if (err < 0)
1121                        goto out;
1122                alen = err;
1123
1124                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1125                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1126                        goto out;
1127
1128restart:
1129                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1130                if (!other)
1131                        goto out;
1132
1133                unix_state_double_lock(sk, other);
1134
1135                /* Apparently VFS overslept socket death. Retry. */
1136                if (sock_flag(other, SOCK_DEAD)) {
1137                        unix_state_double_unlock(sk, other);
1138                        sock_put(other);
1139                        goto restart;
1140                }
1141
1142                err = -EPERM;
1143                if (!unix_may_send(sk, other))
1144                        goto out_unlock;
1145
1146                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1147                if (err)
1148                        goto out_unlock;
1149
1150        } else {
1151                /*
1152                 *      1003.1g breaking connected state with AF_UNSPEC
1153                 */
1154                other = NULL;
1155                unix_state_double_lock(sk, other);
1156        }
1157
1158        /*
1159         * If it was connected, reconnect.
1160         */
1161        if (unix_peer(sk)) {
1162                struct sock *old_peer = unix_peer(sk);
1163                unix_peer(sk) = other;
1164                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1165
1166                unix_state_double_unlock(sk, other);
1167
1168                if (other != old_peer)
1169                        unix_dgram_disconnected(sk, old_peer);
1170                sock_put(old_peer);
1171        } else {
1172                unix_peer(sk) = other;
1173                unix_state_double_unlock(sk, other);
1174        }
1175        return 0;
1176
1177out_unlock:
1178        unix_state_double_unlock(sk, other);
1179        sock_put(other);
1180out:
1181        return err;
1182}
1183
1184static long unix_wait_for_peer(struct sock *other, long timeo)
1185{
1186        struct unix_sock *u = unix_sk(other);
1187        int sched;
1188        DEFINE_WAIT(wait);
1189
1190        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1191
1192        sched = !sock_flag(other, SOCK_DEAD) &&
1193                !(other->sk_shutdown & RCV_SHUTDOWN) &&
1194                unix_recvq_full(other);
1195
1196        unix_state_unlock(other);
1197
1198        if (sched)
1199                timeo = schedule_timeout(timeo);
1200
1201        finish_wait(&u->peer_wait, &wait);
1202        return timeo;
1203}
1204
/*
 * connect() for stream and seqpacket sockets.
 *
 * A new sock (@newsk) representing the server side of the connection and
 * a one-byte skb are allocated up front.  Once the listening socket has
 * been found and both state locks are held, @newsk and @sk are cross-linked
 * as peers and the skb is queued on the listener's receive queue.
 *
 * Returns 0 on success or a negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	long timeo;
	int st;
	int err;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/*
	 * Allocate all resources first.  Doing it after the state is locked
	 * would force us to recheck everything again anyway.
	 */
	err = -ENOMEM;

	/* New sock for the completed connection. */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (!newsk)
		goto out;

	/* skb that is queued on the listening sock. */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (!skb)
		goto out;

restart:
	/* Find the listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch the state of the peer. */
	unix_state_lock(other);

	/* The lookup returned a socket that has died since; retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* unix_wait_for_peer() drops other's state lock. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/*
	 * Latch our own state.
	 *
	 * This is the tricky part: we need our state lock and cannot drop
	 * the peer's, which could deadlock.  Connect-to-self and
	 * simultaneous connect attempts are ruled out by checking the
	 * socket state: other is TCP_LISTEN, and if sk were TCP_LISTEN too
	 * we bail out before trying to take the lock.
	 *
	 * The state has to be rechecked once the socket is locked.
	 */
	st = sk->sk_state;
	if (st != TCP_CLOSE) {
		/* Already connected, or in a state that cannot connect. */
		err = (st == TCP_ESTABLISHED) ? -EISCONN : -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open: quickly set all the necessary fields. */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* Copy the address information from the listening to the new sock. */
	if (otheru->addr) {
		refcount_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials. */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* Queue the skb on the listening sock and notify it. */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1379
1380static int unix_socketpair(struct socket *socka, struct socket *sockb)
1381{
1382        struct sock *ska = socka->sk, *skb = sockb->sk;
1383
1384        /* Join our sockets back to back */
1385        sock_hold(ska);
1386        sock_hold(skb);
1387        unix_peer(ska) = skb;
1388        unix_peer(skb) = ska;
1389        init_peercred(ska);
1390        init_peercred(skb);
1391
1392        if (ska->sk_type != SOCK_DGRAM) {
1393                ska->sk_state = TCP_ESTABLISHED;
1394                skb->sk_state = TCP_ESTABLISHED;
1395                socka->state  = SS_CONNECTED;
1396                sockb->state  = SS_CONNECTED;
1397        }
1398        return 0;
1399}
1400
1401static void unix_sock_inherit_flags(const struct socket *old,
1402                                    struct socket *new)
1403{
1404        if (test_bit(SOCK_PASSCRED, &old->flags))
1405                set_bit(SOCK_PASSCRED, &new->flags);
1406        if (test_bit(SOCK_PASSSEC, &old->flags))
1407                set_bit(SOCK_PASSSEC, &new->flags);
1408}
1409
1410static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1411                       bool kern)
1412{
1413        struct sock *sk = sock->sk;
1414        struct sock *tsk;
1415        struct sk_buff *skb;
1416        int err;
1417
1418        err = -EOPNOTSUPP;
1419        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1420                goto out;
1421
1422        err = -EINVAL;
1423        if (sk->sk_state != TCP_LISTEN)
1424                goto out;
1425
1426        /* If socket state is TCP_LISTEN it cannot change (for now...),
1427         * so that no locks are necessary.
1428         */
1429
1430        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1431        if (!skb) {
1432                /* This means receive shutdown. */
1433                if (err == 0)
1434                        err = -EINVAL;
1435                goto out;
1436        }
1437
1438        tsk = skb->sk;
1439        skb_free_datagram(sk, skb);
1440        wake_up_interruptible(&unix_sk(sk)->peer_wait);
1441
1442        /* attach accepted sock to socket */
1443        unix_state_lock(tsk);
1444        newsock->state = SS_CONNECTED;
1445        unix_sock_inherit_flags(sock, newsock);
1446        sock_graft(tsk, newsock);
1447        unix_state_unlock(tsk);
1448        return 0;
1449
1450out:
1451        return err;
1452}
1453
1454
1455static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1456{
1457        struct sock *sk = sock->sk;
1458        struct unix_sock *u;
1459        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1460        int err = 0;
1461
1462        if (peer) {
1463                sk = unix_peer_get(sk);
1464
1465                err = -ENOTCONN;
1466                if (!sk)
1467                        goto out;
1468                err = 0;
1469        } else {
1470                sock_hold(sk);
1471        }
1472
1473        u = unix_sk(sk);
1474        unix_state_lock(sk);
1475        if (!u->addr) {
1476                sunaddr->sun_family = AF_UNIX;
1477                sunaddr->sun_path[0] = 0;
1478                *uaddr_len = sizeof(short);
1479        } else {
1480                struct unix_address *addr = u->addr;
1481
1482                *uaddr_len = addr->len;
1483                memcpy(sunaddr, addr->name, *uaddr_len);
1484        }
1485        unix_state_unlock(sk);
1486        sock_put(sk);
1487out:
1488        return err;
1489}
1490
1491static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1492{
1493        int i;
1494
1495        scm->fp = UNIXCB(skb).fp;
1496        UNIXCB(skb).fp = NULL;
1497
1498        for (i = scm->fp->count-1; i >= 0; i--)
1499                unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1500}
1501
1502static void unix_destruct_scm(struct sk_buff *skb)
1503{
1504        struct scm_cookie scm;
1505        memset(&scm, 0, sizeof(scm));
1506        scm.pid  = UNIXCB(skb).pid;
1507        if (UNIXCB(skb).fp)
1508                unix_detach_fds(&scm, skb);
1509
1510        /* Alas, it calls VFS */
1511        /* So fscking what? fput() had been SMP-safe since the last Summer */
1512        scm_destroy(&scm);
1513        sock_wfree(skb);
1514}
1515
1516/*
1517 * The "user->unix_inflight" variable is protected by the garbage
1518 * collection lock, and we just read it locklessly here. If you go
1519 * over the limit, there might be a tiny race in actually noticing
1520 * it across threads. Tough.
1521 */
1522static inline bool too_many_unix_fds(struct task_struct *p)
1523{
1524        struct user_struct *user = current_user();
1525
1526        if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1527                return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1528        return false;
1529}
1530
1531static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1532{
1533        int i;
1534
1535        if (too_many_unix_fds(current))
1536                return -ETOOMANYREFS;
1537
1538        /*
1539         * Need to duplicate file references for the sake of garbage
1540         * collection.  Otherwise a socket in the fps might become a
1541         * candidate for GC while the skb is not yet queued.
1542         */
1543        UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1544        if (!UNIXCB(skb).fp)
1545                return -ENOMEM;
1546
1547        for (i = scm->fp->count - 1; i >= 0; i--)
1548                unix_inflight(scm->fp->user, scm->fp->fp[i]);
1549        return 0;
1550}
1551
1552static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1553{
1554        int err = 0;
1555
1556        UNIXCB(skb).pid  = get_pid(scm->pid);
1557        UNIXCB(skb).uid = scm->creds.uid;
1558        UNIXCB(skb).gid = scm->creds.gid;
1559        UNIXCB(skb).fp = NULL;
1560        unix_get_secdata(scm, skb);
1561        if (scm->fp && send_fds)
1562                err = unix_attach_fds(scm, skb);
1563
1564        skb->destructor = unix_destruct_scm;
1565        return err;
1566}
1567
1568static bool unix_passcred_enabled(const struct socket *sock,
1569                                  const struct sock *other)
1570{
1571        return test_bit(SOCK_PASSCRED, &sock->flags) ||
1572               !other->sk_socket ||
1573               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1574}
1575
1576/*
1577 * Some apps rely on write() giving SCM_CREDENTIALS
1578 * We include credentials if source or destination socket
1579 * asserted SOCK_PASSCRED.
1580 */
1581static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1582                            const struct sock *other)
1583{
1584        if (UNIXCB(skb).pid)
1585                return;
1586        if (unix_passcred_enabled(sock, other)) {
1587                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1588                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1589        }
1590}
1591
1592static int maybe_init_creds(struct scm_cookie *scm,
1593                            struct socket *socket,
1594                            const struct sock *other)
1595{
1596        int err;
1597        struct msghdr msg = { .msg_controllen = 0 };
1598
1599        err = scm_send(socket, &msg, scm, false);
1600        if (err)
1601                return err;
1602
1603        if (unix_passcred_enabled(socket, other)) {
1604                scm->pid = get_pid(task_tgid(current));
1605                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1606        }
1607        return err;
1608}
1609
1610static bool unix_skb_scm_eq(struct sk_buff *skb,
1611                            struct scm_cookie *scm)
1612{
1613        const struct unix_skb_parms *u = &UNIXCB(skb);
1614
1615        return u->pid == scm->pid &&
1616               uid_eq(u->uid, scm->creds.uid) &&
1617               gid_eq(u->gid, scm->creds.gid) &&
1618               unix_secdata_eq(scm, skb);
1619}
1620
1621/*
1622 *      Send AF_UNIX data.
1623 */
1624
1625static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1626                              size_t len)
1627{
1628        struct sock *sk = sock->sk;
1629        struct net *net = sock_net(sk);
1630        struct unix_sock *u = unix_sk(sk);
1631        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1632        struct sock *other = NULL;
1633        int namelen = 0; /* fake GCC */
1634        int err;
1635        unsigned int hash;
1636        struct sk_buff *skb;
1637        long timeo;
1638        struct scm_cookie scm;
1639        int data_len = 0;
1640        int sk_locked;
1641
1642        wait_for_unix_gc();
1643        err = scm_send(sock, msg, &scm, false);
1644        if (err < 0)
1645                return err;
1646
1647        err = -EOPNOTSUPP;
1648        if (msg->msg_flags&MSG_OOB)
1649                goto out;
1650
1651        if (msg->msg_namelen) {
1652                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1653                if (err < 0)
1654                        goto out;
1655                namelen = err;
1656        } else {
1657                sunaddr = NULL;
1658                err = -ENOTCONN;
1659                other = unix_peer_get(sk);
1660                if (!other)
1661                        goto out;
1662        }
1663
1664        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1665            && (err = unix_autobind(sock)) != 0)
1666                goto out;
1667
1668        err = -EMSGSIZE;
1669        if (len > sk->sk_sndbuf - 32)
1670                goto out;
1671
1672        if (len > SKB_MAX_ALLOC) {
1673                data_len = min_t(size_t,
1674                                 len - SKB_MAX_ALLOC,
1675                                 MAX_SKB_FRAGS * PAGE_SIZE);
1676                data_len = PAGE_ALIGN(data_len);
1677
1678                BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1679        }
1680
1681        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1682                                   msg->msg_flags & MSG_DONTWAIT, &err,
1683                                   PAGE_ALLOC_COSTLY_ORDER);
1684        if (skb == NULL)
1685                goto out;
1686
1687        err = unix_scm_to_skb(&scm, skb, true);
1688        if (err < 0)
1689                goto out_free;
1690
1691        skb_put(skb, len - data_len);
1692        skb->data_len = data_len;
1693        skb->len = len;
1694        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1695        if (err)
1696                goto out_free;
1697
1698        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1699
1700restart:
1701        if (!other) {
1702                err = -ECONNRESET;
1703                if (sunaddr == NULL)
1704                        goto out_free;
1705
1706                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1707                                        hash, &err);
1708                if (other == NULL)
1709                        goto out_free;
1710        }
1711
1712        if (sk_filter(other, skb) < 0) {
1713                /* Toss the packet but do not return any error to the sender */
1714                err = len;
1715                goto out_free;
1716        }
1717
1718        sk_locked = 0;
1719        unix_state_lock(other);
1720restart_locked:
1721        err = -EPERM;
1722        if (!unix_may_send(sk, other))
1723                goto out_unlock;
1724
1725        if (unlikely(sock_flag(other, SOCK_DEAD))) {
1726                /*
1727                 *      Check with 1003.1g - what should
1728                 *      datagram error
1729                 */
1730                unix_state_unlock(other);
1731                sock_put(other);
1732
1733                if (!sk_locked)
1734                        unix_state_lock(sk);
1735
1736                err = 0;
1737                if (unix_peer(sk) == other) {
1738                        unix_peer(sk) = NULL;
1739                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1740
1741                        unix_state_unlock(sk);
1742
1743                        unix_dgram_disconnected(sk, other);
1744                        sock_put(other);
1745                        err = -ECONNREFUSED;
1746                } else {
1747                        unix_state_unlock(sk);
1748                }
1749
1750                other = NULL;
1751                if (err)
1752                        goto out_free;
1753                goto restart;
1754        }
1755
1756        err = -EPIPE;
1757        if (other->sk_shutdown & RCV_SHUTDOWN)
1758                goto out_unlock;
1759
1760        if (sk->sk_type != SOCK_SEQPACKET) {
1761                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1762                if (err)
1763                        goto out_unlock;
1764        }
1765
1766        /* other == sk && unix_peer(other) != sk if
1767         * - unix_peer(sk) == NULL, destination address bound to sk
1768         * - unix_peer(sk) == sk by time of get but disconnected before lock
1769         */
1770        if (other != sk &&
1771            unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1772                if (timeo) {
1773                        timeo = unix_wait_for_peer(other, timeo);
1774
1775                        err = sock_intr_errno(timeo);
1776                        if (signal_pending(current))
1777                                goto out_free;
1778
1779                        goto restart;
1780                }
1781
1782                if (!sk_locked) {
1783                        unix_state_unlock(other);
1784                        unix_state_double_lock(sk, other);
1785                }
1786
1787                if (unix_peer(sk) != other ||
1788                    unix_dgram_peer_wake_me(sk, other)) {
1789                        err = -EAGAIN;
1790                        sk_locked = 1;
1791                        goto out_unlock;
1792                }
1793
1794                if (!sk_locked) {
1795                        sk_locked = 1;
1796                        goto restart_locked;
1797                }
1798        }
1799
1800        if (unlikely(sk_locked))
1801                unix_state_unlock(sk);
1802
1803        if (sock_flag(other, SOCK_RCVTSTAMP))
1804                __net_timestamp(skb);
1805        maybe_add_creds(skb, sock, other);
1806        skb_queue_tail(&other->sk_receive_queue, skb);
1807        unix_state_unlock(other);
1808        other->sk_data_ready(other);
1809        sock_put(other);
1810        scm_destroy(&scm);
1811        return len;
1812
1813out_unlock:
1814        if (sk_locked)
1815                unix_state_unlock(sk);
1816        unix_state_unlock(other);
1817out_free:
1818        kfree_skb(skb);
1819out:
1820        if (other)
1821                sock_put(other);
1822        scm_destroy(&scm);
1823        return err;
1824}
1825
1826/* We use paged skbs for stream sockets, and limit occupancy to 32768
1827 * bytes, and a minimum of a full page.
1828 */
1829#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1830
/*
 *      Send data on a stream socket.
 *
 *      The payload is cut into chunks; every chunk is copied into a newly
 *      allocated skb which is then appended to the peer's receive queue.
 *
 *      Fails with -EOPNOTSUPP for MSG_OOB, with -EISCONN/-EOPNOTSUPP when a
 *      destination address is supplied, with -ENOTCONN when there is no
 *      peer, and with -EPIPE when this side is shut down for sending or
 *      the peer is dead or shut down for receiving.
 *
 *      Returns the number of bytes queued if any were queued, otherwise
 *      the negative error code.
 */
1831static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1832                               size_t len)
1833{
1834        struct sock *sk = sock->sk;
1835        struct sock *other = NULL;
1836        int err, size;
1837        struct sk_buff *skb;
1838        int sent = 0;
1839        struct scm_cookie scm;
1840        bool fds_sent = false;
1841        int data_len;
1842
1843        wait_for_unix_gc();
1844        err = scm_send(sock, msg, &scm, false);
1845        if (err < 0)
1846                return err;
1847
        /* From here on every exit must go through scm_destroy(&scm). */
1848        err = -EOPNOTSUPP;
1849        if (msg->msg_flags&MSG_OOB)
1850                goto out_err;
1851
        /* An explicit destination address is never accepted here. */
1852        if (msg->msg_namelen) {
1853                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1854                goto out_err;
1855        } else {
1856                err = -ENOTCONN;
1857                other = unix_peer(sk);
1858                if (!other)
1859                        goto out_err;
1860        }
1861
1862        if (sk->sk_shutdown & SEND_SHUTDOWN)
1863                goto pipe_err;
1864
1865        while (sent < len) {
1866                size = len - sent;
1867
1868                /* Keep two messages in the pipe so it schedules better */
1869                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1870
1871                /* allow fallback to order-0 allocations */
1872                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1873
                /* data_len is the part of the chunk that goes into page
                 * frags: whatever exceeds SKB_MAX_HEAD(0), page aligned
                 * and capped at the chunk size. The remainder
                 * (size - data_len) is the linear part.
                 */
1874                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1875
1876                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1877
1878                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1879                                           msg->msg_flags & MSG_DONTWAIT, &err,
1880                                           get_order(UNIX_SKB_FRAGS_SZ));
1881                if (!skb)
1882                        goto out_err;
1883
1884                /* Only send the fds in the first buffer */
1885                err = unix_scm_to_skb(&scm, skb, !fds_sent);
1886                if (err < 0) {
1887                        kfree_skb(skb);
1888                        goto out_err;
1889                }
1890                fds_sent = true;
1891
                /* Set up linear and paged lengths by hand, then fill both
                 * from the caller's iterator.
                 */
1892                skb_put(skb, size - data_len);
1893                skb->data_len = data_len;
1894                skb->len = size;
1895                err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1896                if (err) {
1897                        kfree_skb(skb);
1898                        goto out_err;
1899                }
1900
                /* The peer state is re-checked under its state lock for
                 * every chunk before the skb is queued.
                 */
1901                unix_state_lock(other);
1902
1903                if (sock_flag(other, SOCK_DEAD) ||
1904                    (other->sk_shutdown & RCV_SHUTDOWN))
1905                        goto pipe_err_free;
1906
1907                maybe_add_creds(skb, sock, other);
1908                skb_queue_tail(&other->sk_receive_queue, skb);
1909                unix_state_unlock(other);
1910                other->sk_data_ready(other);
1911                sent += size;
1912        }
1913
1914        scm_destroy(&scm);
1915
1916        return sent;
1917
        /* pipe_err_free: peer state lock held and skb not yet queued. */
1918pipe_err_free:
1919        unix_state_unlock(other);
1920        kfree_skb(skb);
1921pipe_err:
        /* SIGPIPE is only raised when nothing at all was sent. */
1922        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1923                send_sig(SIGPIPE, current, 0);
1924        err = -EPIPE;
1925out_err:
1926        scm_destroy(&scm);
        /* A partial send reports the byte count instead of the error. */
1927        return sent ? : err;
1928}
1929
/*
 *      Append @size bytes of @page (starting at @offset) to the peer's
 *      receive queue.
 *
 *      The page is attached as a fragment to the skb at the tail of the
 *      peer's queue when that skb passes unix_skb_scm_eq() against the
 *      sender's scm; otherwise, or when the tail skb cannot take another
 *      fragment, a new empty skb is allocated and queued.
 *
 *      Returns @size on success or a negative error code. -EPIPE is
 *      accompanied by SIGPIPE unless MSG_NOSIGNAL is set in @flags.
 */
1930static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1931                                    int offset, size_t size, int flags)
1932{
1933        int err;
1934        bool send_sigpipe = false;
1935        bool init_scm = true;
1936        struct scm_cookie scm;
1937        struct sock *other, *sk = socket->sk;
1938        struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1939
1940        if (flags & MSG_OOB)
1941                return -EOPNOTSUPP;
1942
1943        other = unix_peer(sk);
1944        if (!other || sk->sk_state != TCP_ESTABLISHED)
1945                return -ENOTCONN;
1946
        /* This block is never entered from above; it is only reached by
         * "goto alloc_skb" with both the peer state lock and the peer
         * iolock held. It drops them, allocates an empty skb and then
         * falls through to take the locks again and redo the checks.
         */
1947        if (false) {
1948alloc_skb:
1949                unix_state_unlock(other);
1950                mutex_unlock(&unix_sk(other)->iolock);
1951                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1952                                              &err, 0);
1953                if (!newskb)
1954                        goto err;
1955        }
1956
1957        /* we must acquire iolock as we modify already present
1958         * skbs in the sk_receive_queue and mess with skb->len
1959         */
1960        err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1961        if (err) {
1962                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1963                goto err;
1964        }
1965
1966        if (sk->sk_shutdown & SEND_SHUTDOWN) {
1967                err = -EPIPE;
1968                send_sigpipe = true;
1969                goto err_unlock;
1970        }
1971
1972        unix_state_lock(other);
1973
1974        if (sock_flag(other, SOCK_DEAD) ||
1975            other->sk_shutdown & RCV_SHUTDOWN) {
1976                err = -EPIPE;
1977                send_sigpipe = true;
1978                goto err_state_unlock;
1979        }
1980
        /* scm is set up once only, even if we come through here again
         * after an alloc_skb round trip.
         */
1981        if (init_scm) {
1982                err = maybe_init_creds(&scm, socket, other);
1983                if (err)
1984                        goto err_state_unlock;
1985                init_scm = false;
1986        }
1987
        /* Choose the skb to extend. "tail" is the queue tail that was
         * seen before the locks were dropped for allocation:
         *  - tail unchanged: it was already found unusable, use newskb;
         *  - queue empty or tail has different scm: use newskb, or go
         *    and allocate one if there is none yet;
         *  - tail usable: a previously allocated newskb is not needed.
         */
1988        skb = skb_peek_tail(&other->sk_receive_queue);
1989        if (tail && tail == skb) {
1990                skb = newskb;
1991        } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1992                if (newskb) {
1993                        skb = newskb;
1994                } else {
1995                        tail = skb;
1996                        goto alloc_skb;
1997                }
1998        } else if (newskb) {
1999                /* this is fast path, we don't necessarily need to
2000                 * call to kfree_skb even though with newskb == NULL
2001                 * this - does no harm
2002                 */
2003                consume_skb(newskb);
2004                newskb = NULL;
2005        }
2006
        /* A non-zero return means the page could not be attached to this
         * skb; remember it as the rejected tail and retry with a new one.
         */
2007        if (skb_append_pagefrags(skb, page, offset, size)) {
2008                tail = skb;
2009                goto alloc_skb;
2010        }
2011
        /* Account the added bytes in the skb and against the sender's
         * sk_wmem_alloc.
         */
2012        skb->len += size;
2013        skb->data_len += size;
2014        skb->truesize += size;
2015        refcount_add(size, &sk->sk_wmem_alloc);
2016
        /* A new skb still has to receive the scm data and be queued. */
2017        if (newskb) {
2018                err = unix_scm_to_skb(&scm, skb, false);
2019                if (err)
2020                        goto err_state_unlock;
2021                spin_lock(&other->sk_receive_queue.lock);
2022                __skb_queue_tail(&other->sk_receive_queue, newskb);
2023                spin_unlock(&other->sk_receive_queue.lock);
2024        }
2025
2026        unix_state_unlock(other);
2027        mutex_unlock(&unix_sk(other)->iolock);
2028
2029        other->sk_data_ready(other);
2030        scm_destroy(&scm);
2031        return size;
2032
        /* Error exits, in order of how much has to be undone:
         * err_state_unlock - peer state lock and iolock held
         * err_unlock       - only iolock held
         * err              - no locks held
         */
2033err_state_unlock:
2034        unix_state_unlock(other);
2035err_unlock:
2036        mutex_unlock(&unix_sk(other)->iolock);
2037err:
2038        kfree_skb(newskb);
2039        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2040                send_sig(SIGPIPE, current, 0);
        /* scm is only valid once maybe_init_creds() has succeeded. */
2041        if (!init_scm)
2042                scm_destroy(&scm);
2043        return err;
2044}
2045
2046static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2047                                  size_t len)
2048{
2049        int err;
2050        struct sock *sk = sock->sk;
2051
2052        err = sock_error(sk);
2053        if (err)
2054                return err;
2055
2056        if (sk->sk_state != TCP_ESTABLISHED)
2057                return -ENOTCONN;
2058
2059        if (msg->msg_namelen)
2060                msg->msg_namelen = 0;
2061
2062        return unix_dgram_sendmsg(sock, msg, len);
2063}
2064
2065static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2066                                  size_t size, int flags)
2067{
2068        struct sock *sk = sock->sk;
2069
2070        if (sk->sk_state != TCP_ESTABLISHED)
2071                return -ENOTCONN;
2072
2073        return unix_dgram_recvmsg(sock, msg, size, flags);
2074}
2075
2076static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2077{
2078        struct unix_sock *u = unix_sk(sk);
2079
2080        if (u->addr) {
2081                msg->msg_namelen = u->addr->len;
2082                memcpy(msg->msg_name, u->addr->name, u->addr->len);
2083        }
2084}
2085
/*
 *      Receive a single datagram from @sock into @msg.
 *
 *      Returns the number of bytes copied, or the remaining length of the
 *      datagram when MSG_TRUNC is set in @flags, or a negative error
 *      code. MSG_OOB is rejected with -EOPNOTSUPP. A SOCK_SEQPACKET socket
 *      that is shut down for receiving returns 0 instead of -EAGAIN.
 */
2086static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2087                              size_t size, int flags)
2088{
2089        struct scm_cookie scm;
2090        struct sock *sk = sock->sk;
2091        struct unix_sock *u = unix_sk(sk);
2092        struct sk_buff *skb, *last;
2093        long timeo;
2094        int err;
2095        int peeked, skip;
2096
2097        err = -EOPNOTSUPP;
2098        if (flags&MSG_OOB)
2099                goto out;
2100
2101        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2102
        /* Try to fetch a datagram under iolock. When one is found the
         * loop is left with iolock still held; in every other case
         * iolock is released before leaving or waiting.
         */
2103        do {
2104                mutex_lock(&u->iolock);
2105
2106                skip = sk_peek_offset(sk, flags);
2107                skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2108                                              &err, &last);
2109                if (skb)
2110                        break;
2111
2112                mutex_unlock(&u->iolock);
2113
2114                if (err != -EAGAIN)
2115                        break;
2116        } while (timeo &&
2117                 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2118
2119        if (!skb) { /* implies iolock unlocked */
2120                unix_state_lock(sk);
2121                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2122                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2123                    (sk->sk_shutdown & RCV_SHUTDOWN))
2124                        err = 0;
2125                unix_state_unlock(sk);
2126                goto out;
2127        }
2128
        /* A datagram was taken: wake anybody sleeping on our peer_wait
         * queue for write readiness.
         */
2129        if (wq_has_sleeper(&u->peer_wait))
2130                wake_up_interruptible_sync_poll(&u->peer_wait,
2131                                                POLLOUT | POLLWRNORM |
2132                                                POLLWRBAND);
2133
2134        if (msg->msg_name)
2135                unix_copy_addr(msg, skb->sk);
2136
        /* Clamp the copy to what is left of the datagram after the peek
         * offset; flag truncation when the buffer is too small.
         */
2137        if (size > skb->len - skip)
2138                size = skb->len - skip;
2139        else if (size < skb->len - skip)
2140                msg->msg_flags |= MSG_TRUNC;
2141
2142        err = skb_copy_datagram_msg(skb, skip, msg, size);
2143        if (err)
2144                goto out_free;
2145
2146        if (sock_flag(sk, SOCK_RCVTSTAMP))
2147                __sock_recv_timestamp(msg, sk, skb);
2148
2149        memset(&scm, 0, sizeof(scm));
2150
2151        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2152        unix_set_secdata(&scm, skb);
2153
2154        if (!(flags & MSG_PEEK)) {
2155                if (UNIXCB(skb).fp)
2156                        unix_detach_fds(&scm, skb);
2157
2158                sk_peek_offset_bwd(sk, skb->len);
2159        } else {
2160                /* It is questionable: on PEEK we could:
2161                   - do not return fds - good, but too simple 8)
2162                   - return fds, and do not return them on read (old strategy,
2163                     apparently wrong)
2164                   - clone fds (I chose it for now, it is the most universal
2165                     solution)
2166
2167                   POSIX 1003.1g does not actually define this clearly
2168                   at all. POSIX 1003.1g doesn't define a lot of things
2169                   clearly however!
2170
2171                */
2172
2173                sk_peek_offset_fwd(sk, size);
2174
2175                if (UNIXCB(skb).fp)
2176                        scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2177        }
        /* With MSG_TRUNC the caller gets the real remaining length. */
2178        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2179
2180        scm_recv(sock, msg, &scm, flags);
2181
        /* out_free is reached with iolock held, on success and on copy
         * failure alike.
         */
2182out_free:
2183        skb_free_datagram(sk, skb);
2184        mutex_unlock(&u->iolock);
2185out:
2186        return err;
2187}
2188
2189/*
2190 *      Sleep until more data has arrived. But check for races..
2191 */
/*
 *      @sk:        socket whose receive queue is watched
 *      @timeo:     remaining timeout; 0 means do not sleep at all
 *      @last:      queue tail as seen by the caller (may be NULL)
 *      @last_len:  length of @last as seen by the caller
 *      @freezable: selects freezable_schedule_timeout() over
 *                  schedule_timeout() for the sleep
 *
 *      Returns the timeout that is left. The caller is expected to
 *      re-examine the queue and the socket state itself.
 */
2192static long unix_stream_data_wait(struct sock *sk, long timeo,
2193                                  struct sk_buff *last, unsigned int last_len,
2194                                  bool freezable)
2195{
2196        struct sk_buff *tail;
2197        DEFINE_WAIT(wait);
2198
2199        unix_state_lock(sk);
2200
2201        for (;;) {
2202                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2203
                /* Stop waiting as soon as the tail differs from what the
                 * caller saw (new skb or grown skb), or on error,
                 * receive shutdown, pending signal or expired timeout.
                 */
2204                tail = skb_peek_tail(&sk->sk_receive_queue);
2205                if (tail != last ||
2206                    (tail && tail->len != last_len) ||
2207                    sk->sk_err ||
2208                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2209                    signal_pending(current) ||
2210                    !timeo)
2211                        break;
2212
                /* The state lock is dropped only around the sleep. */
2213                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2214                unix_state_unlock(sk);
2215                if (freezable)
2216                        timeo = freezable_schedule_timeout(timeo);
2217                else
2218                        timeo = schedule_timeout(timeo);
2219                unix_state_lock(sk);
2220
                /* On a dead socket we leave without clearing the
                 * SOCKWQ_ASYNC_WAITDATA bit.
                 */
2221                if (sock_flag(sk, SOCK_DEAD))
2222                        break;
2223
2224                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2225        }
2226
2227        finish_wait(sk_sleep(sk), &wait);
2228        unix_state_unlock(sk);
2229        return timeo;
2230}
2231
2232static unsigned int unix_skb_len(const struct sk_buff *skb)
2233{
2234        return skb->len - UNIXCB(skb).consumed;
2235}
2236
/*
 *      Arguments shared between unix_stream_read_generic() and the
 *      per-skb actor it calls.
 */
2237struct unix_stream_read_state {
        /* Called as recv_actor(skb, skip, chunk, state); a negative
         * return is treated as failure, otherwise the value is added to
         * the number of bytes read.
         */
2238        int (*recv_actor)(struct sk_buff *, int, int,
2239                          struct unix_stream_read_state *);
        /* Socket being read from. */
2240        struct socket *socket;
        /* Destination message; left NULL by the splice reader. */
2241        struct msghdr *msg;
        /* Destination pipe; only set by the splice reader. */
2242        struct pipe_inode_info *pipe;
        /* Number of bytes requested. */
2243        size_t size;
        /* MSG_* flags for the read. */
2244        int flags;
        /* Flags handed to skb_splice_bits() by the splice actor. */
2245        unsigned int splice_flags;
2246};
2247
/*
 *      Common read loop behind unix_stream_recvmsg() and
 *      unix_stream_splice_read().
 *
 *      Walks the receive queue of the socket in @state and hands each skb
 *      (or part of it) to state->recv_actor until state->size bytes were
 *      delivered or one of the stop conditions in the loop is hit.
 *      @freezable is passed on to unix_stream_data_wait().
 *
 *      Returns the number of bytes delivered if non-zero, otherwise err
 *      (0 or a negative error code). -EINVAL when the socket is not
 *      established, -EOPNOTSUPP for MSG_OOB.
 */
2248static int unix_stream_read_generic(struct unix_stream_read_state *state,
2249                                    bool freezable)
2250{
2251        struct scm_cookie scm;
2252        struct socket *sock = state->socket;
2253        struct sock *sk = sock->sk;
2254        struct unix_sock *u = unix_sk(sk);
2255        int copied = 0;
2256        int flags = state->flags;
2257        int noblock = flags & MSG_DONTWAIT;
2258        bool check_creds = false;
2259        int target;
2260        int err = 0;
2261        long timeo;
2262        int skip;
2263        size_t size = state->size;
2264        unsigned int last_len;
2265
2266        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2267                err = -EINVAL;
2268                goto out;
2269        }
2270
2271        if (unlikely(flags & MSG_OOB)) {
2272                err = -EOPNOTSUPP;
2273                goto out;
2274        }
2275
2276        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2277        timeo = sock_rcvtimeo(sk, noblock);
2278
2279        memset(&scm, 0, sizeof(scm));
2280
2281        /* Lock the socket to prevent queue disordering
2282         * while sleeps in memcpy_tomsg
2283         */
2284        mutex_lock(&u->iolock);
2285
2286        skip = max(sk_peek_offset(sk, flags), 0);
2287
2288        do {
2289                int chunk;
2290                bool drop_skb;
2291                struct sk_buff *skb, *last;
2292
                /* redo: restart from the head of the queue with iolock
                 * held and the state lock not held.
                 */
2293redo:
2294                unix_state_lock(sk);
2295                if (sock_flag(sk, SOCK_DEAD)) {
2296                        err = -ECONNRESET;
2297                        goto unlock;
2298                }
2299                last = skb = skb_peek(&sk->sk_receive_queue);
2300                last_len = last ? last->len : 0;
                /* again: re-evaluate with a new candidate skb; the state
                 * lock is held here.
                 */
2301again:
2302                if (skb == NULL) {
2303                        if (copied >= target)
2304                                goto unlock;
2305
2306                        /*
2307                         *      POSIX 1003.1g mandates this order.
2308                         */
2309
2310                        err = sock_error(sk);
2311                        if (err)
2312                                goto unlock;
2313                        if (sk->sk_shutdown & RCV_SHUTDOWN)
2314                                goto unlock;
2315
2316                        unix_state_unlock(sk);
2317                        if (!timeo) {
2318                                err = -EAGAIN;
2319                                break;
2320                        }
2321
                        /* iolock is released while sleeping for data. */
2322                        mutex_unlock(&u->iolock);
2323
2324                        timeo = unix_stream_data_wait(sk, timeo, last,
2325                                                      last_len, freezable);
2326
                        /* iolock is not held on this exit, so go straight
                         * to out after releasing scm.
                         */
2327                        if (signal_pending(current)) {
2328                                err = sock_intr_errno(timeo);
2329                                scm_destroy(&scm);
2330                                goto out;
2331                        }
2332
2333                        mutex_lock(&u->iolock);
2334                        goto redo;
                        /* unlock: reached with the state lock held; drop
                         * it and leave the loop.
                         */
2335unlock:
2336                        unix_state_unlock(sk);
2337                        break;
2338                }
2339
                /* Step over skbs that lie entirely before the peek
                 * offset.
                 */
2340                while (skip >= unix_skb_len(skb)) {
2341                        skip -= unix_skb_len(skb);
2342                        last = skb;
2343                        last_len = skb->len;
2344                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2345                        if (!skb)
2346                                goto again;
2347                }
2348
2349                unix_state_unlock(sk);
2350
2351                if (check_creds) {
2352                        /* Never glue messages from different writers */
2353                        if (!unix_skb_scm_eq(skb, &scm))
2354                                break;
2355                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2356                        /* Copy credentials */
2357                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2358                        unix_set_secdata(&scm, skb);
2359                        check_creds = true;
2360                }
2361
2362                /* Copy address just once */
2363                if (state->msg && state->msg->msg_name) {
2364                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2365                                         state->msg->msg_name);
2366                        unix_copy_addr(state->msg, skb->sk);
2367                        sunaddr = NULL;
2368                }
2369
                /* Hold an extra reference on the skb across the actor
                 * call, which runs without the state lock.
                 */
2370                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2371                skb_get(skb);
2372                chunk = state->recv_actor(skb, skip, chunk, state);
2373                drop_skb = !unix_skb_len(skb);
2374                /* skb is only safe to use if !drop_skb */
2375                consume_skb(skb);
2376                if (chunk < 0) {
2377                        if (copied == 0)
2378                                copied = -EFAULT;
2379                        break;
2380                }
2381                copied += chunk;
2382                size -= chunk;
2383
2384                if (drop_skb) {
2385                        /* the skb was touched by a concurrent reader;
2386                         * we should not expect anything from this skb
2387                         * anymore and assume it invalid - we can be
2388                         * sure it was dropped from the socket queue
2389                         *
2390                         * let's report a short read
2391                         */
2392                        err = 0;
2393                        break;
2394                }
2395
2396                /* Mark read part of skb as used */
2397                if (!(flags & MSG_PEEK)) {
2398                        UNIXCB(skb).consumed += chunk;
2399
2400                        sk_peek_offset_bwd(sk, chunk);
2401
2402                        if (UNIXCB(skb).fp)
2403                                unix_detach_fds(&scm, skb);
2404
                        /* skb not fully read yet: stop here. */
2405                        if (unix_skb_len(skb))
2406                                break;
2407
2408                        skb_unlink(skb, &sk->sk_receive_queue);
2409                        consume_skb(skb);
2410
                        /* Stop after an skb that carried fds. */
2411                        if (scm.fp)
2412                                break;
2413                } else {
2414                        /* It is questionable, see note in unix_dgram_recvmsg.
2415                         */
2416                        if (UNIXCB(skb).fp)
2417                                scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2418
2419                        sk_peek_offset_fwd(sk, chunk);
2420
2421                        if (UNIXCB(skb).fp)
2422                                break;
2423
                        /* Peeking: move on to the next skb in the queue,
                         * taking the state lock that "again" expects.
                         */
2424                        skip = 0;
2425                        last = skb;
2426                        last_len = skb->len;
2427                        unix_state_lock(sk);
2428                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2429                        if (skb)
2430                                goto again;
2431                        unix_state_unlock(sk);
2432                        break;
2433                }
2434        } while (size);
2435
2436        mutex_unlock(&u->iolock);
        /* Without a msghdr there is nowhere to deliver scm data to. */
2437        if (state->msg)
2438                scm_recv(sock, state->msg, &scm, flags);
2439        else
2440                scm_destroy(&scm);
2441out:
2442        return copied ? : err;
2443}
2444
2445static int unix_stream_read_actor(struct sk_buff *skb,
2446                                  int skip, int chunk,
2447                                  struct unix_stream_read_state *state)
2448{
2449        int ret;
2450
2451        ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2452                                    state->msg, chunk);
2453        return ret ?: chunk;
2454}
2455
2456static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2457                               size_t size, int flags)
2458{
2459        struct unix_stream_read_state state = {
2460                .recv_actor = unix_stream_read_actor,
2461                .socket = sock,
2462                .msg = msg,
2463                .size = size,
2464                .flags = flags
2465        };
2466
2467        return unix_stream_read_generic(&state, true);
2468}
2469
2470static int unix_stream_splice_actor(struct sk_buff *skb,
2471                                    int skip, int chunk,
2472                                    struct unix_stream_read_state *state)
2473{
2474        return skb_splice_bits(skb, state->socket->sk,
2475                               UNIXCB(skb).consumed + skip,
2476                               state->pipe, chunk, state->splice_flags);
2477}
2478
2479static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2480                                       struct pipe_inode_info *pipe,
2481                                       size_t size, unsigned int flags)
2482{
2483        struct unix_stream_read_state state = {
2484                .recv_actor = unix_stream_splice_actor,
2485                .socket = sock,
2486                .pipe = pipe,
2487                .size = size,
2488                .splice_flags = flags,
2489        };
2490
2491        if (unlikely(*ppos))
2492                return -ESPIPE;
2493
2494        if (sock->file->f_flags & O_NONBLOCK ||
2495            flags & SPLICE_F_NONBLOCK)
2496                state.flags = MSG_DONTWAIT;
2497
2498        return unix_stream_read_generic(&state, false);
2499}
2500
2501static int unix_shutdown(struct socket *sock, int mode)
2502{
2503        struct sock *sk = sock->sk;
2504        struct sock *other;
2505
2506        if (mode < SHUT_RD || mode > SHUT_RDWR)
2507                return -EINVAL;
2508        /* This maps:
2509         * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2510         * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2511         * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2512         */
2513        ++mode;
2514
2515        unix_state_lock(sk);
2516        sk->sk_shutdown |= mode;
2517        other = unix_peer(sk);
2518        if (other)
2519                sock_hold(other);
2520        unix_state_unlock(sk);
2521        sk->sk_state_change(sk);
2522
2523        if (other &&
2524                (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2525
2526                int peer_mode = 0;
2527
2528                if (mode&RCV_SHUTDOWN)
2529                        peer_mode |= SEND_SHUTDOWN;
2530                if (mode&SEND_SHUTDOWN)
2531                        peer_mode |= RCV_SHUTDOWN;
2532                unix_state_lock(other);
2533                other->sk_shutdown |= peer_mode;
2534                unix_state_unlock(other);
2535                other->sk_state_change(other);
2536                if (peer_mode == SHUTDOWN_MASK)
2537                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2538                else if (peer_mode & RCV_SHUTDOWN)
2539                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2540        }
2541        if (other)
2542                sock_put(other);
2543
2544        return 0;
2545}
2546
2547long unix_inq_len(struct sock *sk)
2548{
2549        struct sk_buff *skb;
2550        long amount = 0;
2551
2552        if (sk->sk_state == TCP_LISTEN)
2553                return -EINVAL;
2554
2555        spin_lock(&sk->sk_receive_queue.lock);
2556        if (sk->sk_type == SOCK_STREAM ||
2557            sk->sk_type == SOCK_SEQPACKET) {
2558                skb_queue_walk(&sk->sk_receive_queue, skb)
2559                        amount += unix_skb_len(skb);
2560        } else {
2561                skb = skb_peek(&sk->sk_receive_queue);
2562                if (skb)
2563                        amount = skb->len;
2564        }
2565        spin_unlock(&sk->sk_receive_queue.lock);
2566
2567        return amount;
2568}
2569EXPORT_SYMBOL_GPL(unix_inq_len);
2570
/* Value of sk_wmem_alloc_get() for @sk, widened to long. */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2576
2577static int unix_open_file(struct sock *sk)
2578{
2579        struct path path;
2580        struct file *f;
2581        int fd;
2582
2583        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2584                return -EPERM;
2585
2586        unix_state_lock(sk);
2587        path = unix_sk(sk)->path;
2588        if (!path.dentry) {
2589                unix_state_unlock(sk);
2590                return -ENOENT;
2591        }
2592
2593        path_get(&path);
2594        unix_state_unlock(sk);
2595
2596        fd = get_unused_fd_flags(O_CLOEXEC);
2597        if (fd < 0)
2598                goto out;
2599
2600        f = dentry_open(&path, O_PATH, current_cred());
2601        if (IS_ERR(f)) {
2602                put_unused_fd(fd);
2603                fd = PTR_ERR(f);
2604                goto out;
2605        }
2606
2607        fd_install(fd, f);
2608out:
2609        path_put(&path);
2610
2611        return fd;
2612}
2613
2614static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2615{
2616        struct sock *sk = sock->sk;
2617        long amount = 0;
2618        int err;
2619
2620        switch (cmd) {
2621        case SIOCOUTQ:
2622                amount = unix_outq_len(sk);
2623                err = put_user(amount, (int __user *)arg);
2624                break;
2625        case SIOCINQ:
2626                amount = unix_inq_len(sk);
2627                if (amount < 0)
2628                        err = amount;
2629                else
2630                        err = put_user(amount, (int __user *)arg);
2631                break;
2632        case SIOCUNIXFILE:
2633                err = unix_open_file(sk);
2634                break;
2635        default:
2636                err = -ENOIOCTLCMD;
2637                break;
2638        }
2639        return err;
2640}
2641
2642static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2643{
2644        struct sock *sk = sock->sk;
2645        unsigned int mask;
2646
2647        sock_poll_wait(file, sk_sleep(sk), wait);
2648        mask = 0;
2649
2650        /* exceptional events? */
2651        if (sk->sk_err)
2652                mask |= POLLERR;
2653        if (sk->sk_shutdown == SHUTDOWN_MASK)
2654                mask |= POLLHUP;
2655        if (sk->sk_shutdown & RCV_SHUTDOWN)
2656                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2657
2658        /* readable? */
2659        if (!skb_queue_empty(&sk->sk_receive_queue))
2660                mask |= POLLIN | POLLRDNORM;
2661
2662        /* Connection-based need to check for termination and startup */
2663        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2664            sk->sk_state == TCP_CLOSE)
2665                mask |= POLLHUP;
2666
2667        /*
2668         * we set writable also when the other side has shut down the
2669         * connection. This prevents stuck sockets.
2670         */
2671        if (unix_writable(sk))
2672                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2673
2674        return mask;
2675}
2676
2677static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2678                                    poll_table *wait)
2679{
2680        struct sock *sk = sock->sk, *other;
2681        unsigned int mask, writable;
2682
2683        sock_poll_wait(file, sk_sleep(sk), wait);
2684        mask = 0;
2685
2686        /* exceptional events? */
2687        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2688                mask |= POLLERR |
2689                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2690
2691        if (sk->sk_shutdown & RCV_SHUTDOWN)
2692                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2693        if (sk->sk_shutdown == SHUTDOWN_MASK)
2694                mask |= POLLHUP;
2695
2696        /* readable? */
2697        if (!skb_queue_empty(&sk->sk_receive_queue))
2698                mask |= POLLIN | POLLRDNORM;
2699
2700        /* Connection-based need to check for termination and startup */
2701        if (sk->sk_type == SOCK_SEQPACKET) {
2702                if (sk->sk_state == TCP_CLOSE)
2703                        mask |= POLLHUP;
2704                /* connection hasn't started yet? */
2705                if (sk->sk_state == TCP_SYN_SENT)
2706                        return mask;
2707        }
2708
2709        /* No write status requested, avoid expensive OUT tests. */
2710        if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2711                return mask;
2712
2713        writable = unix_writable(sk);
2714        if (writable) {
2715                unix_state_lock(sk);
2716
2717                other = unix_peer(sk);
2718                if (other && unix_peer(other) != sk &&
2719                    unix_recvq_full(other) &&
2720                    unix_dgram_peer_wake_me(sk, other))
2721                        writable = 0;
2722
2723                unix_state_unlock(sk);
2724        }
2725
2726        if (writable)
2727                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2728        else
2729                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2730
2731        return mask;
2732}
2733
2734#ifdef CONFIG_PROC_FS
2735
/*
 * A seq_file position packs two fields into one value: the hash bucket
 * index in the high bits and an offset within that bucket in the low
 * BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from bucket index b and offset o. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2741
2742static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2743{
2744        unsigned long offset = get_offset(*pos);
2745        unsigned long bucket = get_bucket(*pos);
2746        struct sock *sk;
2747        unsigned long count = 0;
2748
2749        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2750                if (sock_net(sk) != seq_file_net(seq))
2751                        continue;
2752                if (++count == offset)
2753                        break;
2754        }
2755
2756        return sk;
2757}
2758
2759static struct sock *unix_next_socket(struct seq_file *seq,
2760                                     struct sock *sk,
2761                                     loff_t *pos)
2762{
2763        unsigned long bucket;
2764
2765        while (sk > (struct sock *)SEQ_START_TOKEN) {
2766                sk = sk_next(sk);
2767                if (!sk)
2768                        goto next_bucket;
2769                if (sock_net(sk) == seq_file_net(seq))
2770                        return sk;
2771        }
2772
2773        do {
2774                sk = unix_from_bucket(seq, pos);
2775                if (sk)
2776                        return sk;
2777
2778next_bucket:
2779                bucket = get_bucket(*pos) + 1;
2780                *pos = set_bucket_offset(bucket, 1);
2781        } while (bucket < ARRAY_SIZE(unix_socket_table));
2782
2783        return NULL;
2784}
2785
2786static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2787        __acquires(unix_table_lock)
2788{
2789        spin_lock(&unix_table_lock);
2790
2791        if (!*pos)
2792                return SEQ_START_TOKEN;
2793
2794        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2795                return NULL;
2796
2797        return unix_next_socket(seq, NULL, pos);
2798}
2799
2800static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2801{
2802        ++*pos;
2803        return unix_next_socket(seq, v, pos);
2804}
2805
2806static void unix_seq_stop(struct seq_file *seq, void *v)
2807        __releases(unix_table_lock)
2808{
2809        spin_unlock(&unix_table_lock);
2810}
2811
2812static int unix_seq_show(struct seq_file *seq, void *v)
2813{
2814
2815        if (v == SEQ_START_TOKEN)
2816                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2817                         "Inode Path\n");
2818        else {
2819                struct sock *s = v;
2820                struct unix_sock *u = unix_sk(s);
2821                unix_state_lock(s);
2822
2823                seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2824                        s,
2825                        refcount_read(&s->sk_refcnt),
2826                        0,
2827                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2828                        s->sk_type,
2829                        s->sk_socket ?
2830                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2831                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2832                        sock_i_ino(s));
2833
2834                if (u->addr) {
2835                        int i, len;
2836                        seq_putc(seq, ' ');
2837
2838                        i = 0;
2839                        len = u->addr->len - sizeof(short);
2840                        if (!UNIX_ABSTRACT(s))
2841                                len--;
2842                        else {
2843                                seq_putc(seq, '@');
2844                                i++;
2845                        }
2846                        for ( ; i < len; i++)
2847                                seq_putc(seq, u->addr->name->sun_path[i] ?:
2848                                         '@');
2849                }
2850                unix_state_unlock(s);
2851                seq_putc(seq, '\n');
2852        }
2853
2854        return 0;
2855}
2856
2857static const struct seq_operations unix_seq_ops = {
2858        .start  = unix_seq_start,
2859        .next   = unix_seq_next,
2860        .stop   = unix_seq_stop,
2861        .show   = unix_seq_show,
2862};
2863
2864static int unix_seq_open(struct inode *inode, struct file *file)
2865{
2866        return seq_open_net(inode, file, &unix_seq_ops,
2867                            sizeof(struct seq_net_private));
2868}
2869
2870static const struct file_operations unix_seq_fops = {
2871        .owner          = THIS_MODULE,
2872        .open           = unix_seq_open,
2873        .read           = seq_read,
2874        .llseek         = seq_lseek,
2875        .release        = seq_release_net,
2876};
2877
2878#endif
2879
2880static const struct net_proto_family unix_family_ops = {
2881        .family = PF_UNIX,
2882        .create = unix_create,
2883        .owner  = THIS_MODULE,
2884};
2885
2886
2887static int __net_init unix_net_init(struct net *net)
2888{
2889        int error = -ENOMEM;
2890
2891        net->unx.sysctl_max_dgram_qlen = 10;
2892        if (unix_sysctl_register(net))
2893                goto out;
2894
2895#ifdef CONFIG_PROC_FS
2896        if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2897                unix_sysctl_unregister(net);
2898                goto out;
2899        }
2900#endif
2901        error = 0;
2902out:
2903        return error;
2904}
2905
2906static void __net_exit unix_net_exit(struct net *net)
2907{
2908        unix_sysctl_unregister(net);
2909        remove_proc_entry("unix", net->proc_net);
2910}
2911
2912static struct pernet_operations unix_net_ops = {
2913        .init = unix_net_init,
2914        .exit = unix_net_exit,
2915};
2916
2917static int __init af_unix_init(void)
2918{
2919        int rc = -1;
2920
2921        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2922
2923        rc = proto_register(&unix_proto, 1);
2924        if (rc != 0) {
2925                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2926                goto out;
2927        }
2928
2929        sock_register(&unix_family_ops);
2930        register_pernet_subsys(&unix_net_ops);
2931out:
2932        return rc;
2933}
2934
2935static void __exit af_unix_exit(void)
2936{
2937        sock_unregister(PF_UNIX);
2938        proto_unregister(&unix_proto);
2939        unregister_pernet_subsys(&unix_net_ops);
2940}
2941
2942/* Earlier than device_initcall() so that other drivers invoking
2943   request_module() don't end up in a loop when modprobe tries
2944   to use a UNIX socket. But later than subsys_initcall() because
2945   we depend on stuff initialised there */
2946fs_initcall(af_unix_init);
2947module_exit(af_unix_exit);
2948
2949MODULE_LICENSE("GPL");
2950MODULE_ALIAS_NETPROTO(PF_UNIX);
2951