linux/net/unix/af_unix.c
/*
 * NET4:        Implementation of BSD Unix domain sockets.
 *
 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *              Linus Torvalds  :       Assorted bug cures.
 *              Niibe Yutaka    :       async I/O support.
 *              Carsten Paeth   :       PF_UNIX check, address fixes.
 *              Alan Cox        :       Limit size of allocated blocks.
 *              Alan Cox        :       Fixed the stupid socketpair bug.
 *              Alan Cox        :       BSD compatibility fine tuning.
 *              Alan Cox        :       Fixed a bug in connect when interrupted.
 *              Alan Cox        :       Sorted out a proper draft version of
 *                                      file descriptor passing hacked up from
 *                                      Mike Shaver's work.
 *              Marty Leisner   :       Fixes to fd passing
 *              Nick Nevin      :       recvmsg bugfix.
 *              Alan Cox        :       Started proper garbage collector
 *              Heiko Eißfeldt  :       Missing verify_area check
 *              Alan Cox        :       Started POSIXisms
 *              Andreas Schwab  :       Replace inode by dentry for proper
 *                                      reference counting
 *              Kirk Petersen   :       Made this a module
 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
 *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 *                                      by the above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it avoids hashing a huge
 *                                      number of socks (for unix_gc()
 *                                      performance reasons).
 *                                      Security fix that limits the max
 *                                      number of socks to 2*max_files and
 *                                      the number of skbs queueable in the
 *                                      dgram receiver.
 *              Artur Skawina   :       Hash function optimizations
 *           Alexey Kuznetsov   :       Full scale SMP. Lots of bugs are introduced 8)
 *            Malcolm Beattie   :       Set peercred for socketpair
 *           Michal Ostrowski   :       Module initialization cleanup.
 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
 *                                      the core infrastructure is doing that
 *                                      for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *      [TO FIX]
 *      ECONNREFUSED is not returned from one end of a connected socket to the
 *              other the moment one end closes.
 *      fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *      [NOT TO FIX]
 *      accept() returns a path name even if the connecting socket has closed
 *              in the meantime (BSD loses the path and gives up).
 *      accept() returns 0 length path for an unbound connector. BSD returns 16
 *              and a null first byte in the path (but not for getsockname/getpeername - BSD bug??)
 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *      BSD af_unix apparently has connect forgetting to block properly.
 *              (need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *      Bug fixes and improvements.
 *              - client shutdown killed server socket.
 *              - removed all useless cli/sti pairs.
 *
 *      Semantic changes/extensions.
 *              - generic control message passing.
 *              - SCM_CREDENTIALS control message.
 *              - "Abstract" (not FS based) socket bindings.
 *                Abstract names are sequences of bytes (not zero terminated)
 *                beginning with a zero byte, so that this name space does not
 *                intersect with BSD (filesystem) names.
 */
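
/*
 * Illustrative userspace sketch (compiled out; names are hypothetical,
 * not part of this file): binding one socket to a filesystem name and
 * one to an abstract name as described above.  For an abstract name the
 * address length covers sun_family plus every byte of the name,
 * including the leading zero.
 */
#if 0
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static int bind_examples(int fs_fd, int abstract_fd)
{
        struct sockaddr_un a;

        /* Filesystem name: zero-terminated path in sun_path. */
        memset(&a, 0, sizeof(a));
        a.sun_family = AF_UNIX;
        strcpy(a.sun_path, "/tmp/example.sock");
        if (bind(fs_fd, (struct sockaddr *)&a, sizeof(a)) < 0)
                return -1;

        /* Abstract name: sun_path[0] == 0, not zero-terminated. */
        memset(&a, 0, sizeof(a));
        a.sun_family = AF_UNIX;
        memcpy(a.sun_path, "\0example", 8);
        return bind(abstract_fd, (struct sockaddr *)&a,
                    offsetof(struct sockaddr_un, sun_path) + 8);
}
#endif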

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
        unsigned long hash = (unsigned long)addr;

        hash ^= hash >> 16;
        hash ^= hash >> 8;
        hash %= UNIX_HASH_SIZE;
        return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected with the spinlock unix_table_lock;
 *    each socket state is protected by a separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
        unsigned int hash = (__force unsigned int)csum_fold(n);

        hash ^= hash>>8;
        return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
        struct sock *peer;

        unix_state_lock(s);
        peer = unix_peer(s);
        if (peer)
                sock_hold(peer);
        unix_state_unlock(s);
        return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
        if (atomic_dec_and_test(&addr->refcnt))
                kfree(addr);
}

/*
 *      Check unix socket name:
 *              - it should not be zero length.
 *              - if it does not start with a zero byte, it should be NUL
 *                terminated (FS object).
 *              - if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
        if (len <= sizeof(short) || len > sizeof(*sunaddr))
                return -EINVAL;
        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
                return -EINVAL;
        if (sunaddr->sun_path[0]) {
                /*
                 * This may look like an off by one error but it is a bit more
                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
                 * sun_path[108] doesn't as such exist.  However in kernel space
                 * we are guaranteed that it is a valid memory location in our
                 * kernel address buffer.
                 */
                ((char *)sunaddr)[len] = 0;
                len = strlen(sunaddr->sun_path)+1+sizeof(short);
                return len;
        }

        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
        return len;
}
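
/*
 * Compiled-out sketch (userspace view; illustrative names only) of the
 * three address forms distinguished above:
 */
#if 0
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static void name_forms(struct sockaddr_un *a)
{
        /* Autobind request: only the family is passed.  A length of
         * sizeof(short) never reaches unix_mkname(); unix_bind() routes
         * it to unix_autobind() instead.
         */
        socklen_t autobind_len = sizeof(a->sun_family);

        /* Filesystem object: sun_path[0] != 0.  unix_mkname() forces
         * NUL termination and recomputes the length from strlen().
         */
        strcpy(a->sun_path, "/run/demo.sock");

        /* Abstract name: sun_path[0] == 0.  Every byte up to the passed
         * length is significant and the whole address is hashed.
         */
        memcpy(a->sun_path, "\0demo", 5);
        socklen_t abstract_len = offsetof(struct sockaddr_un, sun_path) + 5;

        (void)autobind_len;
        (void)abstract_len;
}
#endif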

static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
                                              struct sockaddr_un *sunname,
                                              int len, int type, unsigned int hash)
{
        struct sock *s;

        sk_for_each(s, &unix_socket_table[hash ^ type]) {
                struct unix_sock *u = unix_sk(s);

                if (!net_eq(sock_net(s), net))
                        continue;

                if (u->addr->len == len &&
                    !memcmp(u->addr->name, sunname, len))
                        goto found;
        }
        s = NULL;
found:
        return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
                                                   struct sockaddr_un *sunname,
                                                   int len, int type,
                                                   unsigned int hash)
{
        struct sock *s;

        spin_lock(&unix_table_lock);
        s = __unix_find_socket_byname(net, sunname, len, type, hash);
        if (s)
                sock_hold(s);
        spin_unlock(&unix_table_lock);
        return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
        struct sock *s;

        spin_lock(&unix_table_lock);
        sk_for_each(s,
                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
                struct dentry *dentry = unix_sk(s)->path.dentry;

                if (dentry && d_real_inode(dentry) == i) {
                        sock_hold(s);
                        goto found;
                }
        }
        s = NULL;
found:
        spin_unlock(&unix_table_lock);
        return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (e.g., /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
                                      void *key)
{
        struct unix_sock *u;
        wait_queue_head_t *u_sleep;

        u = container_of(q, struct unix_sock, peer_wake);

        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
                            q);
        u->peer_wake.private = NULL;

        /* relaying can only happen while the wq still exists */
        u_sleep = sk_sleep(&u->sk);
        if (u_sleep)
                wake_up_interruptible_poll(u_sleep, key);

        return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
        struct unix_sock *u, *u_other;
        int rc;

        u = unix_sk(sk);
        u_other = unix_sk(other);
        rc = 0;
        spin_lock(&u_other->peer_wait.lock);

        if (!u->peer_wake.private) {
                u->peer_wake.private = other;
                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);

                rc = 1;
        }

        spin_unlock(&u_other->peer_wait.lock);
        return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
                                            struct sock *other)
{
        struct unix_sock *u, *u_other;

        u = unix_sk(sk);
        u_other = unix_sk(other);
        spin_lock(&u_other->peer_wait.lock);

        if (u->peer_wake.private == other) {
                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
                u->peer_wake.private = NULL;
        }

        spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
                                                   struct sock *other)
{
        unix_dgram_peer_wake_disconnect(sk, other);
        wake_up_interruptible_poll(sk_sleep(sk),
                                   POLLOUT |
                                   POLLWRNORM |
                                   POLLWRBAND);
}

/* preconditions:
 *      - unix_peer(sk) == other
 *      - association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
        int connected;

        connected = unix_dgram_peer_wake_connect(sk, other);

        if (unix_recvq_full(other))
                return 1;

        if (connected)
                unix_dgram_peer_wake_disconnect(sk, other);

        return 0;
}
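
/*
 * Compiled-out userspace sketch of the situation handled above: a
 * datagram client connected to an independent server socket (a
 * /dev/log-style daemon) blocks in poll() until the server drains its
 * receive queue; the peer_wait relay is what turns the server's read
 * into the client's POLLOUT wakeup.
 */
#if 0
#include <poll.h>

static int wait_until_writable(int connected_dgram_fd)
{
        struct pollfd pfd = {
                .fd = connected_dgram_fd,
                .events = POLLOUT,
        };

        /* Sleeps until the receiver makes room in its queue. */
        return poll(&pfd, 1, -1);
}
#endif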

static int unix_writable(const struct sock *sk)
{
        return sk->sk_state != TCP_LISTEN &&
               (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        if (unix_writable(sk)) {
                wq = rcu_dereference(sk->sk_wq);
                if (skwq_has_sleeper(wq))
                        wake_up_interruptible_sync_poll(&wq->wait,
                                POLLOUT | POLLWRNORM | POLLWRBAND);
                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        }
        rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, this allows flow
 * control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
        if (!skb_queue_empty(&sk->sk_receive_queue)) {
                skb_queue_purge(&sk->sk_receive_queue);
                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

                /* If one link of a bidirectional dgram pipe is disconnected,
                 * we signal an error. Messages are lost. Do not do this when
                 * the peer was not connected to us.
                 */
                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
                        other->sk_err = ECONNRESET;
                        other->sk_error_report(other);
                }
        }
}

static void unix_sock_destructor(struct sock *sk)
{
        struct unix_sock *u = unix_sk(sk);

        skb_queue_purge(&sk->sk_receive_queue);

        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
        WARN_ON(!sk_unhashed(sk));
        WARN_ON(sk->sk_socket);
        if (!sock_flag(sk, SOCK_DEAD)) {
                pr_info("Attempt to release alive unix socket: %p\n", sk);
                return;
        }

        if (u->addr)
                unix_release_addr(u->addr);

        atomic_long_dec(&unix_nr_socks);
        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
        pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
                atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What is the above comment talking about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid  = get_pid(task_tgid(current));
        sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
        int err;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        struct pid *old_pid = NULL;

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;       /* Only stream/seqpacket sockets accept */
        err = -EINVAL;
        if (!u->addr)
                goto out;       /* No listens on an unbound socket */
        unix_state_lock(sk);
        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (backlog > sk->sk_max_ack_backlog)
                wake_up_interruptible_all(&u->peer_wait);
        sk->sk_max_ack_backlog  = backlog;
        sk->sk_state            = TCP_LISTEN;
        /* set credentials so connect can copy them */
        init_peercred(sk);
        err = 0;

out_unlock:
        unix_state_unlock(sk);
        put_pid(old_pid);
out:
        return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
        struct unix_sock *u = unix_sk(sk);

        if (mutex_lock_interruptible(&u->iolock))
                return -EINTR;

        sk->sk_peek_off = val;
        mutex_unlock(&u->iolock);

        return 0;
}
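
/*
 * Compiled-out userspace sketch: unix_set_peek_off() backs the
 * SO_PEEK_OFF socket option, which makes MSG_PEEK reads advance
 * through the queued data instead of re-reading from the start.
 */
#if 0
#include <sys/socket.h>

static void peek_forward(int fd, char *buf, size_t len)
{
        int off = 0;

        /* From now on MSG_PEEK starts at sk_peek_off and moves it. */
        setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
        recv(fd, buf, len, MSG_PEEK);   /* peeks bytes 0..len-1 */
        recv(fd, buf, len, MSG_PEEK);   /* peeks bytes len..2*len-1 */
}
#endif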


static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};

static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
        struct sock *sk = NULL;
        struct unix_sock *u;

        atomic_long_inc(&unix_nr_socks);
        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;

        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
        if (!sk)
                goto out;

        sock_init_data(sock, sk);
        lockdep_set_class(&sk->sk_receive_queue.lock,
                                &af_unix_sk_receive_queue_lock_key);

        sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
        sk->sk_write_space      = unix_write_space;
        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
        sk->sk_destruct         = unix_sock_destructor;
        u         = unix_sk(sk);
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        spin_lock_init(&u->lock);
        atomic_long_set(&u->inflight, 0);
        INIT_LIST_HEAD(&u->link);
        mutex_init(&u->iolock); /* single task reading lock */
        mutex_init(&u->bindlock); /* single task binding lock */
        init_waitqueue_head(&u->peer_wait);
        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
        unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
        if (sk == NULL)
                atomic_long_dec(&unix_nr_socks);
        else {
                local_bh_disable();
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
                local_bh_enable();
        }
        return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
                       int kern)
{
        if (protocol && protocol != PF_UNIX)
                return -EPROTONOSUPPORT;

        sock->state = SS_UNCONNECTED;

        switch (sock->type) {
        case SOCK_STREAM:
                sock->ops = &unix_stream_ops;
                break;
                /*
                 *      Believe it or not, BSD has AF_UNIX, SOCK_RAW,
                 *      though nothing uses it.
                 */
        case SOCK_RAW:
                sock->type = SOCK_DGRAM;
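                /* fall through - SOCK_RAW is served by the dgram ops */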
        case SOCK_DGRAM:
                sock->ops = &unix_dgram_ops;
                break;
        case SOCK_SEQPACKET:
                sock->ops = &unix_seqpacket_ops;
                break;
        default:
                return -ESOCKTNOSUPPORT;
        }

        return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
        struct sock *sk = sock->sk;

        if (!sk)
                return 0;

        unix_release_sock(sk, 0);
        sock->sk = NULL;

        return 0;
}

static int unix_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        static u32 ordernum = 1;
        struct unix_address *addr;
        int err;
        unsigned int retries = 0;

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                return err;

        err = 0;
        if (u->addr)
                goto out;

        err = -ENOMEM;
        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
        if (!addr)
                goto out;

        addr->name->sun_family = AF_UNIX;
        atomic_set(&addr->refcnt, 1);

retry:
        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

        spin_lock(&unix_table_lock);
        ordernum = (ordernum+1)&0xFFFFF;

        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
                                      addr->hash)) {
                spin_unlock(&unix_table_lock);
                /*
                 * __unix_find_socket_byname() may take a long time if many
                 * names are already in use.
                 */
                cond_resched();
                /* Give up if all names seem to be in use. */
                if (retries++ == 0xFFFFF) {
                        err = -ENOSPC;
                        kfree(addr);
                        goto out;
                }
                goto retry;
        }
        addr->hash ^= sk->sk_type;

        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        spin_unlock(&unix_table_lock);
        err = 0;

out:    mutex_unlock(&u->bindlock);
        return err;
}
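
/*
 * Compiled-out userspace sketch: autobind is triggered by binding with
 * an address that carries only the family, and the kernel-chosen
 * abstract name (five hex digits, as generated above) is visible via
 * getsockname().
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static void autobind_example(int fd)
{
        struct sockaddr_un a;
        socklen_t alen = sizeof(a.sun_family);

        memset(&a, 0, sizeof(a));
        a.sun_family = AF_UNIX;
        bind(fd, (struct sockaddr *)&a, alen);          /* autobind */

        alen = sizeof(a);
        getsockname(fd, (struct sockaddr *)&a, &alen);
        /* a.sun_path[0] == '\0', followed by five hex digits */
}
#endif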

static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct sock *u;
        struct path path;
        int err = 0;

        if (sunname->sun_path[0]) {
                struct inode *inode;
                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
                if (err)
                        goto fail;
                inode = d_real_inode(path.dentry);
                err = inode_permission(inode, MAY_WRITE);
                if (err)
                        goto put_fail;

                err = -ECONNREFUSED;
                if (!S_ISSOCK(inode->i_mode))
                        goto put_fail;
                u = unix_find_socket_byinode(inode);
                if (!u)
                        goto put_fail;

                if (u->sk_type == type)
                        touch_atime(&path);

                path_put(&path);

                err = -EPROTOTYPE;
                if (u->sk_type != type) {
                        sock_put(u);
                        goto fail;
                }
        } else {
                err = -ECONNREFUSED;
                u = unix_find_socket_byname(net, sunname, len, type, hash);
                if (u) {
                        struct dentry *dentry;
                        dentry = unix_sk(u)->path.dentry;
                        if (dentry)
                                touch_atime(&unix_sk(u)->path);
                } else
                        goto fail;
        }
        return u;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
        struct dentry *dentry;
        struct path path;
        int err = 0;
        /*
         * Get the parent directory and calculate the hash for the last
         * component.
         */
        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
                return err;

        /*
         * All right, let's create it.
         */
        err = security_path_mknod(&path, dentry, mode, 0);
        if (!err) {
                err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
                if (!err) {
                        res->mnt = mntget(path.mnt);
                        res->dentry = dget(dentry);
                }
        }
        done_path_create(&path, dentry);
        return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
        int err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;

        err = -EINVAL;
        if (sunaddr->sun_family != AF_UNIX)
                goto out;

        if (addr_len == sizeof(short)) {
                err = unix_autobind(sock);
                goto out;
        }

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                goto out;

        err = -EINVAL;
        if (u->addr)
                goto out_up;

        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
                goto out_up;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash ^ sk->sk_type;
        atomic_set(&addr->refcnt, 1);

        if (sun_path[0]) {
                struct path path;
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
                err = unix_mknod(sun_path, mode, &path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
                        unix_release_addr(addr);
                        goto out_up;
                }
                addr->hash = UNIX_HASH_SIZE;
                hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
                u->path = path;
                list = &unix_socket_table[hash];
        } else {
                spin_lock(&unix_table_lock);
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                              sk->sk_type, hash)) {
                        unix_release_addr(addr);
                        goto out_unlock;
                }

                list = &unix_socket_table[addr->hash];
        }

        err = 0;
        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(list, sk);

out_unlock:
        spin_unlock(&unix_table_lock);
out_up:
        mutex_unlock(&u->bindlock);
out:
        return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }
        if (sk1 < sk2) {
                unix_state_lock(sk1);
                unix_state_lock_nested(sk2);
        } else {
                unix_state_lock(sk2);
                unix_state_lock_nested(sk1);
        }
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_unlock(sk1);
                return;
        }
        unix_state_unlock(sk1);
        unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

        } else {
                /*
                 *      1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
                unix_peer(sk) = other;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }
        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
        struct unix_sock *u = unix_sk(other);
        int sched;
        DEFINE_WAIT(wait);

        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

        sched = !sock_flag(other, SOCK_DEAD) &&
                !(other->sk_shutdown & RCV_SHUTDOWN) &&
                unix_recvq_full(other);

        unix_state_unlock(other);

        if (sched)
                timeo = schedule_timeout(timeo);

        finish_wait(&u->peer_wait, &wait);
        return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we do it after the state is locked,
           we will have to recheck everything again in any case.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           This is a tricky place. We need to grab our state lock and cannot
           drop the lock on the peer. It is dangerous because a deadlock is
           possible. Connect to self and simultaneous connect attempts are
           eliminated by checking socket state. other is TCP_LISTEN; if sk
           is TCP_LISTEN we check this before attempting to grab the lock.

           Well, and we have to recheck the state after the socket is locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Quickly set all the necessary fields... */

        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock */
        if (otheru->addr) {
                atomic_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to the listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
        struct sock *ska = socka->sk, *skb = sockb->sk;

        /* Join our sockets back to back */
        sock_hold(ska);
        sock_hold(skb);
        unix_peer(ska) = skb;
        unix_peer(skb) = ska;
        init_peercred(ska);
        init_peercred(skb);

        if (ska->sk_type != SOCK_DGRAM) {
                ska->sk_state = TCP_ESTABLISHED;
                skb->sk_state = TCP_ESTABLISHED;
                socka->state  = SS_CONNECTED;
                sockb->state  = SS_CONNECTED;
        }
        return 0;
}
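
/*
 * Compiled-out userspace sketch: socketpair(2) is the entry point that
 * ends up in unix_socketpair() above; both ends come back already
 * connected to each other.
 */
#if 0
#include <sys/socket.h>
#include <unistd.h>

static int make_pair(void)
{
        int sv[2];

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
                return -1;

        write(sv[0], "ping", 4);        /* now readable on sv[1] */
        close(sv[0]);
        close(sv[1]);
        return 0;
}
#endif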

static void unix_sock_inherit_flags(const struct socket *old,
                                    struct socket *new)
{
        if (test_bit(SOCK_PASSCRED, &old->flags))
                set_bit(SOCK_PASSCRED, &new->flags);
        if (test_bit(SOCK_PASSSEC, &old->flags))
                set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
        struct sock *sk = sock->sk;
        struct sock *tsk;
        struct sk_buff *skb;
        int err;

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;

        err = -EINVAL;
        if (sk->sk_state != TCP_LISTEN)
                goto out;

        /* If socket state is TCP_LISTEN it cannot change (for now...),
         * so that no locks are necessary.
         */

        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
        if (!skb) {
                /* This means receive shutdown. */
                if (err == 0)
                        err = -EINVAL;
                goto out;
        }

        tsk = skb->sk;
        skb_free_datagram(sk, skb);
        wake_up_interruptible(&unix_sk(sk)->peer_wait);

        /* attach accepted sock to socket */
        unix_state_lock(tsk);
        newsock->state = SS_CONNECTED;
        unix_sock_inherit_flags(sock, newsock);
        sock_graft(tsk, newsock);
        unix_state_unlock(tsk);
        return 0;

out:
        return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
        struct sock *sk = sock->sk;
        struct unix_sock *u;
        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
        int err = 0;

        if (peer) {
                sk = unix_peer_get(sk);

                err = -ENOTCONN;
                if (!sk)
                        goto out;
                err = 0;
        } else {
                sock_hold(sk);
        }

        u = unix_sk(sk);
        unix_state_lock(sk);
        if (!u->addr) {
                sunaddr->sun_family = AF_UNIX;
                sunaddr->sun_path[0] = 0;
                *uaddr_len = sizeof(short);
        } else {
                struct unix_address *addr = u->addr;

                *uaddr_len = addr->len;
                memcpy(sunaddr, addr->name, *uaddr_len);
        }
        unix_state_unlock(sk);
        sock_put(sk);
out:
        return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
        int i;

        scm->fp = UNIXCB(skb).fp;
        UNIXCB(skb).fp = NULL;

        for (i = scm->fp->count-1; i >= 0; i--)
                unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
        struct scm_cookie scm;
        memset(&scm, 0, sizeof(scm));
        scm.pid  = UNIXCB(skb).pid;
        if (UNIXCB(skb).fp)
                unix_detach_fds(&scm, skb);

        /* Alas, it calls VFS */
        /* So fscking what? fput() had been SMP-safe since the last Summer */
        scm_destroy(&scm);
        sock_wfree(skb);
}

/*
 * The "user->unix_inflight" variable is protected by the garbage
 * collection lock, and we just read it locklessly here. If you go
 * over the limit, there might be a tiny race in actually noticing
 * it across threads. Tough.
 */
static inline bool too_many_unix_fds(struct task_struct *p)
{
        struct user_struct *user = current_user();

        if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
                return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
        return false;
}

#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
        int i;
        unsigned char max_level = 0;

        if (too_many_unix_fds(current))
                return -ETOOMANYREFS;

        for (i = scm->fp->count - 1; i >= 0; i--) {
                struct sock *sk = unix_get_socket(scm->fp->fp[i]);

                if (sk)
                        max_level = max(max_level,
                                        unix_sk(sk)->recursion_level);
        }
        if (unlikely(max_level > MAX_RECURSION_LEVEL))
                return -ETOOMANYREFS;

        /*
         * Need to duplicate file references for the sake of garbage
         * collection.  Otherwise a socket in the fps might become a
         * candidate for GC while the skb is not yet queued.
         */
        UNIXCB(skb).fp = scm_fp_dup(scm->fp);
        if (!UNIXCB(skb).fp)
                return -ENOMEM;

        for (i = scm->fp->count - 1; i >= 0; i--)
                unix_inflight(scm->fp->user, scm->fp->fp[i]);
        return max_level;
}
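
/*
 * Compiled-out userspace sketch of the fd passing that
 * unix_attach_fds()/unix_detach_fds() implement: a descriptor travels
 * as SCM_RIGHTS ancillary data on sendmsg() (illustrative helper name).
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t send_fd(int sock, int fd_to_pass)
{
        char data = 'x';
        struct iovec iov = { .iov_base = &data, .iov_len = 1 };
        char cbuf[CMSG_SPACE(sizeof(int))];
        struct msghdr msg = {
                .msg_iov = &iov,
                .msg_iovlen = 1,
                .msg_control = cbuf,
                .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

        return sendmsg(sock, &msg, 0);
}
#endif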
1552
1553static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1554{
1555        int err = 0;
1556
1557        UNIXCB(skb).pid  = get_pid(scm->pid);
1558        UNIXCB(skb).uid = scm->creds.uid;
1559        UNIXCB(skb).gid = scm->creds.gid;
1560        UNIXCB(skb).fp = NULL;
1561        unix_get_secdata(scm, skb);
1562        if (scm->fp && send_fds)
1563                err = unix_attach_fds(scm, skb);
1564
1565        skb->destructor = unix_destruct_scm;
1566        return err;
1567}
1568
1569static bool unix_passcred_enabled(const struct socket *sock,
1570                                  const struct sock *other)
1571{
1572        return test_bit(SOCK_PASSCRED, &sock->flags) ||
1573               !other->sk_socket ||
1574               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1575}
1576
1577/*
1578 * Some apps rely on write() giving SCM_CREDENTIALS
1579 * We include credentials if source or destination socket
1580 * asserted SOCK_PASSCRED.
1581 */
1582static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1583                            const struct sock *other)
1584{
1585        if (UNIXCB(skb).pid)
1586                return;
1587        if (unix_passcred_enabled(sock, other)) {
1588                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1589                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1590        }
1591}
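
/*
 * Userspace sees these credentials as an SCM_CREDENTIALS control
 * message once SO_PASSCRED is set. A hedged sketch of the receiving
 * side, assuming msg was set up with an iovec and a control buffer as
 * in the SCM_RIGHTS sketch above (struct ucred needs _GNU_SOURCE;
 * error handling omitted):
 *
 *	int on = 1;
 *	struct ucred cred;
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	recvmsg(sock, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
 *	     cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS)
 *			memcpy(&cred, CMSG_DATA(cmsg), sizeof(cred));
 *
 * cred.pid, cred.uid and cred.gid then identify the sender.
 */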
1592
1593static int maybe_init_creds(struct scm_cookie *scm,
1594                            struct socket *socket,
1595                            const struct sock *other)
1596{
1597        int err;
1598        struct msghdr msg = { .msg_controllen = 0 };
1599
1600        err = scm_send(socket, &msg, scm, false);
1601        if (err)
1602                return err;
1603
1604        if (unix_passcred_enabled(socket, other)) {
1605                scm->pid = get_pid(task_tgid(current));
1606                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1607        }
1608        return err;
1609}
1610
1611static bool unix_skb_scm_eq(struct sk_buff *skb,
1612                            struct scm_cookie *scm)
1613{
1614        const struct unix_skb_parms *u = &UNIXCB(skb);
1615
1616        return u->pid == scm->pid &&
1617               uid_eq(u->uid, scm->creds.uid) &&
1618               gid_eq(u->gid, scm->creds.gid) &&
1619               unix_secdata_eq(scm, skb);
1620}
1621
1622/*
1623 *      Send AF_UNIX data.
1624 */
1625
1626static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1627                              size_t len)
1628{
1629        struct sock *sk = sock->sk;
1630        struct net *net = sock_net(sk);
1631        struct unix_sock *u = unix_sk(sk);
1632        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1633        struct sock *other = NULL;
1634        int namelen = 0; /* quiet a GCC "maybe uninitialised" warning */
1635        int err;
1636        unsigned int hash;
1637        struct sk_buff *skb;
1638        long timeo;
1639        struct scm_cookie scm;
1640        int max_level;
1641        int data_len = 0;
1642        int sk_locked;
1643
1644        wait_for_unix_gc();
1645        err = scm_send(sock, msg, &scm, false);
1646        if (err < 0)
1647                return err;
1648
1649        err = -EOPNOTSUPP;
1650        if (msg->msg_flags&MSG_OOB)
1651                goto out;
1652
1653        if (msg->msg_namelen) {
1654                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1655                if (err < 0)
1656                        goto out;
1657                namelen = err;
1658        } else {
1659                sunaddr = NULL;
1660                err = -ENOTCONN;
1661                other = unix_peer_get(sk);
1662                if (!other)
1663                        goto out;
1664        }
1665
1666        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1667            && (err = unix_autobind(sock)) != 0)
1668                goto out;
1669
1670        err = -EMSGSIZE;
1671        if (len > sk->sk_sndbuf - 32)
1672                goto out;
1673
1674        if (len > SKB_MAX_ALLOC) {
1675                data_len = min_t(size_t,
1676                                 len - SKB_MAX_ALLOC,
1677                                 MAX_SKB_FRAGS * PAGE_SIZE);
1678                data_len = PAGE_ALIGN(data_len);
1679
1680                BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1681        }
1682
1683        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1684                                   msg->msg_flags & MSG_DONTWAIT, &err,
1685                                   PAGE_ALLOC_COSTLY_ORDER);
1686        if (skb == NULL)
1687                goto out;
1688
1689        err = unix_scm_to_skb(&scm, skb, true);
1690        if (err < 0)
1691                goto out_free;
1692        max_level = err + 1;
1693
1694        skb_put(skb, len - data_len);
1695        skb->data_len = data_len;
1696        skb->len = len;
1697        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1698        if (err)
1699                goto out_free;
1700
1701        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1702
1703restart:
1704        if (!other) {
1705                err = -ECONNRESET;
1706                if (sunaddr == NULL)
1707                        goto out_free;
1708
1709                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1710                                        hash, &err);
1711                if (other == NULL)
1712                        goto out_free;
1713        }
1714
1715        if (sk_filter(other, skb) < 0) {
1716                /* Toss the packet but do not return any error to the sender */
1717                err = len;
1718                goto out_free;
1719        }
1720
1721        sk_locked = 0;
1722        unix_state_lock(other);
1723restart_locked:
1724        err = -EPERM;
1725        if (!unix_may_send(sk, other))
1726                goto out_unlock;
1727
1728        if (unlikely(sock_flag(other, SOCK_DEAD))) {
1729                /*
1730                 *      Check with POSIX 1003.1g - what should a
1731                 *      datagram error on a dead peer return?
1732                 */
1733                unix_state_unlock(other);
1734                sock_put(other);
1735
1736                if (!sk_locked)
1737                        unix_state_lock(sk);
1738
1739                err = 0;
1740                if (unix_peer(sk) == other) {
1741                        unix_peer(sk) = NULL;
1742                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1743
1744                        unix_state_unlock(sk);
1745
1746                        unix_dgram_disconnected(sk, other);
1747                        sock_put(other);
1748                        err = -ECONNREFUSED;
1749                } else {
1750                        unix_state_unlock(sk);
1751                }
1752
1753                other = NULL;
1754                if (err)
1755                        goto out_free;
1756                goto restart;
1757        }
1758
1759        err = -EPIPE;
1760        if (other->sk_shutdown & RCV_SHUTDOWN)
1761                goto out_unlock;
1762
1763        if (sk->sk_type != SOCK_SEQPACKET) {
1764                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1765                if (err)
1766                        goto out_unlock;
1767        }
1768
1769        /* other == sk && unix_peer(other) != sk if
1770         * - unix_peer(sk) == NULL, destination address bound to sk
1771         * - unix_peer(sk) == sk by time of get but disconnected before lock
1772         */
1773        if (other != sk &&
1774            unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1775                if (timeo) {
1776                        timeo = unix_wait_for_peer(other, timeo);
1777
1778                        err = sock_intr_errno(timeo);
1779                        if (signal_pending(current))
1780                                goto out_free;
1781
1782                        goto restart;
1783                }
1784
1785                if (!sk_locked) {
1786                        unix_state_unlock(other);
1787                        unix_state_double_lock(sk, other);
1788                }
1789
1790                if (unix_peer(sk) != other ||
1791                    unix_dgram_peer_wake_me(sk, other)) {
1792                        err = -EAGAIN;
1793                        sk_locked = 1;
1794                        goto out_unlock;
1795                }
1796
1797                if (!sk_locked) {
1798                        sk_locked = 1;
1799                        goto restart_locked;
1800                }
1801        }
1802
1803        if (unlikely(sk_locked))
1804                unix_state_unlock(sk);
1805
1806        if (sock_flag(other, SOCK_RCVTSTAMP))
1807                __net_timestamp(skb);
1808        maybe_add_creds(skb, sock, other);
1809        skb_queue_tail(&other->sk_receive_queue, skb);
1810        if (max_level > unix_sk(other)->recursion_level)
1811                unix_sk(other)->recursion_level = max_level;
1812        unix_state_unlock(other);
1813        other->sk_data_ready(other);
1814        sock_put(other);
1815        scm_destroy(&scm);
1816        return len;
1817
1818out_unlock:
1819        if (sk_locked)
1820                unix_state_unlock(sk);
1821        unix_state_unlock(other);
1822out_free:
1823        kfree_skb(skb);
1824out:
1825        if (other)
1826                sock_put(other);
1827        scm_destroy(&scm);
1828        return err;
1829}
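
/*
 * The userspace view of this path, as a minimal sketch (the socket
 * path is hypothetical, error handling omitted):
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int s = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	strncpy(a.sun_path, "/tmp/dsock", sizeof(a.sun_path) - 1);
 *	sendto(s, "hi", 2, 0, (struct sockaddr *)&a, sizeof(a));
 *
 * An unconnected sendto() like this takes the unix_find_other()
 * lookup above; a connect()ed socket skips it via unix_peer_get().
 */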
1830
1831/* We use paged skbs for stream sockets, and limit occupancy to 32768
1832 * bytes, and a minimum of a full page.
1833 */
1834#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
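/*
 * E.g. with 4 KiB pages get_order(32768) == 3, so UNIX_SKB_FRAGS_SZ is
 * eight pages == 32768 bytes; with 64 KiB pages get_order(32768) == 0
 * and the limit becomes a single 64 KiB page.
 */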
1835
1836static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1837                               size_t len)
1838{
1839        struct sock *sk = sock->sk;
1840        struct sock *other = NULL;
1841        int err, size;
1842        struct sk_buff *skb;
1843        int sent = 0;
1844        struct scm_cookie scm;
1845        bool fds_sent = false;
1846        int max_level;
1847        int data_len;
1848
1849        wait_for_unix_gc();
1850        err = scm_send(sock, msg, &scm, false);
1851        if (err < 0)
1852                return err;
1853
1854        err = -EOPNOTSUPP;
1855        if (msg->msg_flags&MSG_OOB)
1856                goto out_err;
1857
1858        if (msg->msg_namelen) {
1859                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1860                goto out_err;
1861        } else {
1862                err = -ENOTCONN;
1863                other = unix_peer(sk);
1864                if (!other)
1865                        goto out_err;
1866        }
1867
1868        if (sk->sk_shutdown & SEND_SHUTDOWN)
1869                goto pipe_err;
1870
1871        while (sent < len) {
1872                size = len - sent;
1873
1874                /* Keep two messages in the pipe so it schedules better */
1875                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1876
1877                /* allow fallback to order-0 allocations */
1878                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1879
1880                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1881
1882                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1883
1884                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1885                                           msg->msg_flags & MSG_DONTWAIT, &err,
1886                                           get_order(UNIX_SKB_FRAGS_SZ));
1887                if (!skb)
1888                        goto out_err;
1889
1890                /* Only send the fds in the first buffer */
1891                err = unix_scm_to_skb(&scm, skb, !fds_sent);
1892                if (err < 0) {
1893                        kfree_skb(skb);
1894                        goto out_err;
1895                }
1896                max_level = err + 1;
1897                fds_sent = true;
1898
1899                skb_put(skb, size - data_len);
1900                skb->data_len = data_len;
1901                skb->len = size;
1902                err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1903                if (err) {
1904                        kfree_skb(skb);
1905                        goto out_err;
1906                }
1907
1908                unix_state_lock(other);
1909
1910                if (sock_flag(other, SOCK_DEAD) ||
1911                    (other->sk_shutdown & RCV_SHUTDOWN))
1912                        goto pipe_err_free;
1913
1914                maybe_add_creds(skb, sock, other);
1915                skb_queue_tail(&other->sk_receive_queue, skb);
1916                if (max_level > unix_sk(other)->recursion_level)
1917                        unix_sk(other)->recursion_level = max_level;
1918                unix_state_unlock(other);
1919                other->sk_data_ready(other);
1920                sent += size;
1921        }
1922
1923        scm_destroy(&scm);
1924
1925        return sent;
1926
1927pipe_err_free:
1928        unix_state_unlock(other);
1929        kfree_skb(skb);
1930pipe_err:
1931        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1932                send_sig(SIGPIPE, current, 0);
1933        err = -EPIPE;
1934out_err:
1935        scm_destroy(&scm);
1936        return sent ? : err;
1937}
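
/*
 * E.g. with a typical net.core.wmem_default of 212992, each iteration
 * of the loop above sends at most (212992 >> 1) - 64 == 106432 bytes
 * before the SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ clamp applies; the
 * exact figures depend on tuning and are illustrative only.
 */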
1938
1939static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1940                                    int offset, size_t size, int flags)
1941{
1942        int err;
1943        bool send_sigpipe = false;
1944        bool init_scm = true;
1945        struct scm_cookie scm;
1946        struct sock *other, *sk = socket->sk;
1947        struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1948
1949        if (flags & MSG_OOB)
1950                return -EOPNOTSUPP;
1951
1952        other = unix_peer(sk);
1953        if (!other || sk->sk_state != TCP_ESTABLISHED)
1954                return -ENOTCONN;
1955
1956        if (false) {
1957alloc_skb:
1958                unix_state_unlock(other);
1959                mutex_unlock(&unix_sk(other)->iolock);
1960                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1961                                              &err, 0);
1962                if (!newskb)
1963                        goto err;
1964        }
1965
1966        /* we must acquire the iolock as we modify skbs already
1967         * present in the sk_receive_queue and mess with skb->len
1968         */
1969        err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1970        if (err) {
1971                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1972                goto err;
1973        }
1974
1975        if (sk->sk_shutdown & SEND_SHUTDOWN) {
1976                err = -EPIPE;
1977                send_sigpipe = true;
1978                goto err_unlock;
1979        }
1980
1981        unix_state_lock(other);
1982
1983        if (sock_flag(other, SOCK_DEAD) ||
1984            other->sk_shutdown & RCV_SHUTDOWN) {
1985                err = -EPIPE;
1986                send_sigpipe = true;
1987                goto err_state_unlock;
1988        }
1989
1990        if (init_scm) {
1991                err = maybe_init_creds(&scm, socket, other);
1992                if (err)
1993                        goto err_state_unlock;
1994                init_scm = false;
1995        }
1996
1997        skb = skb_peek_tail(&other->sk_receive_queue);
1998        if (tail && tail == skb) {
1999                skb = newskb;
2000        } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2001                if (newskb) {
2002                        skb = newskb;
2003                } else {
2004                        tail = skb;
2005                        goto alloc_skb;
2006                }
2007        } else if (newskb) {
2008                /* This is the fast path: the tail skb can take the
2009                 * append, so the speculatively allocated newskb is
2010                 * not needed; consume_skb(NULL) would do no harm anyway.
2011                 */
2012                consume_skb(newskb);
2013                newskb = NULL;
2014        }
2015
2016        if (skb_append_pagefrags(skb, page, offset, size)) {
2017                tail = skb;
2018                goto alloc_skb;
2019        }
2020
2021        skb->len += size;
2022        skb->data_len += size;
2023        skb->truesize += size;
2024        atomic_add(size, &sk->sk_wmem_alloc);
2025
2026        if (newskb) {
2027                err = unix_scm_to_skb(&scm, skb, false);
2028                if (err)
2029                        goto err_state_unlock;
2030                spin_lock(&other->sk_receive_queue.lock);
2031                __skb_queue_tail(&other->sk_receive_queue, newskb);
2032                spin_unlock(&other->sk_receive_queue.lock);
2033        }
2034
2035        unix_state_unlock(other);
2036        mutex_unlock(&unix_sk(other)->iolock);
2037
2038        other->sk_data_ready(other);
2039        scm_destroy(&scm);
2040        return size;
2041
2042err_state_unlock:
2043        unix_state_unlock(other);
2044err_unlock:
2045        mutex_unlock(&unix_sk(other)->iolock);
2046err:
2047        kfree_skb(newskb);
2048        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2049                send_sig(SIGPIPE, current, 0);
2050        if (!init_scm)
2051                scm_destroy(&scm);
2052        return err;
2053}
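
/*
 * This is the path taken by e.g. sendfile(2) or splice(2) into a
 * connected AF_UNIX stream socket; a hedged sketch (file name
 * hypothetical, error handling omitted):
 *
 *	#include <fcntl.h>
 *	#include <sys/sendfile.h>
 *
 *	int fd = open("/tmp/data", O_RDONLY);
 *	sendfile(sock, fd, NULL, 65536);
 */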
2054
2055static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2056                                  size_t len)
2057{
2058        int err;
2059        struct sock *sk = sock->sk;
2060
2061        err = sock_error(sk);
2062        if (err)
2063                return err;
2064
2065        if (sk->sk_state != TCP_ESTABLISHED)
2066                return -ENOTCONN;
2067
2068        if (msg->msg_namelen)
2069                msg->msg_namelen = 0;
2070
2071        return unix_dgram_sendmsg(sock, msg, len);
2072}
2073
2074static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2075                                  size_t size, int flags)
2076{
2077        struct sock *sk = sock->sk;
2078
2079        if (sk->sk_state != TCP_ESTABLISHED)
2080                return -ENOTCONN;
2081
2082        return unix_dgram_recvmsg(sock, msg, size, flags);
2083}
2084
2085static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2086{
2087        struct unix_sock *u = unix_sk(sk);
2088
2089        if (u->addr) {
2090                msg->msg_namelen = u->addr->len;
2091                memcpy(msg->msg_name, u->addr->name, u->addr->len);
2092        }
2093}
2094
2095static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2096                              size_t size, int flags)
2097{
2098        struct scm_cookie scm;
2099        struct sock *sk = sock->sk;
2100        struct unix_sock *u = unix_sk(sk);
2101        struct sk_buff *skb, *last;
2102        long timeo;
2103        int err;
2104        int peeked, skip;
2105
2106        err = -EOPNOTSUPP;
2107        if (flags&MSG_OOB)
2108                goto out;
2109
2110        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2111
2112        do {
2113                mutex_lock(&u->iolock);
2114
2115                skip = sk_peek_offset(sk, flags);
2116                skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2117                                              &last);
2118                if (skb)
2119                        break;
2120
2121                mutex_unlock(&u->iolock);
2122
2123                if (err != -EAGAIN)
2124                        break;
2125        } while (timeo &&
2126                 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2127
2128        if (!skb) { /* implies iolock unlocked */
2129                unix_state_lock(sk);
2130                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2131                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2132                    (sk->sk_shutdown & RCV_SHUTDOWN))
2133                        err = 0;
2134                unix_state_unlock(sk);
2135                goto out;
2136        }
2137
2138        if (wq_has_sleeper(&u->peer_wait))
2139                wake_up_interruptible_sync_poll(&u->peer_wait,
2140                                                POLLOUT | POLLWRNORM |
2141                                                POLLWRBAND);
2142
2143        if (msg->msg_name)
2144                unix_copy_addr(msg, skb->sk);
2145
2146        if (size > skb->len - skip)
2147                size = skb->len - skip;
2148        else if (size < skb->len - skip)
2149                msg->msg_flags |= MSG_TRUNC;
2150
2151        err = skb_copy_datagram_msg(skb, skip, msg, size);
2152        if (err)
2153                goto out_free;
2154
2155        if (sock_flag(sk, SOCK_RCVTSTAMP))
2156                __sock_recv_timestamp(msg, sk, skb);
2157
2158        memset(&scm, 0, sizeof(scm));
2159
2160        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2161        unix_set_secdata(&scm, skb);
2162
2163        if (!(flags & MSG_PEEK)) {
2164                if (UNIXCB(skb).fp)
2165                        unix_detach_fds(&scm, skb);
2166
2167                sk_peek_offset_bwd(sk, skb->len);
2168        } else {
2169                /* It is questionable what to do on PEEK. We could:
2170                   - not return fds - good, but too simple 8)
2171                   - return fds, and not return them on read (old strategy,
2172                     apparently wrong)
2173                   - clone fds (I chose it for now; it is the most universal
2174                     solution)
2175
2176                   POSIX 1003.1g does not actually define this clearly
2177                   at all. POSIX 1003.1g doesn't define a lot of things
2178                   clearly, however!
2179
2180                */
2181
2182                sk_peek_offset_fwd(sk, size);
2183
2184                if (UNIXCB(skb).fp)
2185                        scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2186        }
2187        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2188
2189        scm_recv(sock, msg, &scm, flags);
2190
2191out_free:
2192        skb_free_datagram(sk, skb);
2193        mutex_unlock(&u->iolock);
2194out:
2195        return err;
2196}
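
/*
 * With the clone-fds strategy above, every MSG_PEEK that sees an
 * SCM_RIGHTS message installs its own copy of the passed descriptors;
 * a sketch of the observable behaviour:
 *
 *	recvmsg(sock, &msg, MSG_PEEK);	(installs descriptor A)
 *	recvmsg(sock, &msg, 0);		(installs descriptor B, dequeues)
 *
 * A and B are distinct descriptor numbers referring to the same open
 * file, so a peeking reader must close its copy as well.
 */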
2197
2198/*
2199 *      Sleep until more data has arrived. But check for races.
2200 */
2201static long unix_stream_data_wait(struct sock *sk, long timeo,
2202                                  struct sk_buff *last, unsigned int last_len,
2203                                  bool freezable)
2204{
2205        struct sk_buff *tail;
2206        DEFINE_WAIT(wait);
2207
2208        unix_state_lock(sk);
2209
2210        for (;;) {
2211                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2212
2213                tail = skb_peek_tail(&sk->sk_receive_queue);
2214                if (tail != last ||
2215                    (tail && tail->len != last_len) ||
2216                    sk->sk_err ||
2217                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2218                    signal_pending(current) ||
2219                    !timeo)
2220                        break;
2221
2222                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2223                unix_state_unlock(sk);
2224                if (freezable)
2225                        timeo = freezable_schedule_timeout(timeo);
2226                else
2227                        timeo = schedule_timeout(timeo);
2228                unix_state_lock(sk);
2229
2230                if (sock_flag(sk, SOCK_DEAD))
2231                        break;
2232
2233                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2234        }
2235
2236        finish_wait(sk_sleep(sk), &wait);
2237        unix_state_unlock(sk);
2238        return timeo;
2239}
2240
2241static unsigned int unix_skb_len(const struct sk_buff *skb)
2242{
2243        return skb->len - UNIXCB(skb).consumed;
2244}
2245
2246struct unix_stream_read_state {
2247        int (*recv_actor)(struct sk_buff *, int, int,
2248                          struct unix_stream_read_state *);
2249        struct socket *socket;
2250        struct msghdr *msg;
2251        struct pipe_inode_info *pipe;
2252        size_t size;
2253        int flags;
2254        unsigned int splice_flags;
2255};
2256
2257static int unix_stream_read_generic(struct unix_stream_read_state *state,
2258                                    bool freezable)
2259{
2260        struct scm_cookie scm;
2261        struct socket *sock = state->socket;
2262        struct sock *sk = sock->sk;
2263        struct unix_sock *u = unix_sk(sk);
2264        int copied = 0;
2265        int flags = state->flags;
2266        int noblock = flags & MSG_DONTWAIT;
2267        bool check_creds = false;
2268        int target;
2269        int err = 0;
2270        long timeo;
2271        int skip;
2272        size_t size = state->size;
2273        unsigned int last_len;
2274
2275        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2276                err = -EINVAL;
2277                goto out;
2278        }
2279
2280        if (unlikely(flags & MSG_OOB)) {
2281                err = -EOPNOTSUPP;
2282                goto out;
2283        }
2284
2285        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2286        timeo = sock_rcvtimeo(sk, noblock);
2287
2288        memset(&scm, 0, sizeof(scm));
2289
2290        /* Lock the socket to prevent queue disordering
2291         * while we sleep copying data to the message
2292         */
2293        mutex_lock(&u->iolock);
2294
2295        if (flags & MSG_PEEK)
2296                skip = sk_peek_offset(sk, flags);
2297        else
2298                skip = 0;
2299
2300        do {
2301                int chunk;
2302                bool drop_skb;
2303                struct sk_buff *skb, *last;
2304
2305redo:
2306                unix_state_lock(sk);
2307                if (sock_flag(sk, SOCK_DEAD)) {
2308                        err = -ECONNRESET;
2309                        goto unlock;
2310                }
2311                last = skb = skb_peek(&sk->sk_receive_queue);
2312                last_len = last ? last->len : 0;
2313again:
2314                if (skb == NULL) {
2315                        unix_sk(sk)->recursion_level = 0;
2316                        if (copied >= target)
2317                                goto unlock;
2318
2319                        /*
2320                         *      POSIX 1003.1g mandates this order.
2321                         */
2322
2323                        err = sock_error(sk);
2324                        if (err)
2325                                goto unlock;
2326                        if (sk->sk_shutdown & RCV_SHUTDOWN)
2327                                goto unlock;
2328
2329                        unix_state_unlock(sk);
2330                        if (!timeo) {
2331                                err = -EAGAIN;
2332                                break;
2333                        }
2334
2335                        mutex_unlock(&u->iolock);
2336
2337                        timeo = unix_stream_data_wait(sk, timeo, last,
2338                                                      last_len, freezable);
2339
2340                        if (signal_pending(current)) {
2341                                err = sock_intr_errno(timeo);
2342                                scm_destroy(&scm);
2343                                goto out;
2344                        }
2345
2346                        mutex_lock(&u->iolock);
2347                        goto redo;
2348unlock:
2349                        unix_state_unlock(sk);
2350                        break;
2351                }
2352
2353                while (skip >= unix_skb_len(skb)) {
2354                        skip -= unix_skb_len(skb);
2355                        last = skb;
2356                        last_len = skb->len;
2357                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2358                        if (!skb)
2359                                goto again;
2360                }
2361
2362                unix_state_unlock(sk);
2363
2364                if (check_creds) {
2365                        /* Never glue messages from different writers */
2366                        if (!unix_skb_scm_eq(skb, &scm))
2367                                break;
2368                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2369                        /* Copy credentials */
2370                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2371                        unix_set_secdata(&scm, skb);
2372                        check_creds = true;
2373                }
2374
2375                /* Copy address just once */
2376                if (state->msg && state->msg->msg_name) {
2377                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2378                                         state->msg->msg_name);
2379                        unix_copy_addr(state->msg, skb->sk);
2380                        sunaddr = NULL;
2381                }
2382
2383                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2384                skb_get(skb);
2385                chunk = state->recv_actor(skb, skip, chunk, state);
2386                drop_skb = !unix_skb_len(skb);
2387                /* skb is only safe to use if !drop_skb */
2388                consume_skb(skb);
2389                if (chunk < 0) {
2390                        if (copied == 0)
2391                                copied = -EFAULT;
2392                        break;
2393                }
2394                copied += chunk;
2395                size -= chunk;
2396
2397                if (drop_skb) {
2398                        /* the skb was touched by a concurrent reader;
2399                         * we should not expect anything from this skb
2400                         * anymore and assume it invalid - we can be
2401                         * sure it was dropped from the socket queue
2402                         *
2403                         * let's report a short read
2404                         */
2405                        err = 0;
2406                        break;
2407                }
2408
2409                /* Mark read part of skb as used */
2410                if (!(flags & MSG_PEEK)) {
2411                        UNIXCB(skb).consumed += chunk;
2412
2413                        sk_peek_offset_bwd(sk, chunk);
2414
2415                        if (UNIXCB(skb).fp)
2416                                unix_detach_fds(&scm, skb);
2417
2418                        if (unix_skb_len(skb))
2419                                break;
2420
2421                        skb_unlink(skb, &sk->sk_receive_queue);
2422                        consume_skb(skb);
2423
2424                        if (scm.fp)
2425                                break;
2426                } else {
2427                        /* It is questionable, see note in unix_dgram_recvmsg.
2428                         */
2429                        if (UNIXCB(skb).fp)
2430                                scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2431
2432                        sk_peek_offset_fwd(sk, chunk);
2433
2434                        if (UNIXCB(skb).fp)
2435                                break;
2436
2437                        skip = 0;
2438                        last = skb;
2439                        last_len = skb->len;
2440                        unix_state_lock(sk);
2441                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2442                        if (skb)
2443                                goto again;
2444                        unix_state_unlock(sk);
2445                        break;
2446                }
2447        } while (size);
2448
2449        mutex_unlock(&u->iolock);
2450        if (state->msg)
2451                scm_recv(sock, state->msg, &scm, flags);
2452        else
2453                scm_destroy(&scm);
2454out:
2455        return copied ? : err;
2456}
2457
2458static int unix_stream_read_actor(struct sk_buff *skb,
2459                                  int skip, int chunk,
2460                                  struct unix_stream_read_state *state)
2461{
2462        int ret;
2463
2464        ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2465                                    state->msg, chunk);
2466        return ret ?: chunk;
2467}
2468
2469static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2470                               size_t size, int flags)
2471{
2472        struct unix_stream_read_state state = {
2473                .recv_actor = unix_stream_read_actor,
2474                .socket = sock,
2475                .msg = msg,
2476                .size = size,
2477                .flags = flags
2478        };
2479
2480        return unix_stream_read_generic(&state, true);
2481}
2482
2483static int unix_stream_splice_actor(struct sk_buff *skb,
2484                                    int skip, int chunk,
2485                                    struct unix_stream_read_state *state)
2486{
2487        return skb_splice_bits(skb, state->socket->sk,
2488                               UNIXCB(skb).consumed + skip,
2489                               state->pipe, chunk, state->splice_flags);
2490}
2491
2492static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2493                                       struct pipe_inode_info *pipe,
2494                                       size_t size, unsigned int flags)
2495{
2496        struct unix_stream_read_state state = {
2497                .recv_actor = unix_stream_splice_actor,
2498                .socket = sock,
2499                .pipe = pipe,
2500                .size = size,
2501                .splice_flags = flags,
2502        };
2503
2504        if (unlikely(*ppos))
2505                return -ESPIPE;
2506
2507        if (sock->file->f_flags & O_NONBLOCK ||
2508            flags & SPLICE_F_NONBLOCK)
2509                state.flags = MSG_DONTWAIT;
2510
2511        return unix_stream_read_generic(&state, false);
2512}
2513
2514static int unix_shutdown(struct socket *sock, int mode)
2515{
2516        struct sock *sk = sock->sk;
2517        struct sock *other;
2518
2519        if (mode < SHUT_RD || mode > SHUT_RDWR)
2520                return -EINVAL;
2521        /* This maps:
2522         * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2523         * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2524         * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2525         */
2526        ++mode;
2527
2528        unix_state_lock(sk);
2529        sk->sk_shutdown |= mode;
2530        other = unix_peer(sk);
2531        if (other)
2532                sock_hold(other);
2533        unix_state_unlock(sk);
2534        sk->sk_state_change(sk);
2535
2536        if (other &&
2537                (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2538
2539                int peer_mode = 0;
2540
2541                if (mode&RCV_SHUTDOWN)
2542                        peer_mode |= SEND_SHUTDOWN;
2543                if (mode&SEND_SHUTDOWN)
2544                        peer_mode |= RCV_SHUTDOWN;
2545                unix_state_lock(other);
2546                other->sk_shutdown |= peer_mode;
2547                unix_state_unlock(other);
2548                other->sk_state_change(other);
2549                if (peer_mode == SHUTDOWN_MASK)
2550                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2551                else if (peer_mode & RCV_SHUTDOWN)
2552                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2553        }
2554        if (other)
2555                sock_put(other);
2556
2557        return 0;
2558}
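
/*
 * E.g. shutdown(s, SHUT_WR) sets SEND_SHUTDOWN on s and RCV_SHUTDOWN
 * on its peer, so the peer's pending read() returns 0 (EOF) while the
 * peer can still send data back; a sketch of a half-close:
 *
 *	shutdown(s, SHUT_WR);			(done writing)
 *	while (read(s, buf, sizeof(buf)) > 0)
 *		;				(drain the peer's replies)
 */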
2559
2560long unix_inq_len(struct sock *sk)
2561{
2562        struct sk_buff *skb;
2563        long amount = 0;
2564
2565        if (sk->sk_state == TCP_LISTEN)
2566                return -EINVAL;
2567
2568        spin_lock(&sk->sk_receive_queue.lock);
2569        if (sk->sk_type == SOCK_STREAM ||
2570            sk->sk_type == SOCK_SEQPACKET) {
2571                skb_queue_walk(&sk->sk_receive_queue, skb)
2572                        amount += unix_skb_len(skb);
2573        } else {
2574                skb = skb_peek(&sk->sk_receive_queue);
2575                if (skb)
2576                        amount = skb->len;
2577        }
2578        spin_unlock(&sk->sk_receive_queue.lock);
2579
2580        return amount;
2581}
2582EXPORT_SYMBOL_GPL(unix_inq_len);
2583
2584long unix_outq_len(struct sock *sk)
2585{
2586        return sk_wmem_alloc_get(sk);
2587}
2588EXPORT_SYMBOL_GPL(unix_outq_len);
2589
2590static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2591{
2592        struct sock *sk = sock->sk;
2593        long amount = 0;
2594        int err;
2595
2596        switch (cmd) {
2597        case SIOCOUTQ:
2598                amount = unix_outq_len(sk);
2599                err = put_user(amount, (int __user *)arg);
2600                break;
2601        case SIOCINQ:
2602                amount = unix_inq_len(sk);
2603                if (amount < 0)
2604                        err = amount;
2605                else
2606                        err = put_user(amount, (int __user *)arg);
2607                break;
2608        default:
2609                err = -ENOIOCTLCMD;
2610                break;
2611        }
2612        return err;
2613}
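
/*
 * Userspace usage sketch (error handling omitted):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int unread, unsent;
 *
 *	ioctl(sock, SIOCINQ, &unread);	(unread bytes, or the length of
 *					 the first pending datagram)
 *	ioctl(sock, SIOCOUTQ, &unsent);	(bytes sent but not yet consumed
 *					 by the peer)
 */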
2614
2615static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2616{
2617        struct sock *sk = sock->sk;
2618        unsigned int mask;
2619
2620        sock_poll_wait(file, sk_sleep(sk), wait);
2621        mask = 0;
2622
2623        /* exceptional events? */
2624        if (sk->sk_err)
2625                mask |= POLLERR;
2626        if (sk->sk_shutdown == SHUTDOWN_MASK)
2627                mask |= POLLHUP;
2628        if (sk->sk_shutdown & RCV_SHUTDOWN)
2629                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2630
2631        /* readable? */
2632        if (!skb_queue_empty(&sk->sk_receive_queue))
2633                mask |= POLLIN | POLLRDNORM;
2634
2635        /* Connection-based sockets need to check for termination and startup */
2636        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2637            sk->sk_state == TCP_CLOSE)
2638                mask |= POLLHUP;
2639
2640        /*
2641         * We also report writable when the other side has shut down the
2642         * connection; this prevents sockets from getting stuck.
2643         */
2644        if (unix_writable(sk))
2645                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2646
2647        return mask;
2648}
2649
2650static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2651                                    poll_table *wait)
2652{
2653        struct sock *sk = sock->sk, *other;
2654        unsigned int mask, writable;
2655
2656        sock_poll_wait(file, sk_sleep(sk), wait);
2657        mask = 0;
2658
2659        /* exceptional events? */
2660        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2661                mask |= POLLERR |
2662                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2663
2664        if (sk->sk_shutdown & RCV_SHUTDOWN)
2665                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2666        if (sk->sk_shutdown == SHUTDOWN_MASK)
2667                mask |= POLLHUP;
2668
2669        /* readable? */
2670        if (!skb_queue_empty(&sk->sk_receive_queue))
2671                mask |= POLLIN | POLLRDNORM;
2672
2673        /* Connection-based sockets need to check for termination and startup */
2674        if (sk->sk_type == SOCK_SEQPACKET) {
2675                if (sk->sk_state == TCP_CLOSE)
2676                        mask |= POLLHUP;
2677                /* connection hasn't started yet? */
2678                if (sk->sk_state == TCP_SYN_SENT)
2679                        return mask;
2680        }
2681
2682        /* No write status requested, avoid expensive OUT tests. */
2683        if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2684                return mask;
2685
2686        writable = unix_writable(sk);
2687        if (writable) {
2688                unix_state_lock(sk);
2689
2690                other = unix_peer(sk);
2691                if (other && unix_peer(other) != sk &&
2692                    unix_recvq_full(other) &&
2693                    unix_dgram_peer_wake_me(sk, other))
2694                        writable = 0;
2695
2696                unix_state_unlock(sk);
2697        }
2698
2699        if (writable)
2700                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2701        else
2702                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2703
2704        return mask;
2705}
2706
2707#ifdef CONFIG_PROC_FS
2708
2709#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2710
2711#define get_bucket(x) ((x) >> BUCKET_SPACE)
2712#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2713#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
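
/*
 * *pos thus encodes (bucket, offset within bucket) in a single loff_t;
 * e.g. on 64-bit, assuming UNIX_HASH_BITS == 8, BUCKET_SPACE is 54 and
 * set_bucket_offset(2, 5) == (2UL << 54) | 5.
 */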
2714
2715static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2716{
2717        unsigned long offset = get_offset(*pos);
2718        unsigned long bucket = get_bucket(*pos);
2719        struct sock *sk;
2720        unsigned long count = 0;
2721
2722        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2723                if (sock_net(sk) != seq_file_net(seq))
2724                        continue;
2725                if (++count == offset)
2726                        break;
2727        }
2728
2729        return sk;
2730}
2731
2732static struct sock *unix_next_socket(struct seq_file *seq,
2733                                     struct sock *sk,
2734                                     loff_t *pos)
2735{
2736        unsigned long bucket;
2737
2738        while (sk > (struct sock *)SEQ_START_TOKEN) {
2739                sk = sk_next(sk);
2740                if (!sk)
2741                        goto next_bucket;
2742                if (sock_net(sk) == seq_file_net(seq))
2743                        return sk;
2744        }
2745
2746        do {
2747                sk = unix_from_bucket(seq, pos);
2748                if (sk)
2749                        return sk;
2750
2751next_bucket:
2752                bucket = get_bucket(*pos) + 1;
2753                *pos = set_bucket_offset(bucket, 1);
2754        } while (bucket < ARRAY_SIZE(unix_socket_table));
2755
2756        return NULL;
2757}
2758
2759static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2760        __acquires(unix_table_lock)
2761{
2762        spin_lock(&unix_table_lock);
2763
2764        if (!*pos)
2765                return SEQ_START_TOKEN;
2766
2767        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2768                return NULL;
2769
2770        return unix_next_socket(seq, NULL, pos);
2771}
2772
2773static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2774{
2775        ++*pos;
2776        return unix_next_socket(seq, v, pos);
2777}
2778
2779static void unix_seq_stop(struct seq_file *seq, void *v)
2780        __releases(unix_table_lock)
2781{
2782        spin_unlock(&unix_table_lock);
2783}
2784
2785static int unix_seq_show(struct seq_file *seq, void *v)
2786{
2787
2788        if (v == SEQ_START_TOKEN)
2789                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2790                         "Inode Path\n");
2791        else {
2792                struct sock *s = v;
2793                struct unix_sock *u = unix_sk(s);
2794                unix_state_lock(s);
2795
2796                seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2797                        s,
2798                        atomic_read(&s->sk_refcnt),
2799                        0,
2800                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2801                        s->sk_type,
2802                        s->sk_socket ?
2803                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2804                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2805                        sock_i_ino(s));
2806
2807                if (u->addr) {
2808                        int i, len;
2809                        seq_putc(seq, ' ');
2810
2811                        i = 0;
2812                        len = u->addr->len - sizeof(short);
2813                        if (!UNIX_ABSTRACT(s))
2814                                len--;
2815                        else {
2816                                seq_putc(seq, '@');
2817                                i++;
2818                        }
2819                        for ( ; i < len; i++)
2820                                seq_putc(seq, u->addr->name->sun_path[i] ?:
2821                                         '@');
2822                }
2823                unix_state_unlock(s);
2824                seq_putc(seq, '\n');
2825        }
2826
2827        return 0;
2828}
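
/*
 * The resulting /proc/net/unix lines look like this (values are
 * hypothetical; the row shown would be a listening stream socket):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff8800b9a21000: 00000002 00000000 00010000 0001 01 17890 /run/mysock
 */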
2829
2830static const struct seq_operations unix_seq_ops = {
2831        .start  = unix_seq_start,
2832        .next   = unix_seq_next,
2833        .stop   = unix_seq_stop,
2834        .show   = unix_seq_show,
2835};
2836
2837static int unix_seq_open(struct inode *inode, struct file *file)
2838{
2839        return seq_open_net(inode, file, &unix_seq_ops,
2840                            sizeof(struct seq_net_private));
2841}
2842
2843static const struct file_operations unix_seq_fops = {
2844        .owner          = THIS_MODULE,
2845        .open           = unix_seq_open,
2846        .read           = seq_read,
2847        .llseek         = seq_lseek,
2848        .release        = seq_release_net,
2849};
2850
2851#endif
2852
2853static const struct net_proto_family unix_family_ops = {
2854        .family = PF_UNIX,
2855        .create = unix_create,
2856        .owner  = THIS_MODULE,
2857};
2858
2859
2860static int __net_init unix_net_init(struct net *net)
2861{
2862        int error = -ENOMEM;
2863
2864        net->unx.sysctl_max_dgram_qlen = 10;
2865        if (unix_sysctl_register(net))
2866                goto out;
2867
2868#ifdef CONFIG_PROC_FS
2869        if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2870                unix_sysctl_unregister(net);
2871                goto out;
2872        }
2873#endif
2874        error = 0;
2875out:
2876        return error;
2877}
2878
2879static void __net_exit unix_net_exit(struct net *net)
2880{
2881        unix_sysctl_unregister(net);
2882        remove_proc_entry("unix", net->proc_net);
2883}
2884
2885static struct pernet_operations unix_net_ops = {
2886        .init = unix_net_init,
2887        .exit = unix_net_exit,
2888};
2889
2890static int __init af_unix_init(void)
2891{
2892        int rc = -1;
2893
2894        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2895
2896        rc = proto_register(&unix_proto, 1);
2897        if (rc != 0) {
2898                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2899                goto out;
2900        }
2901
2902        sock_register(&unix_family_ops);
2903        register_pernet_subsys(&unix_net_ops);
2904out:
2905        return rc;
2906}
2907
2908static void __exit af_unix_exit(void)
2909{
2910        sock_unregister(PF_UNIX);
2911        proto_unregister(&unix_proto);
2912        unregister_pernet_subsys(&unix_net_ops);
2913}
2914
2915/* Earlier than device_initcall() so that other drivers invoking
2916   request_module() don't end up in a loop when modprobe tries
2917   to use a UNIX socket. But later than subsys_initcall() because
2918   we depend on stuff initialised there */
2919fs_initcall(af_unix_init);
2920module_exit(af_unix_exit);
2921
2922MODULE_LICENSE("GPL");
2923MODULE_ALIAS_NETPROTO(PF_UNIX);
2924