linux/net/unix/af_unix.c
/*
 * NET4:        Implementation of BSD Unix domain sockets.
 *
 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *              Linus Torvalds  :       Assorted bug cures.
 *              Niibe Yutaka    :       async I/O support.
 *              Carsten Paeth   :       PF_UNIX check, address fixes.
 *              Alan Cox        :       Limit size of allocated blocks.
 *              Alan Cox        :       Fixed the stupid socketpair bug.
 *              Alan Cox        :       BSD compatibility fine tuning.
 *              Alan Cox        :       Fixed a bug in connect when interrupted.
 *              Alan Cox        :       Sorted out a proper draft version of
 *                                      file descriptor passing hacked up from
 *                                      Mike Shaver's work.
 *              Marty Leisner   :       Fixes to fd passing
 *              Nick Nevin      :       recvmsg bugfix.
 *              Alan Cox        :       Started proper garbage collector
 *              Heiko Eißfeldt  :       Missing verify_area check
 *              Alan Cox        :       Started POSIXisms
 *              Andreas Schwab  :       Replace inode by dentry for proper
 *                                      reference counting
 *              Kirk Petersen   :       Made this a module
 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
 *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 *                                      by the above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid a huge amount
 *                                      of socks hashed (this is for unix_gc()
 *                                      performance reasons).
 *                                      Security fix that limits the max
 *                                      number of socks to 2*max_files and
 *                                      the number of skbs queueable in the
 *                                      dgram receiver.
 *              Artur Skawina   :       Hash function optimizations
 *           Alexey Kuznetsov   :       Full scale SMP. Lots of bugs are introduced 8)
 *            Malcolm Beattie   :       Set peercred for socketpair
 *           Michal Ostrowski   :       Module initialization cleanup.
 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
 *                                      the core infrastructure is doing that
 *                                      for all net proto families now (2.5.69+)
 *
 *
 * Known differences from the reference BSD that was tested:
 *
 *      [TO FIX]
 *      ECONNREFUSED is not returned from one end of a connected socket to the
 *              other the moment one end closes.
 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *      [NOT TO FIX]
 *      accept() returns a path name even if the connecting socket has closed
 *              in the meantime (BSD loses the path and gives up).
 *      accept() returns a 0 length path for an unbound connector. BSD returns 16
 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *      BSD af_unix apparently has connect forgetting to block properly.
 *              (need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *      Bug fixes and improvements.
 *              - client shutdown killed the server socket.
 *              - removed all useless cli/sti pairs.
 *
 *      Semantic changes/extensions.
 *              - generic control message passing.
 *              - SCM_CREDENTIALS control message.
 *              - "Abstract" (not FS based) socket bindings.
 *                Abstract names are sequences of bytes (not zero terminated)
 *                starting with 0, so that this name space does not intersect
 *                with BSD names.
 */
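
/* Illustrative userspace sketch (not part of this file), assuming fd came
 * from socket(AF_UNIX, SOCK_STREAM, 0): binding an abstract name versus a
 * filesystem name. The abstract form starts sun_path with a zero byte and
 * passes the exact byte length; no trailing NUL is implied.
 *
 *      struct sockaddr_un a = { .sun_family = AF_UNIX };
 *      memcpy(a.sun_path + 1, "myname", 6);    // sun_path[0] stays '\0'
 *      bind(fd, (struct sockaddr *)&a,
 *           offsetof(struct sockaddr_un, sun_path) + 1 + 6);
 *
 *      struct sockaddr_un b = { .sun_family = AF_UNIX };
 *      strcpy(b.sun_path, "/tmp/mysock");      // FS namespace, NUL terminated
 *      bind(fd, (struct sockaddr *)&b, sizeof(b));
 */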

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/splice.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
        unsigned long hash = (unsigned long)addr;

        hash ^= hash >> 16;
        hash ^= hash >> 8;
        hash %= UNIX_HASH_SIZE;
        return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock
 *    each socket's state is protected by its own spin lock.
 */
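
/* A minimal sketch of the resulting pattern (cf. unix_find_socket_byname()
 * and unix_peer_get() below): hold unix_table_lock only around hash table
 * walks, and take a socket's own state lock when reading or updating that
 * socket:
 *
 *      spin_lock(&unix_table_lock);            // table-wide lookup
 *      ...find s, sock_hold(s)...
 *      spin_unlock(&unix_table_lock);
 *      unix_state_lock(s);                     // per-socket state
 *      ...
 *      unix_state_unlock(s);
 */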

static inline unsigned int unix_hash_fold(__wsum n)
{
        unsigned int hash = (__force unsigned int)csum_fold(n);

        hash ^= hash>>8;
        return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
        struct sock *peer;

        unix_state_lock(s);
        peer = unix_peer(s);
        if (peer)
                sock_hold(peer);
        unix_state_unlock(s);
        return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
        if (atomic_dec_and_test(&addr->refcnt))
                kfree(addr);
}

/*
 *      Check unix socket name:
 *              - it must not be zero length.
 *              - if it starts with a non-zero byte, it must be NUL terminated
 *                (an FS object)
 *              - if it starts with a zero byte, it is an abstract name.
 */
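
/* For instance (illustrative): for the FS name "/tmp/x", a valid len is
 * sizeof(short) + strlen("/tmp/x") + 1; for the abstract name "\0foo",
 * len is sizeof(short) + 4, and no trailing NUL is implied.
 */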

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
        if (len <= sizeof(short) || len > sizeof(*sunaddr))
                return -EINVAL;
        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
                return -EINVAL;
        if (sunaddr->sun_path[0]) {
                /*
                 * This may look like an off by one error but it is a bit more
                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
                 * sun_path[108] doesn't as such exist.  However in kernel space
                 * we are guaranteed that it is a valid memory location in our
                 * kernel address buffer.
                 */
                ((char *)sunaddr)[len] = 0;
                len = strlen(sunaddr->sun_path)+1+sizeof(short);
                return len;
        }

        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
        return len;
}

static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
                                              struct sockaddr_un *sunname,
                                              int len, int type, unsigned int hash)
{
        struct sock *s;

        sk_for_each(s, &unix_socket_table[hash ^ type]) {
                struct unix_sock *u = unix_sk(s);

                if (!net_eq(sock_net(s), net))
                        continue;

                if (u->addr->len == len &&
                    !memcmp(u->addr->name, sunname, len))
                        goto found;
        }
        s = NULL;
found:
        return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
                                                   struct sockaddr_un *sunname,
                                                   int len, int type,
                                                   unsigned int hash)
{
        struct sock *s;

        spin_lock(&unix_table_lock);
        s = __unix_find_socket_byname(net, sunname, len, type, hash);
        if (s)
                sock_hold(s);
        spin_unlock(&unix_table_lock);
        return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
        struct sock *s;

        spin_lock(&unix_table_lock);
        sk_for_each(s,
                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
                struct dentry *dentry = unix_sk(s)->path.dentry;

                if (dentry && d_real_inode(dentry) == i) {
                        sock_hold(s);
                        goto found;
                }
        }
        s = NULL;
found:
        spin_unlock(&unix_table_lock);
        return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket that is not itself
 * connected to the first socket (e.g., /dev/log), clients may only
 * enqueue more messages if the present receive queue of the server
 * socket is not "too large". This means there's a second writeability
 * condition poll and sendmsg need to test. The dgram recv code will do
 * a wake up on the peer_wait wait queue of a socket upon reception of
 * a datagram, and this needs to be propagated to sleeping would-be
 * writers, since these might not have sent anything so far. It can't
 * be accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it, and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */
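
/* Rough sequence sketch (illustrative), for a client C connected to a
 * busy server S:
 *
 *      C: poll()/sendmsg() finds unix_recvq_full(S)
 *         -> unix_dgram_peer_wake_connect(C, S) enqueues C's entry
 *            on S's peer_wait queue
 *      S: a reader consumes a datagram and wakes S's peer_wait queue
 *         -> unix_dgram_peer_wake_relay() dequeues C's entry and wakes
 *            C's own socket wait queue
 *      C: the woken writer retries its send (or poll reports writable)
 */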

static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
                                      void *key)
{
        struct unix_sock *u;
        wait_queue_head_t *u_sleep;

        u = container_of(q, struct unix_sock, peer_wake);

        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
                            q);
        u->peer_wake.private = NULL;

        /* relaying can only happen while the wq still exists */
        u_sleep = sk_sleep(&u->sk);
        if (u_sleep)
                wake_up_interruptible_poll(u_sleep, key);

        return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
        struct unix_sock *u, *u_other;
        int rc;

        u = unix_sk(sk);
        u_other = unix_sk(other);
        rc = 0;
        spin_lock(&u_other->peer_wait.lock);

        if (!u->peer_wake.private) {
                u->peer_wake.private = other;
                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);

                rc = 1;
        }

        spin_unlock(&u_other->peer_wait.lock);
        return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
                                            struct sock *other)
{
        struct unix_sock *u, *u_other;

        u = unix_sk(sk);
        u_other = unix_sk(other);
        spin_lock(&u_other->peer_wait.lock);

        if (u->peer_wake.private == other) {
                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
                u->peer_wake.private = NULL;
        }

        spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
                                                   struct sock *other)
{
        unix_dgram_peer_wake_disconnect(sk, other);
        wake_up_interruptible_poll(sk_sleep(sk),
                                   POLLOUT |
                                   POLLWRNORM |
                                   POLLWRBAND);
}

/* preconditions:
 *      - unix_peer(sk) == other
 *      - association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
        int connected;

        connected = unix_dgram_peer_wake_connect(sk, other);

        if (unix_recvq_full(other))
                return 1;

        if (connected)
                unix_dgram_peer_wake_disconnect(sk, other);

        return 0;
}

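/* Writable while outstanding write memory is at most a quarter of
 * sk_sndbuf, i.e. wmem_alloc * 4 <= sndbuf.
 */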
static inline int unix_writable(struct sock *sk)
{
        return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        if (unix_writable(sk)) {
                wq = rcu_dereference(sk->sk_wq);
                if (wq_has_sleeper(wq))
                        wake_up_interruptible_sync_poll(&wq->wait,
                                POLLOUT | POLLWRNORM | POLLWRBAND);
                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        }
        rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based solely on wmem_alloc; second, a sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
        if (!skb_queue_empty(&sk->sk_receive_queue)) {
                skb_queue_purge(&sk->sk_receive_queue);
                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

                /* If one link of a bidirectional dgram pipe is disconnected,
                 * we signal an error. Messages are lost. Do not do this when
                 * the peer was not connected to us.
                 */
                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
                        other->sk_err = ECONNRESET;
                        other->sk_error_report(other);
                }
        }
}

static void unix_sock_destructor(struct sock *sk)
{
        struct unix_sock *u = unix_sk(sk);

        skb_queue_purge(&sk->sk_receive_queue);

        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
        WARN_ON(!sk_unhashed(sk));
        WARN_ON(sk->sk_socket);
        if (!sock_flag(sk, SOCK_DEAD)) {
                printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
                return;
        }

        if (u->addr)
                unix_release_addr(u->addr);

        atomic_long_dec(&unix_nr_socks);
        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
        printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
                atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What is the above comment talking about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid  = get_pid(task_tgid(current));
        sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
        int err;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        struct pid *old_pid = NULL;

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;       /* Only stream/seqpacket sockets accept */
        err = -EINVAL;
        if (!u->addr)
                goto out;       /* No listens on an unbound socket */
        unix_state_lock(sk);
        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (backlog > sk->sk_max_ack_backlog)
                wake_up_interruptible_all(&u->peer_wait);
        sk->sk_max_ack_backlog  = backlog;
        sk->sk_state            = TCP_LISTEN;
        /* set credentials so connect can copy them */
        init_peercred(sk);
        err = 0;

out_unlock:
        unix_state_unlock(sk);
        put_pid(old_pid);
out:
        return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
                               struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
                               struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
                              struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
                              struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
                                  struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
                                  struct msghdr *, size_t, int);

static int unix_set_peek_off(struct sock *sk, int val)
{
        struct unix_sock *u = unix_sk(sk);

        if (mutex_lock_interruptible(&u->readlock))
                return -EINTR;

        sk->sk_peek_off = val;
        mutex_unlock(&u->readlock);

        return 0;
}
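
/* Illustrative userspace use (not part of this file): SO_PEEK_OFF lets
 * MSG_PEEK walk the queue instead of re-reading from the head.
 *
 *      int off = 0;
 *      setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *      recv(fd, buf, sizeof(buf), MSG_PEEK);   // peeks at the offset;
 *                                              // the kernel advances it
 */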


static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};

static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
        struct sock *sk = NULL;
        struct unix_sock *u;

        atomic_long_inc(&unix_nr_socks);
        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;

        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
        if (!sk)
                goto out;

        sock_init_data(sock, sk);
        lockdep_set_class(&sk->sk_receive_queue.lock,
                                &af_unix_sk_receive_queue_lock_key);

        sk->sk_write_space      = unix_write_space;
        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
        sk->sk_destruct         = unix_sock_destructor;
        u         = unix_sk(sk);
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        spin_lock_init(&u->lock);
        atomic_long_set(&u->inflight, 0);
        INIT_LIST_HEAD(&u->link);
        mutex_init(&u->readlock); /* single task reading lock */
        init_waitqueue_head(&u->peer_wait);
        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
        unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
        if (sk == NULL)
                atomic_long_dec(&unix_nr_socks);
        else {
                local_bh_disable();
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
                local_bh_enable();
        }
        return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
                       int kern)
{
        if (protocol && protocol != PF_UNIX)
                return -EPROTONOSUPPORT;

        sock->state = SS_UNCONNECTED;

        switch (sock->type) {
        case SOCK_STREAM:
                sock->ops = &unix_stream_ops;
                break;
                /*
                 *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
                 *      nothing uses it.
                 */
        case SOCK_RAW:
                sock->type = SOCK_DGRAM;
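                /* fall through: a SOCK_RAW request is served as SOCK_DGRAM */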
        case SOCK_DGRAM:
                sock->ops = &unix_dgram_ops;
                break;
        case SOCK_SEQPACKET:
                sock->ops = &unix_seqpacket_ops;
                break;
        default:
                return -ESOCKTNOSUPPORT;
        }

        return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
        struct sock *sk = sock->sk;

        if (!sk)
                return 0;

        unix_release_sock(sk, 0);
        sock->sk = NULL;

        return 0;
}

static int unix_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        static u32 ordernum = 1;
        struct unix_address *addr;
        int err;
        unsigned int retries = 0;

        err = mutex_lock_interruptible(&u->readlock);
        if (err)
                return err;

        err = 0;
        if (u->addr)
                goto out;

        err = -ENOMEM;
        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
        if (!addr)
                goto out;

        addr->name->sun_family = AF_UNIX;
        atomic_set(&addr->refcnt, 1);

retry:
        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

        spin_lock(&unix_table_lock);
        ordernum = (ordernum+1)&0xFFFFF;

        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
                                      addr->hash)) {
                spin_unlock(&unix_table_lock);
                /*
                 * __unix_find_socket_byname() may take a long time if many
                 * names are already in use.
                 */
                cond_resched();
                /* Give up if all names seem to be in use. */
                if (retries++ == 0xFFFFF) {
                        err = -ENOSPC;
                        kfree(addr);
                        goto out;
                }
                goto retry;
        }
        addr->hash ^= sk->sk_type;

        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        spin_unlock(&unix_table_lock);
        err = 0;

out:    mutex_unlock(&u->readlock);
        return err;
}

static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct sock *u;
        struct path path;
        int err = 0;

        if (sunname->sun_path[0]) {
                struct inode *inode;
                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
                if (err)
                        goto fail;
                inode = d_real_inode(path.dentry);
                err = inode_permission(inode, MAY_WRITE);
                if (err)
                        goto put_fail;

                err = -ECONNREFUSED;
                if (!S_ISSOCK(inode->i_mode))
                        goto put_fail;
                u = unix_find_socket_byinode(inode);
                if (!u)
                        goto put_fail;

                if (u->sk_type == type)
                        touch_atime(&path);

                path_put(&path);

                err = -EPROTOTYPE;
                if (u->sk_type != type) {
                        sock_put(u);
                        goto fail;
                }
        } else {
                err = -ECONNREFUSED;
                u = unix_find_socket_byname(net, sunname, len, type, hash);
                if (u) {
                        struct dentry *dentry;
                        dentry = unix_sk(u)->path.dentry;
                        if (dentry)
                                touch_atime(&unix_sk(u)->path);
                } else
                        goto fail;
        }
        return u;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
        struct dentry *dentry;
        struct path path;
        int err = 0;
        /*
         * Get the parent directory, calculate the hash for last
         * component.
         */
        dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
                return err;

        /*
         * All right, let's create it.
         */
        err = security_path_mknod(&path, dentry, mode, 0);
        if (!err) {
                err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
                if (!err) {
                        res->mnt = mntget(path.mnt);
                        res->dentry = dget(dentry);
                }
        }
        done_path_create(&path, dentry);
        return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
        int err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;

        err = -EINVAL;
        if (sunaddr->sun_family != AF_UNIX)
                goto out;

        if (addr_len == sizeof(short)) {
                err = unix_autobind(sock);
                goto out;
        }

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        err = mutex_lock_interruptible(&u->readlock);
        if (err)
                goto out;

        err = -EINVAL;
        if (u->addr)
                goto out_up;

        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
                goto out_up;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash ^ sk->sk_type;
        atomic_set(&addr->refcnt, 1);

        if (sun_path[0]) {
                struct path path;
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
                err = unix_mknod(sun_path, mode, &path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
                        unix_release_addr(addr);
                        goto out_up;
                }
                addr->hash = UNIX_HASH_SIZE;
                hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
                spin_lock(&unix_table_lock);
                u->path = path;
                list = &unix_socket_table[hash];
        } else {
                spin_lock(&unix_table_lock);
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                              sk->sk_type, hash)) {
                        unix_release_addr(addr);
                        goto out_unlock;
                }

                list = &unix_socket_table[addr->hash];
        }

        err = 0;
        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(list, sk);

out_unlock:
        spin_unlock(&unix_table_lock);
out_up:
        mutex_unlock(&u->readlock);
out:
        return err;
}

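/* Lock the state locks of two sockets in a stable (pointer) order, so
 * that two tasks double-locking the same pair cannot deadlock (ABBA
 * avoidance); a NULL or identical second socket degrades to one lock.
 */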
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }
        if (sk1 < sk2) {
                unix_state_lock(sk1);
                unix_state_lock_nested(sk2);
        } else {
                unix_state_lock(sk2);
                unix_state_lock_nested(sk1);
        }
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_unlock(sk1);
                return;
        }
        unix_state_unlock(sk1);
        unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

        } else {
                /*
                 *      1003.1g: breaking the connected state with AF_UNSPEC
                 */
                other = NULL;
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
                unix_peer(sk) = other;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }
        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
        struct unix_sock *u = unix_sk(other);
        int sched;
        DEFINE_WAIT(wait);

        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

        sched = !sock_flag(other, SOCK_DEAD) &&
                !(other->sk_shutdown & RCV_SHUTDOWN) &&
                unix_recvq_full(other);

        unix_state_unlock(other);

        if (sched)
                timeo = schedule_timeout(timeo);

        finish_wait(&u->peer_wait, &wait);
        return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we do it after the state is locked,
           we will have to recheck everything again in any case.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           It is a tricky place. We need to grab our state lock and cannot
           drop the lock on the peer. It is dangerous because a deadlock is
           possible. The connect-to-self case and a simultaneous
           attempt to connect are eliminated by checking the socket
           state. other is TCP_LISTEN; if sk is TCP_LISTEN, we
           check this before attempting to grab the lock.

           Well, and we have to recheck the state after the socket is locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Quickly set up all the necessary fields... */

        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock */
        if (otheru->addr) {
                atomic_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        sock_hold(newsk);

        smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other, 0);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
        struct sock *ska = socka->sk, *skb = sockb->sk;

        /* Join our sockets back to back */
        sock_hold(ska);
        sock_hold(skb);
        unix_peer(ska) = skb;
        unix_peer(skb) = ska;
        init_peercred(ska);
        init_peercred(skb);

        if (ska->sk_type != SOCK_DGRAM) {
                ska->sk_state = TCP_ESTABLISHED;
                skb->sk_state = TCP_ESTABLISHED;
                socka->state  = SS_CONNECTED;
                sockb->state  = SS_CONNECTED;
        }
        return 0;
}
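
/* Illustrative userspace counterpart (not part of this file):
 *
 *      int fds[2];
 *      if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) {
 *              // fds[0] and fds[1] are connected peers; either end
 *              // may be handed to a child across fork().
 *      }
 */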

static void unix_sock_inherit_flags(const struct socket *old,
                                    struct socket *new)
{
        if (test_bit(SOCK_PASSCRED, &old->flags))
                set_bit(SOCK_PASSCRED, &new->flags);
        if (test_bit(SOCK_PASSSEC, &old->flags))
                set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
        struct sock *sk = sock->sk;
        struct sock *tsk;
        struct sk_buff *skb;
        int err;

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;

        err = -EINVAL;
        if (sk->sk_state != TCP_LISTEN)
                goto out;

        /* If socket state is TCP_LISTEN it cannot change (for now...),
         * so that no locks are necessary.
         */

        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
        if (!skb) {
                /* This means receive shutdown. */
                if (err == 0)
                        err = -EINVAL;
                goto out;
        }

        tsk = skb->sk;
        skb_free_datagram(sk, skb);
        wake_up_interruptible(&unix_sk(sk)->peer_wait);

        /* attach accepted sock to socket */
        unix_state_lock(tsk);
        newsock->state = SS_CONNECTED;
        unix_sock_inherit_flags(sock, newsock);
        sock_graft(tsk, newsock);
        unix_state_unlock(tsk);
        return 0;

out:
        return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
        struct sock *sk = sock->sk;
        struct unix_sock *u;
        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
        int err = 0;

        if (peer) {
                sk = unix_peer_get(sk);

                err = -ENOTCONN;
                if (!sk)
                        goto out;
                err = 0;
        } else {
                sock_hold(sk);
        }

        u = unix_sk(sk);
        unix_state_lock(sk);
        if (!u->addr) {
                sunaddr->sun_family = AF_UNIX;
                sunaddr->sun_path[0] = 0;
                *uaddr_len = sizeof(short);
        } else {
                struct unix_address *addr = u->addr;

                *uaddr_len = addr->len;
                memcpy(sunaddr, addr->name, *uaddr_len);
        }
        unix_state_unlock(sk);
        sock_put(sk);
out:
        return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
        int i;

        scm->fp = UNIXCB(skb).fp;
        UNIXCB(skb).fp = NULL;

        for (i = scm->fp->count-1; i >= 0; i--)
                unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
        struct scm_cookie scm;
        memset(&scm, 0, sizeof(scm));
        scm.pid  = UNIXCB(skb).pid;
        if (UNIXCB(skb).fp)
                unix_detach_fds(&scm, skb);

        /* Alas, it calls VFS */
        /* So fscking what? fput() had been SMP-safe since the last Summer */
        scm_destroy(&scm);
        sock_wfree(skb);
}

/*
 * The "user->unix_inflight" variable is protected by the garbage
 * collection lock, and we just read it locklessly here. If you go
 * over the limit, there might be a tiny race in actually noticing
 * it across threads. Tough.
 */
static inline bool too_many_unix_fds(struct task_struct *p)
{
        struct user_struct *user = current_user();

        if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
                return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
        return false;
}

#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
        int i;
        unsigned char max_level = 0;
        int unix_sock_count = 0;

        if (too_many_unix_fds(current))
                return -ETOOMANYREFS;

        for (i = scm->fp->count - 1; i >= 0; i--) {
                struct sock *sk = unix_get_socket(scm->fp->fp[i]);

                if (sk) {
                        unix_sock_count++;
                        max_level = max(max_level,
                                        unix_sk(sk)->recursion_level);
                }
        }
        if (unlikely(max_level > MAX_RECURSION_LEVEL))
                return -ETOOMANYREFS;

        /*
         * Need to duplicate file references for the sake of garbage
         * collection.  Otherwise a socket in the fps might become a
         * candidate for GC while the skb is not yet queued.
         */
        UNIXCB(skb).fp = scm_fp_dup(scm->fp);
        if (!UNIXCB(skb).fp)
                return -ENOMEM;

        for (i = scm->fp->count - 1; i >= 0; i--)
                unix_inflight(scm->fp->user, scm->fp->fp[i]);
        return max_level;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
        int err = 0;

        UNIXCB(skb).pid  = get_pid(scm->pid);
        UNIXCB(skb).uid = scm->creds.uid;
        UNIXCB(skb).gid = scm->creds.gid;
        UNIXCB(skb).fp = NULL;
        if (scm->fp && send_fds)
                err = unix_attach_fds(scm, skb);

        skb->destructor = unix_destruct_scm;
        return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
                                  const struct sock *other)
{
        return test_bit(SOCK_PASSCRED, &sock->flags) ||
               !other->sk_socket ||
               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

1570/*
1571 * Some apps rely on write() giving SCM_CREDENTIALS
1572 * We include credentials if source or destination socket
1573 * asserted SOCK_PASSCRED.
1574 */
1575static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1576                            const struct sock *other)
1577{
1578        if (UNIXCB(skb).pid)
1579                return;
1580        if (unix_passcred_enabled(sock, other)) {
1581                UNIXCB(skb).pid  = get_pid(task_tgid(current));
1582                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1583        }
1584}
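
/*
 * For illustration only: a userspace sketch of the credentials flow that
 * maybe_add_creds() feeds. "fd" and "msg" are assumed to be a connected
 * AF_UNIX socket and a msghdr filled in by recvmsg() with ancillary space;
 * struct ucred and SCM_CREDENTIALS require _GNU_SOURCE under glibc.
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *
 *	struct cmsghdr *cmsg;
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS) {
 *			struct ucred cred;
 *			memcpy(&cred, CMSG_DATA(cmsg), sizeof(cred));
 *			printf("pid=%d uid=%u gid=%u\n",
 *			       cred.pid, cred.uid, cred.gid);
 *		}
 *	}
 */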
1585
1586static int maybe_init_creds(struct scm_cookie *scm,
1587                            struct socket *socket,
1588                            const struct sock *other)
1589{
1590        int err;
1591        struct msghdr msg = { .msg_controllen = 0 };
1592
1593        err = scm_send(socket, &msg, scm, false);
1594        if (err)
1595                return err;
1596
1597        if (unix_passcred_enabled(socket, other)) {
1598                scm->pid = get_pid(task_tgid(current));
1599                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1600        }
1601        return err;
1602}
1603
1604static bool unix_skb_scm_eq(struct sk_buff *skb,
1605                            struct scm_cookie *scm)
1606{
1607        const struct unix_skb_parms *u = &UNIXCB(skb);
1608
1609        return u->pid == scm->pid &&
1610               uid_eq(u->uid, scm->creds.uid) &&
1611               gid_eq(u->gid, scm->creds.gid);
1612}
1613
1614/*
1615 *      Send AF_UNIX data.
1616 */
1617
1618static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1619                              struct msghdr *msg, size_t len)
1620{
1621        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1622        struct sock *sk = sock->sk;
1623        struct net *net = sock_net(sk);
1624        struct unix_sock *u = unix_sk(sk);
1625        struct sockaddr_un *sunaddr = msg->msg_name;
1626        struct sock *other = NULL;
1627        int namelen = 0; /* fake GCC */
1628        int err;
1629        unsigned int hash;
1630        struct sk_buff *skb;
1631        long timeo;
1632        struct scm_cookie tmp_scm;
1633        int max_level;
1634        int data_len = 0;
1635        int sk_locked;
1636
1637        if (!siocb->scm)
1638                siocb->scm = &tmp_scm;
1639        wait_for_unix_gc();
1640        err = scm_send(sock, msg, siocb->scm, false);
1641        if (err < 0)
1642                return err;
1643
1644        err = -EOPNOTSUPP;
1645        if (msg->msg_flags&MSG_OOB)
1646                goto out;
1647
1648        if (msg->msg_namelen) {
1649                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1650                if (err < 0)
1651                        goto out;
1652                namelen = err;
1653        } else {
1654                sunaddr = NULL;
1655                err = -ENOTCONN;
1656                other = unix_peer_get(sk);
1657                if (!other)
1658                        goto out;
1659        }
1660
1661        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1662            && (err = unix_autobind(sock)) != 0)
1663                goto out;
1664
1665        err = -EMSGSIZE;
1666        if (len > sk->sk_sndbuf - 32)
1667                goto out;
1668
1669        if (len > SKB_MAX_ALLOC) {
1670                data_len = min_t(size_t,
1671                                 len - SKB_MAX_ALLOC,
1672                                 MAX_SKB_FRAGS * PAGE_SIZE);
1673                data_len = PAGE_ALIGN(data_len);
1674
1675                BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1676        }
1677
1678        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1679                                   msg->msg_flags & MSG_DONTWAIT, &err,
1680                                   PAGE_ALLOC_COSTLY_ORDER);
1681        if (skb == NULL)
1682                goto out;
1683
1684        err = unix_scm_to_skb(siocb->scm, skb, true);
1685        if (err < 0)
1686                goto out_free;
1687        max_level = err + 1;
1688        unix_get_secdata(siocb->scm, skb);
1689
1690        skb_put(skb, len - data_len);
1691        skb->data_len = data_len;
1692        skb->len = len;
1693        err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1694        if (err)
1695                goto out_free;
1696
1697        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1698
1699restart:
1700        if (!other) {
1701                err = -ECONNRESET;
1702                if (sunaddr == NULL)
1703                        goto out_free;
1704
1705                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1706                                        hash, &err);
1707                if (other == NULL)
1708                        goto out_free;
1709        }
1710
1711        if (sk_filter(other, skb) < 0) {
1712                /* Toss the packet but do not return any error to the sender */
1713                err = len;
1714                goto out_free;
1715        }
1716
1717        sk_locked = 0;
1718        unix_state_lock(other);
1719restart_locked:
1720        err = -EPERM;
1721        if (!unix_may_send(sk, other))
1722                goto out_unlock;
1723
1724        if (unlikely(sock_flag(other, SOCK_DEAD))) {
1725                /*
1726                 *      Check with 1003.1g - what should a
1727                 *      datagram error do here?
1728                 */
1729                unix_state_unlock(other);
1730                sock_put(other);
1731
1732                if (!sk_locked)
1733                        unix_state_lock(sk);
1734
1735                err = 0;
1736                if (unix_peer(sk) == other) {
1737                        unix_peer(sk) = NULL;
1738                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1739
1740                        unix_state_unlock(sk);
1741
1742                        unix_dgram_disconnected(sk, other);
1743                        sock_put(other);
1744                        err = -ECONNREFUSED;
1745                } else {
1746                        unix_state_unlock(sk);
1747                }
1748
1749                other = NULL;
1750                if (err)
1751                        goto out_free;
1752                goto restart;
1753        }
1754
1755        err = -EPIPE;
1756        if (other->sk_shutdown & RCV_SHUTDOWN)
1757                goto out_unlock;
1758
1759        if (sk->sk_type != SOCK_SEQPACKET) {
1760                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1761                if (err)
1762                        goto out_unlock;
1763        }
1764
1765        /* other == sk && unix_peer(other) != sk can happen when
1766         * - unix_peer(sk) == NULL and the destination address is bound to sk
1767         * - unix_peer(sk) == sk at the time of the get, but disconnected before the lock
1768         */
1769        if (other != sk &&
1770            unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1771                if (timeo) {
1772                        timeo = unix_wait_for_peer(other, timeo);
1773
1774                        err = sock_intr_errno(timeo);
1775                        if (signal_pending(current))
1776                                goto out_free;
1777
1778                        goto restart;
1779                }
1780
1781                if (!sk_locked) {
1782                        unix_state_unlock(other);
1783                        unix_state_double_lock(sk, other);
1784                }
1785
1786                if (unix_peer(sk) != other ||
1787                    unix_dgram_peer_wake_me(sk, other)) {
1788                        err = -EAGAIN;
1789                        sk_locked = 1;
1790                        goto out_unlock;
1791                }
1792
1793                if (!sk_locked) {
1794                        sk_locked = 1;
1795                        goto restart_locked;
1796                }
1797        }
1798
1799        if (unlikely(sk_locked))
1800                unix_state_unlock(sk);
1801
1802        if (sock_flag(other, SOCK_RCVTSTAMP))
1803                __net_timestamp(skb);
1804        maybe_add_creds(skb, sock, other);
1805        skb_queue_tail(&other->sk_receive_queue, skb);
1806        if (max_level > unix_sk(other)->recursion_level)
1807                unix_sk(other)->recursion_level = max_level;
1808        unix_state_unlock(other);
1809        other->sk_data_ready(other, len);
1810        sock_put(other);
1811        scm_destroy(siocb->scm);
1812        return len;
1813
1814out_unlock:
1815        if (sk_locked)
1816                unix_state_unlock(sk);
1817        unix_state_unlock(other);
1818out_free:
1819        kfree_skb(skb);
1820out:
1821        if (other)
1822                sock_put(other);
1823        scm_destroy(siocb->scm);
1824        return err;
1825}
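
/*
 * For illustration only: the userspace view of unix_dgram_sendmsg(). The
 * socket path "/tmp/dgram.sock" is an assumed example. Datagrams larger
 * than sk_sndbuf - 32 fail with EMSGSIZE, and a full receive queue makes
 * the sender block (or return EAGAIN with MSG_DONTWAIT).
 *
 *	struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *	strncpy(addr.sun_path, "/tmp/dgram.sock", sizeof(addr.sun_path) - 1);
 *	sendto(fd, "ping", 4, 0, (struct sockaddr *)&addr, sizeof(addr));
 */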
1826
1827/* We use paged skbs for stream sockets, and limit occupancy to 32768
1828 * bytes, with a minimum of a full page.
1829 */
1830#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1831
1832static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1833                               struct msghdr *msg, size_t len)
1834{
1835        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1836        struct sock *sk = sock->sk;
1837        struct sock *other = NULL;
1838        int err, size;
1839        struct sk_buff *skb;
1840        int sent = 0;
1841        struct scm_cookie tmp_scm;
1842        bool fds_sent = false;
1843        int max_level;
1844        int data_len;
1845
1846        if (!siocb->scm)
1847                siocb->scm = &tmp_scm;
1848        wait_for_unix_gc();
1849        err = scm_send(sock, msg, siocb->scm, false);
1850        if (err < 0)
1851                return err;
1852
1853        err = -EOPNOTSUPP;
1854        if (msg->msg_flags&MSG_OOB)
1855                goto out_err;
1856
1857        if (msg->msg_namelen) {
1858                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1859                goto out_err;
1860        } else {
1861                err = -ENOTCONN;
1862                other = unix_peer(sk);
1863                if (!other)
1864                        goto out_err;
1865        }
1866
1867        if (sk->sk_shutdown & SEND_SHUTDOWN)
1868                goto pipe_err;
1869
1870        while (sent < len) {
1871                size = len - sent;
1872
1873                /* Keep two messages in the pipe so it schedules better */
1874                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1875
1876                /* allow fallback to order-0 allocations */
1877                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1878
1879                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1880
1881                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1882
1883                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1884                                           msg->msg_flags & MSG_DONTWAIT, &err,
1885                                           get_order(UNIX_SKB_FRAGS_SZ));
1886                if (!skb)
1887                        goto out_err;
1888
1889                /* Only send the fds in the first buffer */
1890                err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1891                if (err < 0) {
1892                        kfree_skb(skb);
1893                        goto out_err;
1894                }
1895                max_level = err + 1;
1896                fds_sent = true;
1897
1898                skb_put(skb, size - data_len);
1899                skb->data_len = data_len;
1900                skb->len = size;
1901                err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov,
1902                                                   sent, size);
1903                if (err) {
1904                        kfree_skb(skb);
1905                        goto out_err;
1906                }
1907
1908                unix_state_lock(other);
1909
1910                if (sock_flag(other, SOCK_DEAD) ||
1911                    (other->sk_shutdown & RCV_SHUTDOWN))
1912                        goto pipe_err_free;
1913
1914                maybe_add_creds(skb, sock, other);
1915                skb_queue_tail(&other->sk_receive_queue, skb);
1916                if (max_level > unix_sk(other)->recursion_level)
1917                        unix_sk(other)->recursion_level = max_level;
1918                unix_state_unlock(other);
1919                other->sk_data_ready(other, size);
1920                sent += size;
1921        }
1922
1923        scm_destroy(siocb->scm);
1924        siocb->scm = NULL;
1925
1926        return sent;
1927
1928pipe_err_free:
1929        unix_state_unlock(other);
1930        kfree_skb(skb);
1931pipe_err:
1932        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1933                send_sig(SIGPIPE, current, 0);
1934        err = -EPIPE;
1935out_err:
1936        scm_destroy(siocb->scm);
1937        siocb->scm = NULL;
1938        return sent ? : err;
1939}
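
/*
 * For illustration only: the userspace view of unix_stream_sendmsg().
 * MSG_NOSIGNAL suppresses the SIGPIPE raised above when the peer has shut
 * down; a short count can be returned if the peer vanishes mid-transfer.
 *
 *	ssize_t n = send(fd, buf, len, MSG_NOSIGNAL);
 *
 *	if (n < 0 && errno == EPIPE)
 *		... the peer has closed or shut down for reading ...
 */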
1940
1941static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1942                                    int offset, size_t size, int flags)
1943{
1944        int err;
1945        bool send_sigpipe = false;
1946        bool init_scm = true;
1947        struct scm_cookie scm;
1948        struct sock *other, *sk = socket->sk;
1949        struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1950
1951        if (flags & MSG_OOB)
1952                return -EOPNOTSUPP;
1953
1954        other = unix_peer(sk);
1955        if (!other || sk->sk_state != TCP_ESTABLISHED)
1956                return -ENOTCONN;
1957
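        /* The "if (false)" block below is never entered on the first pass;
         * the alloc_skb label is jumped to from further down once both
         * locks have to be dropped to (re)allocate an skb, and execution
         * then falls through to retake the readlock.
         */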
1958        if (false) {
1959alloc_skb:
1960                unix_state_unlock(other);
1961                mutex_unlock(&unix_sk(other)->readlock);
1962                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1963                                              &err, 0);
1964                if (!newskb)
1965                        goto err;
1966        }
1967
1968        /* We must acquire the readlock, as we modify skbs that are
1969         * already present in the sk_receive_queue and adjust skb->len.
1970         */
1971        err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1972        if (err) {
1973                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1974                goto err;
1975        }
1976
1977        if (sk->sk_shutdown & SEND_SHUTDOWN) {
1978                err = -EPIPE;
1979                send_sigpipe = true;
1980                goto err_unlock;
1981        }
1982
1983        unix_state_lock(other);
1984
1985        if (sock_flag(other, SOCK_DEAD) ||
1986            other->sk_shutdown & RCV_SHUTDOWN) {
1987                err = -EPIPE;
1988                send_sigpipe = true;
1989                goto err_state_unlock;
1990        }
1991
1992        if (init_scm) {
1993                err = maybe_init_creds(&scm, socket, other);
1994                if (err)
1995                        goto err_state_unlock;
1996                init_scm = false;
1997        }
1998
1999        skb = skb_peek_tail(&other->sk_receive_queue);
2000        if (tail && tail == skb) {
2001                skb = newskb;
2002        } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2003                if (newskb) {
2004                        skb = newskb;
2005                } else {
2006                        tail = skb;
2007                        goto alloc_skb;
2008                }
2009        } else if (newskb) {
2010                /* This is the fast path: an appendable skb already sits at
2011                 * the tail, so release the spare allocation. consume_skb()
2012                 * would also be harmless if newskb were NULL.
2013                 */
2014                consume_skb(newskb);
2015                newskb = NULL;
2016        }
2017
2018        if (skb_append_pagefrags(skb, page, offset, size)) {
2019                tail = skb;
2020                goto alloc_skb;
2021        }
2022
2023        skb->len += size;
2024        skb->data_len += size;
2025        skb->truesize += size;
2026        atomic_add(size, &sk->sk_wmem_alloc);
2027
2028        if (newskb) {
2029                err = unix_scm_to_skb(&scm, skb, false);
2030                if (err)
2031                        goto err_state_unlock;
2032                spin_lock(&other->sk_receive_queue.lock);
2033                __skb_queue_tail(&other->sk_receive_queue, newskb);
2034                spin_unlock(&other->sk_receive_queue.lock);
2035        }
2036
2037        unix_state_unlock(other);
2038        mutex_unlock(&unix_sk(other)->readlock);
2039
2040        other->sk_data_ready(other, 0);
2041        scm_destroy(&scm);
2042        return size;
2043
2044err_state_unlock:
2045        unix_state_unlock(other);
2046err_unlock:
2047        mutex_unlock(&unix_sk(other)->readlock);
2048err:
2049        kfree_skb(newskb);
2050        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2051                send_sig(SIGPIPE, current, 0);
2052        if (!init_scm)
2053                scm_destroy(&scm);
2054        return err;
2055}
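
/*
 * For illustration only: sendfile(2) onto a connected AF_UNIX stream socket
 * lands in unix_stream_sendpage() above, appending page fragments to the
 * peer's tail skb when the attached credentials match. "sock_fd" and
 * "file_fd" are assumed descriptors.
 *
 *	off_t off = 0;
 *	ssize_t n = sendfile(sock_fd, file_fd, &off, count);
 */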
2056
2057static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
2058                                  struct msghdr *msg, size_t len)
2059{
2060        int err;
2061        struct sock *sk = sock->sk;
2062
2063        err = sock_error(sk);
2064        if (err)
2065                return err;
2066
2067        if (sk->sk_state != TCP_ESTABLISHED)
2068                return -ENOTCONN;
2069
2070        if (msg->msg_namelen)
2071                msg->msg_namelen = 0;
2072
2073        return unix_dgram_sendmsg(kiocb, sock, msg, len);
2074}
2075
2076static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
2077                              struct msghdr *msg, size_t size,
2078                              int flags)
2079{
2080        struct sock *sk = sock->sk;
2081
2082        if (sk->sk_state != TCP_ESTABLISHED)
2083                return -ENOTCONN;
2084
2085        return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
2086}
2087
2088static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2089{
2090        struct unix_sock *u = unix_sk(sk);
2091
2092        if (u->addr) {
2093                msg->msg_namelen = u->addr->len;
2094                memcpy(msg->msg_name, u->addr->name, u->addr->len);
2095        }
2096}
2097
2098static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
2099                              struct msghdr *msg, size_t size,
2100                              int flags)
2101{
2102        struct sock_iocb *siocb = kiocb_to_siocb(iocb);
2103        struct scm_cookie tmp_scm;
2104        struct sock *sk = sock->sk;
2105        struct unix_sock *u = unix_sk(sk);
2106        int noblock = flags & MSG_DONTWAIT;
2107        struct sk_buff *skb;
2108        int err;
2109        int peeked, skip;
2110
2111        err = -EOPNOTSUPP;
2112        if (flags&MSG_OOB)
2113                goto out;
2114
2115        err = mutex_lock_interruptible(&u->readlock);
2116        if (unlikely(err)) {
2117                /* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
2118                 * sk_rcvtimeo is not honored by mutex_lock_interruptible().
2119                 */
2120                err = noblock ? -EAGAIN : -ERESTARTSYS;
2121                goto out;
2122        }
2123
2124        skip = sk_peek_offset(sk, flags);
2125
2126        skb = __skb_recv_datagram(sk, flags, NULL, &peeked, &skip, &err);
2127        if (!skb) {
2128                unix_state_lock(sk);
2129                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2130                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2131                    (sk->sk_shutdown & RCV_SHUTDOWN))
2132                        err = 0;
2133                unix_state_unlock(sk);
2134                goto out_unlock;
2135        }
2136
2137        wake_up_interruptible_sync_poll(&u->peer_wait,
2138                                        POLLOUT | POLLWRNORM | POLLWRBAND);
2139
2140        if (msg->msg_name)
2141                unix_copy_addr(msg, skb->sk);
2142
2143        if (size > skb->len - skip)
2144                size = skb->len - skip;
2145        else if (size < skb->len - skip)
2146                msg->msg_flags |= MSG_TRUNC;
2147
2148        err = skb_copy_datagram_msg(skb, skip, msg, size);
2149        if (err)
2150                goto out_free;
2151
2152        if (sock_flag(sk, SOCK_RCVTSTAMP))
2153                __sock_recv_timestamp(msg, sk, skb);
2154
2155        if (!siocb->scm) {
2156                siocb->scm = &tmp_scm;
2157                memset(&tmp_scm, 0, sizeof(tmp_scm));
2158        }
2159        scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2160        unix_set_secdata(siocb->scm, skb);
2161
2162        if (!(flags & MSG_PEEK)) {
2163                if (UNIXCB(skb).fp)
2164                        unix_detach_fds(siocb->scm, skb);
2165
2166                sk_peek_offset_bwd(sk, skb->len);
2167        } else {
2168                /* The right behaviour on MSG_PEEK is debatable; we could:
2169                   - not return fds - simple, but it loses information
2170                   - return fds without returning them again on read (the old
2171                     strategy, apparently wrong)
2172                   - clone fds (chosen here, as the most universal
2173                     solution)
2174
2175                   POSIX 1003.1g does not actually define this clearly
2176                   at all - but then, POSIX 1003.1g leaves a lot of
2177                   things undefined!
2178
2179                */
2180
2181                sk_peek_offset_fwd(sk, size);
2182
2183                if (UNIXCB(skb).fp)
2184                        siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2185        }
2186        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2187
2188        scm_recv(sock, msg, siocb->scm, flags);
2189
2190out_free:
2191        skb_free_datagram(sk, skb);
2192out_unlock:
2193        mutex_unlock(&u->readlock);
2194out:
2195        return err;
2196}
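
/*
 * For illustration only: the receive side of descriptor passing handled
 * above. On a normal read the fds are detached from the skb; with MSG_PEEK
 * they are cloned, so a later read installs them again. "fd" is an assumed
 * connected socket.
 *
 *	union {
 *		struct cmsghdr align;
 *		char buf[CMSG_SPACE(sizeof(int))];
 *	} u;
 *	char data;
 *	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
 *	};
 *	int new_fd = -1;
 *
 *	if (recvmsg(fd, &msg, 0) >= 0) {
 *		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *		if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_RIGHTS)
 *			memcpy(&new_fd, CMSG_DATA(cmsg), sizeof(int));
 *	}
 */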
2197
2198/*
2199 *      Sleep until more data has arrived, but check for races.
2200 */
2201static long unix_stream_data_wait(struct sock *sk, long timeo,
2202                                  struct sk_buff *last, unsigned int last_len)
2203{
2204        struct sk_buff *tail;
2205        DEFINE_WAIT(wait);
2206
2207        unix_state_lock(sk);
2208
2209        for (;;) {
2210                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2211
2212                tail = skb_peek_tail(&sk->sk_receive_queue);
2213                if (tail != last ||
2214                    (tail && tail->len != last_len) ||
2215                    sk->sk_err ||
2216                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2217                    signal_pending(current) ||
2218                    !timeo)
2219                        break;
2220
2221                set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2222                unix_state_unlock(sk);
2223                timeo = freezable_schedule_timeout(timeo);
2224                unix_state_lock(sk);
2225
2226                if (sock_flag(sk, SOCK_DEAD))
2227                        break;
2228
2229                clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2230        }
2231
2232        finish_wait(sk_sleep(sk), &wait);
2233        unix_state_unlock(sk);
2234        return timeo;
2235}
2236
2237static unsigned int unix_skb_len(const struct sk_buff *skb)
2238{
2239        return skb->len - UNIXCB(skb).consumed;
2240}
2241
2242struct unix_stream_read_state {
2243        int (*recv_actor)(struct sk_buff *, int, int,
2244                          struct unix_stream_read_state *);
2245        struct socket *socket;
2246        struct msghdr *msg;
2247        struct kiocb *kiocb;
2248        struct pipe_inode_info *pipe;
2249        size_t size;
2250        int flags;
2251        unsigned int splice_flags;
2252};
2253
2254static int unix_stream_read_generic(struct unix_stream_read_state *state)
2255{
2256        struct sock_iocb *siocb;
2257        struct sock_iocb tmp_siocb;
2258        struct scm_cookie tmp_scm;
2259        struct socket *sock = state->socket;
2260        struct sock *sk = sock->sk;
2261        struct unix_sock *u = unix_sk(sk);
2262        int copied = 0;
2263        int flags = state->flags;
2264        int noblock = flags & MSG_DONTWAIT;
2265        bool check_creds = false;
2266        int target;
2267        int err = 0;
2268        long timeo;
2269        int skip;
2270        size_t size = state->size;
2271        unsigned int last_len;
2272
2273        err = -EINVAL;
2274        if (sk->sk_state != TCP_ESTABLISHED)
2275                goto out;
2276
2277        err = -EOPNOTSUPP;
2278        if (flags & MSG_OOB)
2279                goto out;
2280
2281        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2282        timeo = sock_rcvtimeo(sk, noblock);
2283
2284        if (state->kiocb) {
2285                siocb = kiocb_to_siocb(state->kiocb);
2286        } else {
2287                siocb = &tmp_siocb;
2288                memset(&tmp_siocb, 0, sizeof(tmp_siocb));
2289        }
2290
2291        /* Lock the socket to prevent the queue from being reordered
2292         * while we sleep copying data out to the msg.
2293         */
2294
2295        if (!siocb->scm) {
2296                siocb->scm = &tmp_scm;
2297                memset(&tmp_scm, 0, sizeof(tmp_scm));
2298        }
2299        err = mutex_lock_interruptible(&u->readlock);
2300        if (unlikely(err)) {
2301                /* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
2302                 * sk_rcvtimeo is not honored by mutex_lock_interruptible().
2303                 */
2304                err = noblock ? -EAGAIN : -ERESTARTSYS;
2305                goto out;
2306        }
2307
2308        do {
2309                int chunk;
2310                bool drop_skb;
2311                struct sk_buff *skb, *last;
2312
2313                unix_state_lock(sk);
2314                if (sock_flag(sk, SOCK_DEAD)) {
2315                        err = -ECONNRESET;
2316                        goto unlock;
2317                }
2318                last = skb = skb_peek(&sk->sk_receive_queue);
2319                last_len = last ? last->len : 0;
2320again:
2321                if (skb == NULL) {
2322                        unix_sk(sk)->recursion_level = 0;
2323                        if (copied >= target)
2324                                goto unlock;
2325
2326                        /*
2327                         *      POSIX 1003.1g mandates this order.
2328                         */
2329
2330                        err = sock_error(sk);
2331                        if (err)
2332                                goto unlock;
2333                        if (sk->sk_shutdown & RCV_SHUTDOWN)
2334                                goto unlock;
2335
2336                        unix_state_unlock(sk);
2337                        err = -EAGAIN;
2338                        if (!timeo)
2339                                break;
2340                        mutex_unlock(&u->readlock);
2341
2342                        timeo = unix_stream_data_wait(sk, timeo, last,
2343                                                      last_len);
2344
2345                        if (signal_pending(current) ||
2346                            mutex_lock_interruptible(&u->readlock)) {
2347                                err = sock_intr_errno(timeo);
2348                                goto out;
2349                        }
2350
2351                        continue;
2352unlock:
2353                        unix_state_unlock(sk);
2354                        break;
2355                }
2356
2357                skip = sk_peek_offset(sk, flags);
2358                while (skip >= unix_skb_len(skb)) {
2359                        skip -= unix_skb_len(skb);
2360                        last = skb;
2361                        last_len = skb->len;
2362                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
2363                        if (!skb)
2364                                goto again;
2365                }
2366
2367                unix_state_unlock(sk);
2368
2369                if (check_creds) {
2370                        /* Never glue messages from different writers */
2371                        if (!unix_skb_scm_eq(skb, siocb->scm))
2372                                break;
2373                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2374                        /* Copy credentials */
2375                        scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2376                        check_creds = true;
2377                }
2378
2379                /* Copy address just once */
2380                if (state->msg && state->msg->msg_name) {
2381                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2382                                         state->msg->msg_name);
2383                        unix_copy_addr(state->msg, skb->sk);
2384                        sunaddr = NULL;
2385                }
2386
2387                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2388                skb_get(skb);
2389                chunk = state->recv_actor(skb, skip, chunk, state);
2390                drop_skb = !unix_skb_len(skb);
2391                /* skb is only safe to use if !drop_skb */
2392                consume_skb(skb);
2393                if (chunk < 0) {
2394                        if (copied == 0)
2395                                copied = -EFAULT;
2396                        break;
2397                }
2398                copied += chunk;
2399                size -= chunk;
2400
2401                if (drop_skb) {
2402                        /* the skb was touched by a concurrent reader;
2403                         * we should not expect anything from this skb
2404                         * anymore and assume it invalid - we can be
2405                         * sure it was dropped from the socket queue
2406                         *
2407                         * let's report a short read
2408                         */
2409                        err = 0;
2410                        break;
2411                }
2412
2413                /* Mark read part of skb as used */
2414                if (!(flags & MSG_PEEK)) {
2415                        UNIXCB(skb).consumed += chunk;
2416
2417                        sk_peek_offset_bwd(sk, chunk);
2418
2419                        if (UNIXCB(skb).fp)
2420                                unix_detach_fds(siocb->scm, skb);
2421
2422                        if (unix_skb_len(skb))
2423                                break;
2424
2425                        skb_unlink(skb, &sk->sk_receive_queue);
2426                        consume_skb(skb);
2427
2428                        if (siocb->scm->fp)
2429                                break;
2430                } else {
2431                        /* The same debatable MSG_PEEK behaviour; see the
2432                         * note in unix_dgram_recvmsg(). */
2433                        if (UNIXCB(skb).fp)
2434                                siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2435
2436                        sk_peek_offset_fwd(sk, chunk);
2437
2438                        break;
2439                }
2440        } while (size);
2441
2442        mutex_unlock(&u->readlock);
2443        if (state->msg)
2444                scm_recv(sock, state->msg, siocb->scm, flags);
2445        else
2446                scm_destroy(siocb->scm);
2447out:
2448        return copied ? : err;
2449}
2450
2451static int unix_stream_read_actor(struct sk_buff *skb,
2452                                  int skip, int chunk,
2453                                  struct unix_stream_read_state *state)
2454{
2455        int ret;
2456
2457        ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2458                                    state->msg, chunk);
2459        return ret ?: chunk;
2460}
2461
2462static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
2463                               struct msghdr *msg, size_t size,
2464                               int flags)
2465{
2466        struct unix_stream_read_state state = {
2467                .recv_actor = unix_stream_read_actor,
2468                .socket = sock,
2469                .kiocb = iocb,
2470                .msg = msg,
2471                .size = size,
2472                .flags = flags
2473        };
2474
2475        return unix_stream_read_generic(&state);
2476}
2477
2478static ssize_t skb_unix_socket_splice(struct sock *sk,
2479                                      struct pipe_inode_info *pipe,
2480                                      struct splice_pipe_desc *spd)
2481{
2482        int ret;
2483        struct unix_sock *u = unix_sk(sk);
2484
2485        mutex_unlock(&u->readlock);
2486        ret = splice_to_pipe(pipe, spd);
2487        mutex_lock(&u->readlock);
2488
2489        return ret;
2490}
2491
2492static int unix_stream_splice_actor(struct sk_buff *skb,
2493                                    int skip, int chunk,
2494                                    struct unix_stream_read_state *state)
2495{
2496        return skb_splice_bits(skb, state->socket->sk,
2497                               UNIXCB(skb).consumed + skip,
2498                               state->pipe, chunk, state->splice_flags,
2499                               skb_unix_socket_splice);
2500}
2501
2502static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2503                                       struct pipe_inode_info *pipe,
2504                                       size_t size, unsigned int flags)
2505{
2506        struct unix_stream_read_state state = {
2507                .recv_actor = unix_stream_splice_actor,
2508                .socket = sock,
2509                .pipe = pipe,
2510                .size = size,
2511                .splice_flags = flags,
2512        };
2513
2514        if (unlikely(*ppos))
2515                return -ESPIPE;
2516
2517        if (sock->file->f_flags & O_NONBLOCK ||
2518            flags & SPLICE_F_NONBLOCK)
2519                state.flags = MSG_DONTWAIT;
2520
2521        return unix_stream_read_generic(&state);
2522}
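
/*
 * For illustration only: the userspace entry to unix_stream_splice_read().
 * SPLICE_F_NONBLOCK maps onto MSG_DONTWAIT above. "sock_fd" is an assumed
 * connected stream socket.
 *
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	splice(sock_fd, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */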
2523
2524static int unix_shutdown(struct socket *sock, int mode)
2525{
2526        struct sock *sk = sock->sk;
2527        struct sock *other;
2528
2529        if (mode < SHUT_RD || mode > SHUT_RDWR)
2530                return -EINVAL;
2531        /* This maps:
2532         * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2533         * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2534         * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2535         */
2536        ++mode;
2537
2538        unix_state_lock(sk);
2539        sk->sk_shutdown |= mode;
2540        other = unix_peer(sk);
2541        if (other)
2542                sock_hold(other);
2543        unix_state_unlock(sk);
2544        sk->sk_state_change(sk);
2545
2546        if (other &&
2547                (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2548
2549                int peer_mode = 0;
2550
2551                if (mode&RCV_SHUTDOWN)
2552                        peer_mode |= SEND_SHUTDOWN;
2553                if (mode&SEND_SHUTDOWN)
2554                        peer_mode |= RCV_SHUTDOWN;
2555                unix_state_lock(other);
2556                other->sk_shutdown |= peer_mode;
2557                unix_state_unlock(other);
2558                other->sk_state_change(other);
2559                if (peer_mode == SHUTDOWN_MASK)
2560                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2561                else if (peer_mode & RCV_SHUTDOWN)
2562                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2563        }
2564        if (other)
2565                sock_put(other);
2566
2567        return 0;
2568}
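
/*
 * For illustration only: the userspace view of the mode mapping above.
 * SHUT_WR becomes SEND_SHUTDOWN locally and RCV_SHUTDOWN on the peer, so
 * data already queued stays readable and the peer then reads EOF.
 *
 *	shutdown(fd, SHUT_WR);	(no more writes from our side)
 *	while (read(fd, buf, sizeof(buf)) > 0)
 *		;		(drain what the peer still sends)
 */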
2569
2570long unix_inq_len(struct sock *sk)
2571{
2572        struct sk_buff *skb;
2573        long amount = 0;
2574
2575        if (sk->sk_state == TCP_LISTEN)
2576                return -EINVAL;
2577
2578        spin_lock(&sk->sk_receive_queue.lock);
2579        if (sk->sk_type == SOCK_STREAM ||
2580            sk->sk_type == SOCK_SEQPACKET) {
2581                skb_queue_walk(&sk->sk_receive_queue, skb)
2582                        amount += unix_skb_len(skb);
2583        } else {
2584                skb = skb_peek(&sk->sk_receive_queue);
2585                if (skb)
2586                        amount = skb->len;
2587        }
2588        spin_unlock(&sk->sk_receive_queue.lock);
2589
2590        return amount;
2591}
2592EXPORT_SYMBOL_GPL(unix_inq_len);
2593
2594long unix_outq_len(struct sock *sk)
2595{
2596        return sk_wmem_alloc_get(sk);
2597}
2598EXPORT_SYMBOL_GPL(unix_outq_len);
2599
2600static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2601{
2602        struct sock *sk = sock->sk;
2603        long amount = 0;
2604        int err;
2605
2606        switch (cmd) {
2607        case SIOCOUTQ:
2608                amount = unix_outq_len(sk);
2609                err = put_user(amount, (int __user *)arg);
2610                break;
2611        case SIOCINQ:
2612                amount = unix_inq_len(sk);
2613                if (amount < 0)
2614                        err = amount;
2615                else
2616                        err = put_user(amount, (int __user *)arg);
2617                break;
2618        default:
2619                err = -ENOIOCTLCMD;
2620                break;
2621        }
2622        return err;
2623}
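
/*
 * For illustration only: querying queue occupancy from userspace. Per
 * unix_inq_len() above, SIOCINQ counts all unread bytes for stream and
 * seqpacket sockets but only the head datagram for SOCK_DGRAM; SIOCOUTQ
 * reports bytes not yet consumed by the receiver.
 *
 *	int pending;
 *
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		printf("%d bytes readable\n", pending);
 */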
2624
2625static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2626{
2627        struct sock *sk = sock->sk;
2628        unsigned int mask;
2629
2630        sock_poll_wait(file, sk_sleep(sk), wait);
2631        mask = 0;
2632
2633        /* exceptional events? */
2634        if (sk->sk_err)
2635                mask |= POLLERR;
2636        if (sk->sk_shutdown == SHUTDOWN_MASK)
2637                mask |= POLLHUP;
2638        if (sk->sk_shutdown & RCV_SHUTDOWN)
2639                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2640
2641        /* readable? */
2642        if (!skb_queue_empty(&sk->sk_receive_queue))
2643                mask |= POLLIN | POLLRDNORM;
2644
2645        /* Connection-based sockets need to check for termination and startup */
2646        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2647            sk->sk_state == TCP_CLOSE)
2648                mask |= POLLHUP;
2649
2650        /*
2651         * We also report the socket as writable when the other side has
2652         * shut down the connection; this prevents sockets from getting stuck.
2653         */
2654        if (unix_writable(sk))
2655                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2656
2657        return mask;
2658}
2659
2660static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2661                                    poll_table *wait)
2662{
2663        struct sock *sk = sock->sk, *other;
2664        unsigned int mask, writable;
2665
2666        sock_poll_wait(file, sk_sleep(sk), wait);
2667        mask = 0;
2668
2669        /* exceptional events? */
2670        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2671                mask |= POLLERR |
2672                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2673
2674        if (sk->sk_shutdown & RCV_SHUTDOWN)
2675                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2676        if (sk->sk_shutdown == SHUTDOWN_MASK)
2677                mask |= POLLHUP;
2678
2679        /* readable? */
2680        if (!skb_queue_empty(&sk->sk_receive_queue))
2681                mask |= POLLIN | POLLRDNORM;
2682
2683        /* Connection-based sockets need to check for termination and startup */
2684        if (sk->sk_type == SOCK_SEQPACKET) {
2685                if (sk->sk_state == TCP_CLOSE)
2686                        mask |= POLLHUP;
2687                /* connection hasn't started yet? */
2688                if (sk->sk_state == TCP_SYN_SENT)
2689                        return mask;
2690        }
2691
2692        /* No write status requested, avoid expensive OUT tests. */
2693        if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2694                return mask;
2695
2696        writable = unix_writable(sk);
2697        if (writable) {
2698                unix_state_lock(sk);
2699
2700                other = unix_peer(sk);
2701                if (other && unix_peer(other) != sk &&
2702                    unix_recvq_full(other) &&
2703                    unix_dgram_peer_wake_me(sk, other))
2704                        writable = 0;
2705
2706                unix_state_unlock(sk);
2707        }
2708
2709        if (writable)
2710                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2711        else
2712                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2713
2714        return mask;
2715}
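
/*
 * For illustration only: waiting for a datagram socket to become writable.
 * unix_dgram_poll() above withholds POLLOUT while the peer's receive queue
 * is full and arms the peer-wake mechanism to wake this poller later.
 * "fd" and "peer" are assumed to be a datagram socket and its destination.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT))
 *		sendto(fd, buf, len, 0, (struct sockaddr *)&peer, sizeof(peer));
 */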
2716
2717#ifdef CONFIG_PROC_FS
2718
2719#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2720
2721#define get_bucket(x) ((x) >> BUCKET_SPACE)
2722#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2723#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2724
2725static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2726{
2727        unsigned long offset = get_offset(*pos);
2728        unsigned long bucket = get_bucket(*pos);
2729        struct sock *sk;
2730        unsigned long count = 0;
2731
2732        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2733                if (sock_net(sk) != seq_file_net(seq))
2734                        continue;
2735                if (++count == offset)
2736                        break;
2737        }
2738
2739        return sk;
2740}
2741
2742static struct sock *unix_next_socket(struct seq_file *seq,
2743                                     struct sock *sk,
2744                                     loff_t *pos)
2745{
2746        unsigned long bucket;
2747
2748        while (sk > (struct sock *)SEQ_START_TOKEN) {
2749                sk = sk_next(sk);
2750                if (!sk)
2751                        goto next_bucket;
2752                if (sock_net(sk) == seq_file_net(seq))
2753                        return sk;
2754        }
2755
2756        do {
2757                sk = unix_from_bucket(seq, pos);
2758                if (sk)
2759                        return sk;
2760
2761next_bucket:
2762                bucket = get_bucket(*pos) + 1;
2763                *pos = set_bucket_offset(bucket, 1);
2764        } while (bucket < ARRAY_SIZE(unix_socket_table));
2765
2766        return NULL;
2767}
2768
2769static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2770        __acquires(unix_table_lock)
2771{
2772        spin_lock(&unix_table_lock);
2773
2774        if (!*pos)
2775                return SEQ_START_TOKEN;
2776
2777        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2778                return NULL;
2779
2780        return unix_next_socket(seq, NULL, pos);
2781}
2782
2783static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2784{
2785        ++*pos;
2786        return unix_next_socket(seq, v, pos);
2787}
2788
2789static void unix_seq_stop(struct seq_file *seq, void *v)
2790        __releases(unix_table_lock)
2791{
2792        spin_unlock(&unix_table_lock);
2793}
2794
2795static int unix_seq_show(struct seq_file *seq, void *v)
2796{
2797
2798        if (v == SEQ_START_TOKEN)
2799                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2800                         "Inode Path\n");
2801        else {
2802                struct sock *s = v;
2803                struct unix_sock *u = unix_sk(s);
2804                unix_state_lock(s);
2805
2806                seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2807                        s,
2808                        atomic_read(&s->sk_refcnt),
2809                        0,
2810                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2811                        s->sk_type,
2812                        s->sk_socket ?
2813                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2814                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2815                        sock_i_ino(s));
2816
2817                if (u->addr) {
2818                        int i, len;
2819                        seq_putc(seq, ' ');
2820
2821                        i = 0;
2822                        len = u->addr->len - sizeof(short);
2823                        if (!UNIX_ABSTRACT(s))
2824                                len--;
2825                        else {
2826                                seq_putc(seq, '@');
2827                                i++;
2828                        }
2829                        for ( ; i < len; i++)
2830                                seq_putc(seq, u->addr->name->sun_path[i]);
2831                }
2832                unix_state_unlock(s);
2833                seq_putc(seq, '\n');
2834        }
2835
2836        return 0;
2837}
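
/*
 * The resulting /proc/net/unix entry looks like this (values illustrative,
 * matching the seq_printf() format above):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff8800b8f8a000: 00000002 00000000 00010000 0001 01 17597 /run/foo.sock
 *
 * Abstract-namespace sockets print '@' in place of the leading NUL byte.
 */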
2838
2839static const struct seq_operations unix_seq_ops = {
2840        .start  = unix_seq_start,
2841        .next   = unix_seq_next,
2842        .stop   = unix_seq_stop,
2843        .show   = unix_seq_show,
2844};
2845
2846static int unix_seq_open(struct inode *inode, struct file *file)
2847{
2848        return seq_open_net(inode, file, &unix_seq_ops,
2849                            sizeof(struct seq_net_private));
2850}
2851
2852static const struct file_operations unix_seq_fops = {
2853        .owner          = THIS_MODULE,
2854        .open           = unix_seq_open,
2855        .read           = seq_read,
2856        .llseek         = seq_lseek,
2857        .release        = seq_release_net,
2858};
2859
2860#endif
2861
2862static const struct net_proto_family unix_family_ops = {
2863        .family = PF_UNIX,
2864        .create = unix_create,
2865        .owner  = THIS_MODULE,
2866};
2867
2868
2869static int __net_init unix_net_init(struct net *net)
2870{
2871        int error = -ENOMEM;
2872
2873        net->unx.sysctl_max_dgram_qlen = 10;
2874        if (unix_sysctl_register(net))
2875                goto out;
2876
2877#ifdef CONFIG_PROC_FS
2878        if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2879                unix_sysctl_unregister(net);
2880                goto out;
2881        }
2882#endif
2883        error = 0;
2884out:
2885        return error;
2886}
2887
2888static void __net_exit unix_net_exit(struct net *net)
2889{
2890        unix_sysctl_unregister(net);
2891        remove_proc_entry("unix", net->proc_net);
2892}
2893
2894static struct pernet_operations unix_net_ops = {
2895        .init = unix_net_init,
2896        .exit = unix_net_exit,
2897};
2898
2899static int __init af_unix_init(void)
2900{
2901        int rc = -1;
2902
2903        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2904
2905        rc = proto_register(&unix_proto, 1);
2906        if (rc != 0) {
2907                printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2908                       __func__);
2909                goto out;
2910        }
2911
2912        sock_register(&unix_family_ops);
2913        register_pernet_subsys(&unix_net_ops);
2914out:
2915        return rc;
2916}
2917
2918static void __exit af_unix_exit(void)
2919{
2920        sock_unregister(PF_UNIX);
2921        proto_unregister(&unix_proto);
2922        unregister_pernet_subsys(&unix_net_ops);
2923}
2924
2925/* Earlier than device_initcall() so that other drivers invoking
2926   request_module() don't end up in a loop when modprobe tries
2927   to use a UNIX socket. But later than subsys_initcall() because
2928   we depend on infrastructure initialised there. */
2929fs_initcall(af_unix_init);
2930module_exit(af_unix_exit);
2931
2932MODULE_LICENSE("GPL");
2933MODULE_ALIAS_NETPROTO(PF_UNIX);
2934